diff --git a/src/main/java/com/hy/java/uct/sdtocode/mapper/CodeMessageTracer.java b/src/main/java/com/hy/java/uct/sdtocode/mapper/CodeMessageTracer.java index c0e6bdae7cc5ccf283f25df9ba04a950e9646183..37d42f9f5f4e11597eb116187b1eaf9326193c95 100644 --- a/src/main/java/com/hy/java/uct/sdtocode/mapper/CodeMessageTracer.java +++ b/src/main/java/com/hy/java/uct/sdtocode/mapper/CodeMessageTracer.java @@ -1,12 +1,29 @@ package com.hy.java.uct.sdtocode.mapper; +import java.io.File; +import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.collections4.SetUtils; + +import com.github.javaparser.StaticJavaParser; +import com.github.javaparser.ast.CompilationUnit; +import com.github.javaparser.ast.ImportDeclaration; +import com.github.javaparser.ast.NodeList; +import com.github.javaparser.ast.body.ClassOrInterfaceDeclaration; +import com.github.javaparser.ast.body.FieldDeclaration; +import com.github.javaparser.ast.body.MethodDeclaration; +import com.github.javaparser.ast.stmt.BlockStmt; +import com.hy.java.uct.sdtocode.util.TracedMessagePath; import com.hy.java.uct.util.sd.Message; import com.hy.java.uct.util.sd.UMLObject; import com.hy.java.utility.common.Pair; +import com.hy.java.utility.common.Triple; /** * 对每条消息,先辨别其消息内容是否包含多个标识符。如果只有单个标识符、且该标识符是一个方法,则依次用前两种即可;如果有多个标识符、或标识符是一个类,则除了使用前两种之外,还需使用第三种追踪。 @@ -42,7 +59,7 @@ public class CodeMessageTracer { * * 2)①在支撑类中找该属性的相关操作,对比每个操作与消息的语义相似度。比如new操作可以对应消息内容run。②对属性类型对应的类,找其中与消息内容语义相似的方法。 */ - traceByAttri(res, objs_in_SD, classFullName_javaFileDir_map); + traceByAttri(msg_inSD, res, objs_in_SD, classFullName_javaFileDir_map); /* * 2、第二种追踪:针对对象和方法 * @@ -52,7 +69,7 @@ public class CodeMessageTracer { * * 3)①找该方法内是否有对消息另一端的调用,如参数、方法内的变量、语句等。②对与消息相似的方法,找到其所属的类,在这个类里看该方法的返回类型是否与另一端相似。 */ - traceByMethod(res, objs_in_SD, classFullName_javaFileDir_map); + 
traceByMethod(msg_inSD, res, objs_in_SD, classFullName_javaFileDir_map); /* * 3、第三种追踪:针对消息 * @@ -64,8 +81,9 @@ public class CodeMessageTracer { * 3)对比链条两端与消息两端的对象。如果存在支撑类之类的关系,则认为消息两端应该是支撑类;否则仍保留链条两端作为消息两端。 */ if (msg_is_complex) { - traceByMsg(res, objs_in_SD, classFullName_javaFileDir_map); + traceByMsg(msg_inSD, res, objs_in_SD, classFullName_javaFileDir_map); } + System.out.println("================================================================="); } } return res; @@ -95,20 +113,339 @@ public class CodeMessageTracer { /** * 第一种追踪:针对对象和属性 * - * 1)支撑类中遍历属性,找那种名称或类型与消息另一端语义相似的属性,记录其类型。 + * 1)遍历支撑类。在每个支撑类中遍历属性。找那种名称或类型与消息另一端语义相似的属性,记录属性的类型。 * * 2)①在支撑类中找该属性的相关操作,对比每个操作与消息的语义相似度。比如new操作可以对应消息内容run。②对属性类型对应的类,找其中与消息内容语义相似的方法。 + * + * @param msg_inSD */ - private static void traceByAttri(List res, Pair, List> objs_in_SD, Map classFullName_javaFileDir_map) { + private static void traceByAttri(Message msg_inSD, List res, Pair, List> objs_in_SD, Map classFullName_javaFileDir_map) { + System.out.println("①对于" + msg_inSD.msg + ",开始针对消息两端对象和代码中的属性进行追踪"); + // 解析出msg中的方法名 + String method_in_msg = parseMethodInMsg(msg_inSD.msg); /* - * 来来来来来来来来来来~ - * - * 来来来来来来来来来来~ - * - * 来来来来来来来来~ - * - * 小时候悄悄在路上~ + * 遍历支撑类。在每个支撑类中遍历属性。 */ + UMLObject src_uo = msg_inSD.source; + UMLObject target_uo = msg_inSD.target; + // 遍历支撑类 + for (String uo_mapped_file_dir : src_uo.mapped_file_dir_ls) { + // uo_mapped_file_dir是支撑类的文件地址。根据文件地址,用javaparser去解析文件 + try { + CompilationUnit uo_mapped_file = StaticJavaParser.parse(new File(uo_mapped_file_dir)); + String cls_shortName = uo_mapped_file_dir.substring(uo_mapped_file_dir.lastIndexOf("\\") + 1, uo_mapped_file_dir.lastIndexOf(".")); + Optional o_class = uo_mapped_file.getClassByName(cls_shortName); + if (o_class.isPresent()) { + // 用javaparser得到的支撑类代码 + ClassOrInterfaceDeclaration clsCode_unit = o_class.get(); + // 在每个支撑类中遍历属性 + List attris = clsCode_unit.getFields(); + // 记录那种名称或类型与消息另一端语义相似的属性。针对这种属性进行追踪 + List> sim_attris = new ArrayList<>(); 
+ for (FieldDeclaration attri : attris) { + /* + * 找那种名称或类型与消息另一端语义相似的属性,记录属性的类型。 + * + * 这里有个“名称或类型与另一端相似的概率”,即ifAttriSim_similarity.getRight() + */ + Pair ifAttriSim_similarity = checkAttriSim(attri.getElementType().asString(), attri.getVariable(0).getNameAsString(), target_uo); + if (ifAttriSim_similarity.getLeft()) { + sim_attris.add(Pair.createPair(attri, ifAttriSim_similarity.getRight())); + } + } + /* + * 目前已找到名称或类型与消息另一端语义相似的属性sim_attri。接下来追踪: + * + * ①在支撑类clsCode_unit中找与属性sim_attri相关的操作,对比每个操作与消息内容的语义相似度。比如new操作可以对应消息内容run。 + * + * ②对属性sim_attri的类型getElementType()对应的类,找其中与消息内容语义相似的方法。 + */ + for (Pair attri_p : sim_attris) { + FieldDeclaration sim_attri = attri_p.getLeft(); + // ①在支撑类中找该属性的相关操作,对比每个操作与消息的语义相似度。比如new操作可以对应消息内容run。 + trace_attri_in_supportingClsCode(sim_attri, attri_p.getRight(), clsCode_unit, msg_inSD, method_in_msg, res, uo_mapped_file_dir); + // ②对属性类型对应的类,找其中与消息内容语义相似的方法。 + Pair attri_clsCode = getAttriClsCode(sim_attri.getElementType().asString(), uo_mapped_file, uo_mapped_file_dir, classFullName_javaFileDir_map); + trace_attri_in_typeClsCode(sim_attri, attri_p.getRight(), attri_clsCode, msg_inSD, method_in_msg, res, clsCode_unit); + } + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + System.out.println("对于" + msg_inSD.msg + ",完成针对对象和属性的追踪了"); + } + + /** + * msg中可能有括号、也可能没括号;可能只有一个方法名,也可能有“.”分开多段 + */ + private static String parseMethodInMsg(String msg) { + String res = msg; + // msg中可能有括号 + if (msg.contains("(")) { + String temp = msg.substring(0, msg.lastIndexOf("(")); + // 也可能有“.”分开多段 + if (temp.contains(".")) { + res = temp.substring(temp.lastIndexOf(".") + 1); + } else if (temp.contains(" ")) { + res = temp.substring(temp.lastIndexOf(" ") + 1); + } else { + // 可能只有一个方法名 + res = temp; + } + } + // 也可能没括号 + return res; + } + + /** + * 找那种名称或类型与消息另一端语义相似的属性,记录属性的类型。 + */ + private static Pair checkAttriSim(String type, String attri_name, UMLObject target_uo) { + Pair res = new Pair<>(false, 0.0); + boolean 
type_sim = false; + boolean name_sim = false; + for (String uo_mapped_file_dir : target_uo.mapped_file_dir_ls) { + String cls_shortName = uo_mapped_file_dir.substring(uo_mapped_file_dir.lastIndexOf("\\") + 1, uo_mapped_file_dir.lastIndexOf(".")); + double type_sim_p = jaccard_similarity(type, cls_shortName); + if (type_sim_p > 0.85) { + type_sim = true; + if (type_sim_p > res.getRight()) { + res.setRight(type_sim_p); + } + } + double name_sim_p = jaccard_similarity(attri_name, cls_shortName); + if (name_sim_p > 0.85) { + name_sim = true; + if (name_sim_p > res.getRight()) { + res.setRight(name_sim_p); + } + } + } + if (type_sim || name_sim) { + res.setLeft(true); + } + return res; + } + + /** + * 根据属性的类型,在原项目中找对应的代码:可能是个java文件;可能是支撑类的内部类 + * + * @param uo_mapped_file_dir + */ + private static Pair getAttriClsCode(String attri_type, CompilationUnit uo_mapped_file, String uo_mapped_file_dir, Map classFullName_javaFileDir_map) { + Pair res = Pair.createPair(null, null); + ClassOrInterfaceDeclaration _clsCode = null; + /* + * 获取属性类型的full_name,用于在classFullName_javaFileDir_map中找java文件 + */ + String full_name = ""; + // 根据包依赖情况,获取属性类型的full_name + NodeList imports = uo_mapped_file.getImports(); + for (ImportDeclaration _import : imports) { + String import_name = _import.getNameAsString().substring(_import.getNameAsString().lastIndexOf(".") + 1); + if (import_name.equals(attri_type)) { + full_name = _import.getNameAsString(); + break; + } + } + if (full_name.equals("")) { + full_name = uo_mapped_file.getPackageDeclaration().get().getNameAsString() + "." 
+ attri_type; + } + /* + * 在classFullName_javaFileDir_map中找java文件。如果找不到,则说明属性类型是支撑类的内部类 + */ + // 如果能找到属性类型对应的java文件,则用javaparser解析一下并返回ClassOrInterfaceDeclaration + if (classFullName_javaFileDir_map.containsKey(full_name)) { + try { + Optional o_class = StaticJavaParser.parse(new File(classFullName_javaFileDir_map.get(full_name))).getClassByName(attri_type); + if (o_class.isPresent()) { + _clsCode = o_class.get(); + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + res.setLeft(classFullName_javaFileDir_map.get(full_name)); + res.setRight(_clsCode); + } + // 如果不能找到,则说明属性类型是支撑类的内部类 + else { + List inner_class_declaration = uo_mapped_file.getLocalDeclarationFromClassname(attri_type); + if (!inner_class_declaration.isEmpty()) { + _clsCode = inner_class_declaration.get(0); + } + res.setLeft(uo_mapped_file_dir); + res.setRight(_clsCode); + } + return res; + } + + /** + * ①在支撑类中找该属性的相关操作,对比每个操作与消息的语义相似度: + * + * 1、按行读取支撑类clsCode_unit的文件内容。 + * + * 2、查找每一行里是否包含属性sim_attri.getVariable(0).getNameAsString()。 + * + * 3、如果包含,则分析改行内的方法是否与消息msg_inSD相似。 + * + * @param msg_inSD + * + * @param uo_mapped_file_dir + */ + private static void trace_attri_in_supportingClsCode(FieldDeclaration sim_attri, Double attri_name_similarity, ClassOrInterfaceDeclaration clsCode_unit, Message msg_inSD, String msg, List res, String uo_mapped_file_dir) { + String[] clsCode_plainText = clsCode_unit.toString().split("\n"); + /* + * 1、按行读取支撑类clsCode_unit的文件内容。 + */ + for (String code_line : clsCode_plainText) { + /* + * 2、查找每一行里是否包含属性sim_attri.getVariable(0).getNameAsString()。 + * + * 忽略注释行 + */ + if (code_line.contains(sim_attri.getVariable(0).getNameAsString()) && !(code_line.contains("// ") || code_line.contains("* "))) { + /* + * 3、如果包含,则分析改行内的方法是否与消息msg_inSD相似。 + * + * 方法名:每个“(”前面直到第一个“.”或“ ”之间的字符串都是一个方法名。如果是“ ”,则还需检查前面是否是“new”的形式 + * + * 例外:if、for之类后面是空格加括号,要去除这类“无名方法” + */ + // 解析code_line中的方法名。由于语句中可能有多个方法,所以与每个方法都做对比,至少有一个相似的即可 + List methods_in_codeLine = 
parseMethodsInCL(code_line.trim()); + // 记录code_line中的方法与msg的相似度 + List> method_similarities = new ArrayList<>(); + for (String method_in_codeLine : methods_in_codeLine) { + double js = 0.0; + if (method_in_codeLine.startsWith("new ")) { + if (msg.equals("run")) { + js = jaccard_similarity(sim_attri.getVariable(0).getNameAsString(), method_in_codeLine.substring(4)); + } else { + js = jaccard_similarity(msg, method_in_codeLine.substring(4)); + } + } else { + js = jaccard_similarity(msg, method_in_codeLine); + } + method_similarities.add(Pair.createPair(method_in_codeLine, js)); + } + // 只要有一个方法跟msg相似,则返回本行作为追踪记录。追踪概率为attri_name_similarity*方法相似度 + double max_codeLine_sim = 0.0; + for (int i = 0; i < method_similarities.size(); i++) { + if (method_similarities.get(i).getRight() > max_codeLine_sim) { + max_codeLine_sim = method_similarities.get(i).getRight(); + } + } + if (max_codeLine_sim >= 0.625) { + // 把这句代码记下来作为msg_inSD的追踪结果之一 + TracedMessagePath trace_path = new TracedMessagePath(); + trace_path.path.add(Triple.createTriple(uo_mapped_file_dir, code_line.trim(), attri_name_similarity * max_codeLine_sim)); + msg_inSD.traced_path_ls.add(trace_path); + } + } + } + if (res.contains(msg_inSD)) { + res.set(res.indexOf(msg_inSD), msg_inSD); + } else { + res.add(msg_inSD); + } + System.out.println("检查完支撑类" + clsCode_unit.getNameAsString() + "中所有包含属性" + sim_attri.getVariable(0).getNameAsString() + "的代码行与消息" + msg + "的相似度了"); + } + + /** + * 方法名:每个“(”前面直到第一个“.”或“ ”之间的字符串都是一个方法名。 + * + * 例外:①if、for之类后面是空格加括号,要去除这类“无名方法”。②如果是“ ”,则还需检查前面是否是“new”的形式 + */ + private static List parseMethodsInCL(String code_line) { + List res = new ArrayList<>(); + if (code_line.contains("(")) { + // 方法名:每个“(”前面直到第一个“.”或“ ”之间的字符串都是一个方法名。 + String[] method_strs = code_line.split("\\("); + int i = 0; + do { + // 如果包含“.”,则直接找到方法名了 + if (method_strs[i].contains(".")) { + if (method_strs[i].charAt(method_strs[i].lastIndexOf(".") + 1) != ' ') { + 
res.add(method_strs[i].substring(method_strs[i].lastIndexOf(".") + 1)); + } else { + res.add(method_strs[i].substring(method_strs[i].lastIndexOf(" ") + 1)); + } + } + // 如果不包含“.”但包含“ ”,则需判断是否if、for、new之类。如果不是,则直接找到方法名了 + else if (method_strs[i].contains(" ")) { + // 判断空格前是否if、for、new之类 + String check_before_space = method_strs[i].substring(0, method_strs[i].lastIndexOf(" ")); + Pair if_for_new = if_for_new(check_before_space); + // 如果是,则还需检查前面是否是“new”的形式 + if (if_for_new.getLeft()) { + if (if_for_new.getRight().equals("new")) { + res.add("new " + method_strs[i].substring(method_strs[i].lastIndexOf(" ") + 1)); + } + } + // 如果不是,则直接找到方法名了 + else { + res.add(method_strs[i].substring(method_strs[i].lastIndexOf(" ") + 1)); + } + } else { + // 这是些奇奇怪怪的字符串,可能有方法、也可能没有。看它跟消息像不像吧 + res.add(method_strs[i]); + } + i++; + } while (i < method_strs.length - 1); + } + return res; + } + + /** + * 判断空格前是否if、for、new之类 + */ + private static Pair if_for_new(String check_before_space) { + Pair res = new Pair<>(false, "反正不是new"); + int str_length = check_before_space.length(); + if (str_length >= 2) { + // 从最后一个字符往前走两个,看是不是if + if (check_before_space.substring(str_length - 2).equals("if")) { + res.setLeft(true); + } + // 如果不是if,则从最后一个字符往前走三个,看是不是for或new + else if (str_length >= 3) { + if (check_before_space.substring(str_length - 3).equals("for")) { + res.setLeft(true); + } else if (check_before_space.substring(str_length - 3).equals("new")) { + res.setLeft(true); + res.setRight("new"); + } + } + } + return res; + } + + /** + * ②对属性类型对应的类,找其中与消息内容语义相似的方法。 + * + * @param msg_inSD + * @param clsCode_unit + */ + private static void trace_attri_in_typeClsCode(FieldDeclaration sim_attri, Double attri_name_sim, Pair attri_clsCode, Message msg_inSD, String msg, List res, ClassOrInterfaceDeclaration clsCode_unit) { + List methods_in_attriClsCode = attri_clsCode.getRight().getMethods(); + for (MethodDeclaration md : methods_in_attriClsCode) { + String md_name = md.getNameAsString(); + double sim 
= jaccard_similarity(md_name, msg); + // 这个阈值可以再改 + if (sim > 0.5) { + // 把这句代码记下来作为msg_inSD的追踪结果之一 + TracedMessagePath trace_path = new TracedMessagePath(); + trace_path.path.add(Triple.createTriple(attri_clsCode.getLeft(), md.getDeclarationAsString().trim(), attri_name_sim * sim)); + msg_inSD.traced_path_ls.add(trace_path); + } + } + if (res.contains(msg_inSD)) { + res.set(res.indexOf(msg_inSD), msg_inSD); + } else { + res.add(msg_inSD); + } + System.out.println("检查完支撑类" + clsCode_unit.getNameAsString() + "中属性" + sim_attri.getVariable(0).getNameAsString() + "的类型实现类中所有方法与" + msg + "的相似度了"); } /** @@ -119,10 +456,78 @@ public class CodeMessageTracer { * 2)计算每个方法与消息内容的语义相似度。找到相似的方法。 * * 3)①找该方法内是否有对消息另一端的调用,如参数、方法内的变量、语句等。②对与消息相似的方法,找到其所属的类,在这个类里看该方法的返回类型是否与另一端相似。 + * + * @param msg_inSD */ - private static void traceByMethod(List res, Pair, List> objs_in_SD, Map classFullName_javaFileDir_map) { - // TODO Auto-generated method stub + private static void traceByMethod(Message msg_inSD, List res, Pair, List> objs_in_SD, Map classFullName_javaFileDir_map) { + System.out.println("①对于" + msg_inSD.msg + ",开始针对消息两端对象和代码中的方法进行追踪"); + // 解析出msg中的方法名 + String method_in_msg = parseMethodInMsg(msg_inSD.msg); + /* + * 1)支撑类中阅读代码、记录所有方法(包括方法逻辑中用到的其他方法)。 + */ + UMLObject src_uo = msg_inSD.source; + UMLObject target_uo = msg_inSD.target; + // 遍历支撑类 + for (String uo_mapped_file_dir : src_uo.mapped_file_dir_ls) { + // uo_mapped_file_dir是支撑类的文件地址。根据文件地址,用javaparser去解析文件 + try { + CompilationUnit uo_mapped_file = StaticJavaParser.parse(new File(uo_mapped_file_dir)); + String cls_shortName = uo_mapped_file_dir.substring(uo_mapped_file_dir.lastIndexOf("\\") + 1, uo_mapped_file_dir.lastIndexOf(".")); + Optional o_class = uo_mapped_file.getClassByName(cls_shortName); + if (o_class.isPresent()) { + // 用javaparser得到的支撑类代码 + ClassOrInterfaceDeclaration clsCode_unit = o_class.get(); + // 记录支撑类中的方法 + List methods = clsCode_unit.getMethods(); + // 记录方法逻辑中用到的其他方法 + List> method_code_sim = new 
ArrayList<>(); + + /* + * 记录方法逻辑中用到的其他方法 + * + * 记录方法逻辑中用到的其他方法 + * + * 记录方法逻辑中用到的其他方法 + * + * 记录方法逻辑中用到的其他方法 + * + * 记录方法逻辑中用到的其他方法 + * + * 记录方法逻辑中用到的其他方法 + */ + for (MethodDeclaration _method : methods) { + + Optional o_body = _method.getBody(); + if (o_body.isPresent()) { + BlockStmt body = o_body.get(); + String[] codes = body.toString().split("\n"); + for (String code_line : codes) { + List methods_in_codeLine = parseMethodsInCL(code_line.trim()); + System.out.println(methods_in_codeLine.toString()); + } + } + } + /* + * 2)计算每个方法与消息内容的语义相似度。找到相似的方法。 + */ + for (MethodDeclaration _method : methods) { + + } + /* + * 3)①找该方法内是否有对消息另一端的调用,如参数、方法内的变量、语句等。 + * + * ②对与消息相似的方法,找到其所属的类,在这个类里看该方法的返回类型是否与另一端相似。 + */ + for (Triple tri : method_code_sim) { + } + } + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + System.out.println("对于" + msg_inSD.msg + ",完成针对对象和方法的追踪了"); } /** @@ -135,17 +540,42 @@ public class CodeMessageTracer { * * 3)对比链条两端与消息两端的对象。如果存在支撑类之类的关系,则认为消息两端应该是支撑类;否则仍保留链条两端作为消息两端。 * + * @param msg_inSD * @param res */ - private static void traceByMsg(List res, Pair, List> objs_in_SD, Map classFullName_javaFileDir_map) { + private static void traceByMsg(Message msg_inSD, List res, Pair, List> objs_in_SD, Map classFullName_javaFileDir_map) { // TODO Auto-generated method stub } + /** + * 计算两个字符串的Jaccard相似度 + */ + private static double jaccard_similarity(String string1, String string2) { + double result = 0.0; + // 两个字符串的字符集 + Set chars_of_string1 = string1.chars().boxed().collect(Collectors.toSet()); + Set chars_of_string2 = string2.chars().boxed().collect(Collectors.toSet()); + // 交集大小 + int intersection = SetUtils.intersection(chars_of_string1, chars_of_string2).size(); + // 并集大小 + int union = SetUtils.union(chars_of_string1, chars_of_string2).size(); + // Jaccard相似度 + if (union != 0) { + result = ((double) intersection) / ((double) union); + } + return result; + } + /** * 保存追踪结果 */ public static void save(List trace_msgs, String string) 
{ - // TODO Auto-generated method stub - + for (Message msg : trace_msgs) { + for (TracedMessagePath traced_path : msg.traced_path_ls) { + for (Triple tri : traced_path.path) { + System.out.println(msg.msg + "有" + tri.getRight() + "概率追踪到" + tri.getLeft() + "的代码段" + tri.getMid()); + } + } + } } } diff --git a/src/main/java/com/hy/java/uct/sdtocode/util/TracedMessagePath.java b/src/main/java/com/hy/java/uct/sdtocode/util/TracedMessagePath.java index aead31edb5d3daae3ab50b7628138913ebcebfaf..36d66ce1b486a4887091599d45419b08b2642946 100644 --- a/src/main/java/com/hy/java/uct/sdtocode/util/TracedMessagePath.java +++ b/src/main/java/com/hy/java/uct/sdtocode/util/TracedMessagePath.java @@ -3,7 +3,7 @@ package com.hy.java.uct.sdtocode.util; import java.util.ArrayList; import java.util.List; -import com.hy.java.utility.common.Pair; +import com.hy.java.utility.common.Triple; /** * 消息的追踪结果 @@ -13,6 +13,6 @@ import com.hy.java.utility.common.Pair; * 每条链路有个总体的追踪概率 */ public class TracedMessagePath { - // 一条链路:由一系列组成的List - List> path = new ArrayList<>(); + // 一条链路:由一系列组成的List + public List> path = new ArrayList<>(); } diff --git a/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassDetector.java b/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassDetector.java index 98a75b5ee1df4fdfe38a34b2e269ab72642e894f..a161024f7493de552a30e8d8798b46d13e2094d2 100644 --- a/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassDetector.java +++ b/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassDetector.java @@ -35,27 +35,38 @@ public class ClassDetector { public void recog() { /* * 预处理 + * + * pre-processing */ // 读取图片。并灰度处理 + // read the image. and gray-scale Mat mat = Imgcodecs.imread(cd_path, Imgcodecs.IMREAD_GRAYSCALE); // 高斯锐化,提升类图图形清晰度 // Imgproc.Laplacian(mat, mat, 2); // 二值化,用于后续处理 + // thresholding Imgproc.threshold(mat, mat, 150, 255, Imgproc.THRESH_BINARY); /* * 识别类 + * + * recognize classes in the image */ // 矩形检测。识别所有矩形区域 + // detect rectangles. 
recognize all rectangle areas Pair> all_rect_areas_in_cd = detectRectArea(mat, 0.000555); // 对矩形进行整合,形成类区域 + // merge rectangles, forming classes List classes = mergeIntoClass(all_rect_areas_in_cd); // 对类区域进行文字检测 + // recognize texts within classes result = Pair.createPair(all_rect_areas_in_cd.getLeft(), detectText(Imgcodecs.imread(cd_path, Imgcodecs.IMREAD_GRAYSCALE), classes)); } /** * 检测方框 * + * detect rectangles + * * @param cls_diagram * @param ratio 检测最小矩形占全图面积的比例(0~1之间的一个小数)。如果类图中类的面积很小,则该比例应设的很小。 * @return @@ -64,41 +75,54 @@ public class ClassDetector { System.out.println("开始识别" + cd_path + "中所有矩形"); /* * 识别图中所有“轮廓”并存在contours中 + * + * recognize all "contours" in the image, and save them in the contours */ List contours = new ArrayList<>(); Imgproc.findContours(cls_diagram, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE); /* * 对每个轮廓contour,检测是否为矩形,并将contour和检测结果存在Rectangle中。所有轮廓的检测结果最终存在all_rect_areas中 + * + * for each contour, check whether it is a rectangle, then save both the contour and the check result in Rectangle. All check results for contours are saved in all_rect_areas */ List all_rect_areas = new ArrayList<>(); // 这个rect_contours里只存那些检测结果为矩形的contour,用于后面边框涂白,防止矩形干扰关系符号和关系线识别 + // only rectangle contours are saved in rect_contours, for wiping, in order to avoid their influence on relationship recognition List rect_contours = new ArrayList<>(); // 根据图片像素计算轮廓面积阈值。如果轮廓面积太小或太大,则直接忽略 + // calculating the threshold for contour_area according to the image size. 
If the contour_area is too big or small, ignore it long cd_area = cls_diagram.width() * cls_diagram.height(); double min_cls_area = 60; double max_cls_area = cd_area * 0.5; for (MatOfPoint contour : contours) { // 如果轮廓面积太小或太大,则直接忽略 + // If the contour_area is too big or small, ignore it double contour_area = Imgproc.contourArea(contour); if (contour_area < min_cls_area || contour_area > max_cls_area) { continue; } // 如果轮廓面积合适,则检测是否为矩形。采用多边形逼近法,将轮廓转化为curve向多边形做逼近,并将逼近结果存在approx_curve中 + // If the contour_area is ok, then check if it is a rectangle based on polygon approximation MatOfPoint2f curve = new MatOfPoint2f(contour.toArray()); MatOfPoint2f approx_curve = new MatOfPoint2f(); Imgproc.approxPolyDP(curve, approx_curve, 0.01 * Imgproc.arcLength(curve, true), true); // 针对逼近结果approx_curve,若其共有4个顶点,则可认为是矩形。将检测结果绘制在原图中,便于后续处理 + // if the approximation has 4 vertices, it can be considered as a rectangle if (approx_curve.toArray().length == 4) { // 将矩形存在all_rect_areas中用于类的整合和文字识别。存完后将其从原图中抹掉,防止其干扰关系符号和关系线识别 + // save the rectangle in all_rect_areas for merging and text recognition, then wipe it all_rect_areas.add(new Rectangle(cls_diagram.clone(), contour, approx_curve)); rect_contours.add(contour); // 存完后将矩形从图中抹掉(涂白)。后面还需对所有边框进行涂白 + // wipe the rectangle to avoid its influence on relationship recognition Imgproc.fillConvexPoly(cls_diagram, contour, new Scalar(255, 255, 255)); } } // 对所有矩形边框进行涂白,防止其干扰关系符号和关系线识别 + // wipe the rectangle to avoid its influence on relationship recognition Imgproc.drawContours(cls_diagram, rect_contours, -1, new Scalar(255, 255, 255), 5); // 由于灰度问题,此时可能会有遗留矩形,所以重新检测一遍 + // due to the gray-scale problem, there may be other rectangles, so detect them again. 
all the following process are the same contours.clear(); rect_contours.clear(); Imgproc.findContours(cls_diagram, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE); @@ -149,26 +173,42 @@ public class ClassDetector { * 如果是,则再看两个左上角的纵坐标距离是否大致等于“上面矩形”的高度 * * 如果是,则纳入同一个类中 + * + * The process: + * + * For current rectangle, assign it to a class. Then record the current rectangles belonged to this class + * + * Then, for the rectangle's left-top, check if there are any other rectangles that is horizontally 3~5 px nearby + * + * If so, then check if the difference of the vertical coordinates of these two rectangles' left-tops are about the height of the ``top rect'' + * + * If so, then the new rectangle is included into the same class */ for (int all_rect_index = 0; all_rect_index < rect_area_list.size(); all_rect_index++) { Rectangle current_rect = rect_area_list.get(all_rect_index); // 如果当前rect已经属于某个类,则跳过它 + // if current rect is already belonged to a class, ignore it if (current_rect.within_cls_obj) { continue; } // 如果当前rect不属于任何类,则将其赋给一个新的类 + // otherwise, assign it to a class UMLClass uml_class = new UMLClass(); uml_class.list.add(current_rect); current_rect.within_cls_obj = true; // 这个temp_rect用于每次拼接之后更新类的总大小。实际每次都是用temp_rect与其他未拼接的类作比较 + // this temp_rect is for updating the class's size uml_class.temp_rect = current_rect.clone(); // 对当前类的temp_rect,与all_rect_areas中其他所有矩形做比较 + // compare the temp_rect and other rects in all_rect_areas for (int j = all_rect_index + 1; j < rect_area_list.size(); j++) { // 获取all_rect_areas列表中位于current_rect后面的矩形 Rectangle other_rect = rect_area_list.get(j); // 针对该矩形的左上角,看是否与其他矩形的左上角横坐标相差不超过3~5个像素 + // for the rectangle's left-top, check if there are any other rectangles that is horizontally 3~5 px nearby if (Math.abs(other_rect.tl().x - uml_class.temp_rect.tl().x) <= 5) { // 如果是,则再看两个左上角的纵坐标距离是否大致等于“上面矩形”的高度 + // If so, then check if the difference of the vertical coordinates of these two rectangles' left-tops are 
about the height of the ``top rect'' // “上面矩形”是temp_rect if (uml_class.temp_rect.tl().y < other_rect.tl().y) { if (other_rect.tl().y - uml_class.temp_rect.tl().y - uml_class.temp_rect.height <= 5) { @@ -192,6 +232,7 @@ public class ClassDetector { } } // 将列表里的矩形按上、中、下排序 + // sort the rects in the class into top, mid, and bottom uml_class.list.sort(new Comparator() { @Override public int compare(Rectangle r1, Rectangle r2) { @@ -221,6 +262,8 @@ public class ClassDetector { } /* * 此时会出现top其实是整个矩形的情况。需将top、mid、bottom重新分割 + * + * sometimes the top is actually same as the whole. In this case, we should cut the top, mid, and bottom */ if (uml_class.mid != null) { if (uml_class.bottom != null) { @@ -243,6 +286,7 @@ public class ClassDetector { // 计算类框整体大小 int whole_height = 0; // 先将whole的高度设置为top的高度 + // set the height of whole the same as top whole_height += uml_class.top.height; if (uml_class.mid != null) { whole_height += uml_class.mid.height; @@ -265,6 +309,7 @@ public class ClassDetector { instance.setDatapath(UMLDiagramRecognizer.tessdata_path); try { // 将uc中的每个区域写入临时文件,然后识别临时文件中的文字 + // cut uc into a temp file, then recognize texts in the temp file if (uc.top != null) { Mat img = ImgProcessor.cutImage(cls_diagram, uc.top); if (!img.empty()) { @@ -287,7 +332,6 @@ public class ClassDetector { uc.setMethodsStr(instance.doOCR(new File(temp_res_path))); } } catch (TesseractException e) { - // TODO Auto-generated catch block e.printStackTrace(); } } diff --git a/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassRelationDetector.java b/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassRelationDetector.java index 3b19a9b46923edcd28b6f7236a098e2ba8228256..a635e62418f5161014234987d1163e6b8d028630 100644 --- a/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassRelationDetector.java +++ b/src/main/java/com/hy/java/uct/umlrecog/cddetector/ClassRelationDetector.java @@ -37,10 +37,13 @@ public class ClassRelationDetector { public void recog() { // 
先检测实线。检测结果需借助类的位置关系、去除“单纯的线” + // first, detect all solid lines. The detection need the location info of classes, in order to eliminate "pure lines" Set solid_lines = detectLines(classes_in_cd, 0.0504, true); // 再检测虚线。虚线检测除了间隔设置与实线不同外,其他完全一样 + // second, detect all dash lines. Only the threshold are not the same as solid liness Set dash_lines = detectLines(classes_in_cd, 0.0504, false); // 再检测关系类型。由于会遍历所有关系,所以顺便将关系存在classes_in_cd中每个UMLClass的关系列表中 + // third, detect relationship types, and save all the results in classes_in_cd result = detectRelationType(classes_in_cd, solid_lines, dash_lines, 0.0001); } @@ -55,27 +58,35 @@ public class ClassRelationDetector { List UML_classes = classes_in_cd.getRight(); /* * 先检测边缘,然后从边缘集中初步检测“原始直线” + * + * detect all edges, then detect "raw lines" in the set of edges */ List line_segments = new ArrayList<>(); // 先检测边缘。然后从边缘集中检测直线 + // detect all edges Mat edges = new Mat(); Imgproc.Canny(cls_diagram, edges, 50, 50 * 3, 3, true); // 从边缘集中检测直线。膨胀(将线外面的空白区域膨胀)、腐蚀(让线侵蚀周围的空白区域),可以提高识别准确率 + // detect "raw lines" in the set of edges. 
dilation and erosion are used to improve accuracy Imgproc.dilate(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1.7, 1.7)), new Point(-1, -1), 2); Imgproc.erode(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1, 1)), new Point(-1, -1), 1); Mat lines = new Mat(); if (detect_solid) { // 实线检测时,“最小直线长度”与图像像素有关;“最大像素间隔”越小越接近实线 + // the "minLineLength" is related to the pixels; the smaller "maxLineGap" is, the closer to solid lines Imgproc.HoughLinesP(edges, lines, 1, Math.PI / 180, 8, 29, 2); } else { Imgproc.HoughLinesP(edges, lines, 1, Math.PI / 180, 8, 29, 10); } // 保存所有刚检测出来的“原始直线” + // save all "raw lines" for (int i = 0; i < lines.rows(); i++) { line_segments.add(new Line(new Point(lines.get(i, 0)[0], lines.get(i, 0)[1]), new Point(lines.get(i, 0)[2], lines.get(i, 0)[3]))); } /* * 检查“原始直线”集中是否有重合直线,去除冗余或合并 + * + * check if the "raw lines" are overlapped, or crossed, or broken */ int current_size = line_segments.size(); for (int i = 0; i < current_size; i++) { @@ -84,17 +95,21 @@ public class ClassRelationDetector { for (int j = i + 1; j < current_size; j++) { Line other_line = line_segments.get(j); // 下列情况算为一条线:“两端都接近”;或其中一端接近、且两直线斜率接近;或有一部分重合 + // such cases are considered as one line: "both the two endpoints are close"; or one side of the endpoints are close, and the K of two lines are close; or two lines are overlapped if (line.isCoincideWith(other_line)) { other_line.should_be_del = true; } } // 去除冗余;合并同斜率 + // delete redundant lines; merge lines with same K for (int j = i + 1; j < current_size;) { Line other_line = line_segments.get(j); if (other_line.should_be_del) { // 将两条直线合并为一条长直线,存在line中 + // merge coincide lines line_segments.set(i, mergeCoincideLines(line, other_line)); // 去除冗余 + // delete redundant lines line_segments.remove(other_line); current_size = line_segments.size(); } else { @@ -103,6 +118,7 @@ public class ClassRelationDetector { } } // 
此时可以还有“斜率接近,但端点不接近”的线段,如果这种线段间距很小,则也合并 + // now there might exist lines that are "with similar K, but the endpoints are not close". If the distance between endpoints are small, then merge them current_size = line_segments.size(); for (int i = 0; i < current_size; i++) { Line line = line_segments.get(i); @@ -116,12 +132,15 @@ public class ClassRelationDetector { } } // 去除冗余;合并同斜率 + // delete redundant lines; merge lines with same K for (int j = i + 1; j < current_size;) { Line other_line = line_segments.get(j); if (other_line.should_be_del) { // 将两条直线合并为一条长直线,存在line中 + // merge coincide lines line_segments.set(i, mergeCoincideLines(line, other_line)); // 去除冗余 + // delete redundant lines line_segments.remove(other_line); current_size = line_segments.size(); } else { @@ -131,25 +150,35 @@ public class ClassRelationDetector { } /* * 对所有实线,将可能的“折线”识别并合并。合并为折线的直线,其belonged_polygonal_line会标识其所属折线 + * + * for all solid lines, detect possible "polygonal lines".belonged_polygonal_line is the flag to trace the polygonal line */ Set poly_lines = mergePolygonalLines(line_segments, cls_diagram); /* * 利用UML_classes的位置,进一步筛选关系线,存在result中 * * 针对每条候选关系线,计算每个类方框的4根边框线,看哪个边框线与当前候选关系线相交(允许一定像素的误差),则认为这条线确实是关系线,且属于相交的类 + * + * first, use the location of UML_classes to filter true relationship lines, save into result + * + * for each line, check to see if it coincide with at least one edge of any one rectangle (allow pixel errors). If so, then it is a true relationship line */ for (PolygonalLine pl : poly_lines) { ImgRelation rela = new ImgRelation(pl); // 对pl.pt1和pl.pt2,需要将其延直线方向延长一点,检查延长后的端点。 + // extend the pl.pt1 and pl.pt2 and check it for (UMLClass uml_class : UML_classes) { // 检查延长后的端点 + // check the extended endpoint if (uml_class.whole.contains(ImgProcessor.reachPt(pl.pt1, pl.l1, 35, false))) { // 暂时将与pt1相近的类记为source、与pt2相近的类为target,后面识别出符号来后再调 + // record the class near pt1 as source, the class near pt2 as target. 
This temp result will be adjusted after the relationship type recognition rela.source = uml_class; } if (pl.pt2 != null) { if (uml_class.whole.contains(ImgProcessor.reachPt(pl.pt2, pl.l2, 35, false))) { // 暂时将与pt1相近的类记为source、与pt2相近的类为target,后面识别出符号来后再调 + // record the class near pt1 as source, the class near pt2 as target. This temp result will be adjusted after the relationship type recognition rela.target = uml_class; } } else { @@ -162,14 +191,18 @@ public class ClassRelationDetector { } /* * 在图中抹掉关系线,防止其影响后续识别 + * + * wipe all relationship lines, in order to avoid their influence on the relationship type recognition */ if (detect_solid) { // 实线识别结果 + // the solid line recognition result for (ImgRelation rela : result) { // 两端坐标 // System.out.println("(" + rela.poly_line.pt1.x + ", " + rela.poly_line.pt1.y + ")" + "(" + rela.poly_line.pt2.x + ", " + rela.poly_line.pt2.y + // ")"); // 在图中抹掉已识别的线,防止干扰后续识别 + // wipe all relationship lines for (Line segment : rela.poly_line.line_list) { Imgproc.line(cls_diagram, segment.pt1, segment.pt2, new Scalar(255, 255, 255), 5); } @@ -180,11 +213,13 @@ public class ClassRelationDetector { System.out.println("完成对" + cd_path + "中的实线识别,共" + result.size() + "条"); } else { // 虚线识别结果 + // the dash line recognition result for (ImgRelation rela : result) { // 两端坐标 // System.out.println("(" + rela.poly_line.pt1.x + ", " + rela.poly_line.pt1.y + ")" + "(" + rela.poly_line.pt2.x + ", " + rela.poly_line.pt2.y + // ")"); // 在图中抹掉已识别的线,防止干扰后续识别 + // wipe all relationship lines for (Line segment : rela.poly_line.line_list) { Imgproc.line(cls_diagram, segment.pt1, segment.pt2, new Scalar(255, 255, 255), 5); } @@ -199,6 +234,8 @@ public class ClassRelationDetector { /** * 将斜率接近的两条直线合并成“最长”的一条线,并存在line中 + * + * merge coincide lines into one "longest line", save into line */ private Line mergeCoincideLines(Line line, Line other_line) { // 两条线的4个端点共可能有4C2条直线,把最长的一条存在line中即可 @@ -220,6 +257,8 @@ public class ClassRelationDetector { /** * 
返回一列直线中最长的直线 + * + * get the longest line */ private static Line longestLine(List lines) { lines.sort(new Comparator() { @@ -234,6 +273,8 @@ public class ClassRelationDetector { /** * 将可能的“折线”识别并合并 * + * merge possible "polygonal lines" + * * @param cls_diagram */ private Set mergePolygonalLines(List lines, Mat cls_diagram) { @@ -241,6 +282,7 @@ public class ClassRelationDetector { Set init_result = new HashSet<>(); Set temp_result = new HashSet<>(); // 从第一条线开始,检查当前线与每条后面的线是否能合并 + // start from the first line, check to see if it can be merged with the lines behind it for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); if (line.pt1_in_polyline && line.pt2_in_polyline) { @@ -248,10 +290,12 @@ public class ClassRelationDetector { } if (line.belonged_poly == null) { // 将当前直线作为一条新折线的一段 + // treat current line as a segment of a new polygonal line line.belonged_poly = new PolygonalLine(line); init_result.add(line.belonged_poly); } // 检查当前线与每条后面的线是否能合并 + // check to see if it can be merged with the lines behind it for (int j = i + 1; j < lines.size(); j++) { Line other_line = lines.get(j); Pair pt1_other_line = Line.ptNearLine(line.pt1, other_line); @@ -260,28 +304,35 @@ public class ClassRelationDetector { if (pt1_other_line.getRight() == other_line.pt1) { if (Line.ptNearPt(line.pt1, other_line.pt1, false, false, other_line)) { // 在折线的线段list中添加other_line + // add other_line in the list of polygonal line line.belonged_poly.line_list.add(other_line); // 更新line和other_line的端点状态 + // update the endpoint of line and other_line line.pt1_in_polyline = true; other_line.pt1_in_polyline = true; // 顺便给other_line也存上折线 + // save other_line into polygonal line other_line.belonged_poly = line.belonged_poly; } } else { if (Line.ptNearPt(line.pt1, other_line.pt2, false, false, other_line)) { // 在折线的线段list中添加other_line + // add other_line in the list of polygonal line line.belonged_poly.line_list.add(other_line); // 更新line和other_line的端点状态 + // update the endpoint of line and 
other_line line.pt1_in_polyline = true; other_line.pt2_in_polyline = true; // 顺便给other_line也存上折线 + // save other_line into polygonal line other_line.belonged_poly = line.belonged_poly; } } } // 同上,只是换一个端点 + // all the same as the upper one, just another endpoint else if (pt2_other_line.getLeft() == true) { if (pt2_other_line.getRight() == other_line.pt1) { if (Line.ptNearPt(line.pt2, other_line.pt1, false, false, other_line)) { @@ -309,6 +360,7 @@ public class ClassRelationDetector { } } // 清理过长的线(比如边框线) + // wipe lines that are too long (such as the edge of the whole class diagram image) for (PolygonalLine pl : init_result) { boolean should_be_del = false; for (Line l : pl.line_list) { @@ -322,6 +374,7 @@ public class ClassRelationDetector { } } // 设置每条折线的两端。同时记录两端的点所在的线段 + // set the two endpoints of the polygonal line. record the segments in the meantime for (PolygonalLine pl : temp_result) { // 看一下每条折线,最后注释掉 /* @@ -331,12 +384,14 @@ public class ClassRelationDetector { Point pt = l.notInPolyPt(); if (pt != null) { // 如果该直线两端都“暂时不在折线中”,说明这是根单独的线。将其两端设置为折线两端即可 + // if the two endpoints of this line are "not in a polygonal line", then consider it as a polygonal line with only one segment if (pt.x == 23579 && pt.y == 59873572) { pl.pt1 = l.pt1; pl.pt2 = l.pt2; pl.l1 = l; pl.l2 = l; // 无需再检查(也没得可检查,因为pl.line_list中只有这一根线) + // no need to check break; } else { if (pl.pt1 == null) { @@ -351,6 +406,7 @@ public class ClassRelationDetector { } } // 此时仍有折线两端都是null,剔除掉 + // remove all polygonal lines that the endpoints are null for (PolygonalLine pl : temp_result) { if (pl.pt1 != null || pl.pt2 != null) { result.add(pl); @@ -362,6 +418,8 @@ public class ClassRelationDetector { /** * 识别关系符号,并根据关系符号与关系线端点的位置关系更新关系线的source和target * + * recognize the relationship type, and update the source and target + * * @param classes_in_cd * @param dash_lines * @param solid_lines @@ -374,35 +432,46 @@ public class ClassRelationDetector { Mat origin_cls_diagram = Imgcodecs.imread(cd_path, 
Imgcodecs.IMREAD_GRAYSCALE); /* * 识别图中所有“轮廓”并存在contours中 + * + * detect all "contours" in the image and save in the contours */ // 先腐蚀(让符号像素侵蚀周围空间,得以连接起来)、后膨胀(让点周围的空白区域膨胀,从而去掉孤立的噪点),提高识别准确率 + // erosion, then dilation, in order to increase the detection accuracy Imgproc.erode(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3.5, 3.5)), new Point(-1, -1), 4); Imgproc.dilate(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1.7, 1.7)), new Point(-1, -1), 5); List contours = new ArrayList<>(); Imgproc.findContours(cls_diagram, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE); /* * 对每个轮廓contour,检测是否为某种关系符号 + * + * for each contour, check to see if it is a relationship type symbol */ // 根据图片像素计算轮廓面积阈值。如果轮廓面积太小或太大,则直接忽略 + // calculating the threshold for contour_area according to the image size. If the contour_area is too big or small, ignore it long cd_area = cls_diagram.width() * cls_diagram.height(); double min_area = cd_area * ratio; double max_area = cd_area * 0.5; for (MatOfPoint contour : contours) { // 如果轮廓面积太小或太大,则直接忽略 + // If the contour_area is too big or small, ignore it double contour_area = Imgproc.contourArea(contour); if (contour_area < min_area || contour_area > max_area) { // 将噪点直接涂白 + // wipe the noise points Imgproc.fillConvexPoly(cls_diagram, contour, new Scalar(255, 255, 255)); continue; } // 如果轮廓面积合适,则用矩形将其包裹,并裁剪原图中的矩形区域、重新识别符号 + // If the contour_area is ok, then make a rectangle to cover it, and cut the rectangle into a new image MatOfPoint2f curve = new MatOfPoint2f(contour.toArray()); MatOfPoint2f approx_curve = new MatOfPoint2f(); Imgproc.approxPolyDP(curve, approx_curve, 0.01 * Imgproc.arcLength(curve, true), true); // 获取包络矩形 + // get the bounding rect Rect rect_containing_rela_type = Imgproc.boundingRect(new MatOfPoint(approx_curve.toArray())); // 如果包络矩形包含某一条关系线的某一端,则检测该矩形内的符号 // 
对于虚线,由于只可能是实现或生命线,所以只需要知道是否包含即可,不用检测矩形内部图形。belong_to_dash_rela为true表示当前关系符号属于某条虚线关系线 + // check the relationship type symbol in the bounding rect boolean belong_to_dash_rela = false; for (ImgRelation dash_rela : dash_lines) { // 如果dash_rela.source_pt_index != -1,则说明该dash_rela已经与某一包络矩形检测并对应上了,则跳过 @@ -410,11 +479,14 @@ public class ClassRelationDetector { continue; } // 对线的每个端点,检查其本身、延伸、反向延伸这三个点是否被包含于包络矩形中。如果包含,则belong_to_dash_line = true; + // for each endpoint of the line, check to see if itself, the extension, and the backward extension are in the bounding rect. If so, then belong_to_dash_line = true; if (rect_containing_rela_type.contains(dash_rela.poly_line.pt1) || rect_containing_rela_type.contains(ImgProcessor.reachPt(dash_rela.poly_line.pt1, dash_rela.poly_line.l1, 5, false)) || rect_containing_rela_type.contains(ImgProcessor.reachPt(dash_rela.poly_line.pt1, dash_rela.poly_line.l1, 5, true))) { // 包络矩形确实包含关系线的端点pt1 + // If so belong_to_dash_rela = true; // 重新设置虚线的source和target。由于目前虚线的端点pt1必须对应target类,所以如果虚线的原source类对应了pt1,则需对换source和target + // re-set the source and target. swap them if needed if (dash_rela.source.whole.contains(ImgProcessor.reachPt(dash_rela.poly_line.pt1, dash_rela.poly_line.l1, 35, false))) { UMLClass temp = dash_rela.source; dash_rela.source = dash_rela.target; @@ -423,6 +495,7 @@ public class ClassRelationDetector { dash_rela.source_pt_index = 2; dash_rela.type = "实现"; // 记录当前关系到UMLClass里 + // record it in the UMLClass dash_rela.source.out_relas.add(dash_rela); dash_rela.target.in_relas.add(dash_rela); break; @@ -431,6 +504,7 @@ public class ClassRelationDetector { || rect_containing_rela_type.contains(ImgProcessor.reachPt(dash_rela.poly_line.pt2, dash_rela.poly_line.l2, 5, true))) { belong_to_dash_rela = true; // 重新设置虚线的source和target。由于目前虚线的端点pt2必须对应target类,所以如果虚线的原source类对应了pt2,则需对换source和target + // re-set the source and target. 
swap them if needed if (dash_rela.source.whole.contains(ImgProcessor.reachPt(dash_rela.poly_line.pt2, dash_rela.poly_line.l2, 35, false))) { UMLClass temp = dash_rela.source; dash_rela.source = dash_rela.target; @@ -439,6 +513,7 @@ public class ClassRelationDetector { dash_rela.source_pt_index = 1; dash_rela.type = "实现"; // 记录当前关系到UMLClass里 + // record it in the UMLClass dash_rela.source.out_relas.add(dash_rela); dash_rela.target.in_relas.add(dash_rela); break; @@ -447,27 +522,34 @@ public class ClassRelationDetector { dash_rela.source_pt_index = 1; dash_rela.type = "实现"; // 记录当前关系到UMLClass里 + // record it in the UMLClass dash_rela.source.out_relas.add(dash_rela); dash_rela.target.in_relas.add(dash_rela); break; } } // 如果该关系符号不属于任何虚线关系,则检测其是否属于某实线关系 + // check the solid line relationship type if (!belong_to_dash_rela) { // 对于实线,需检测矩形内部图形,识别继承、聚合 + // check the shape within the rect for (ImgRelation solid_rela : solid_lines) { // 如果solid_line.source_pt_index != -1,则说明该solid_line已经与某一包络矩形检测并对应上了,则跳过 if (solid_rela.source_pt_index != -1) { continue; } // 记录该包络矩形是否属于当前实线关系线solid_rela。如果是的话,再去检测矩形内图形 + // check whether the bounding rect contains the solid_rela boolean belong_to_this_solid_rela = false; // 方法同对dash_rela的检测。对线的每个端点,检查其本身、延伸、反向延伸这三个点是否被包含于包络矩形中。 + // for each endpoint of the line, check to see if itself, the extension, and the backward extension are in the bounding rect.
If so, then belong_to_dash_line = true; if (rect_containing_rela_type.contains(solid_rela.poly_line.pt1) || rect_containing_rela_type.contains(ImgProcessor.reachPt(solid_rela.poly_line.pt1, solid_rela.poly_line.l1, 5, false)) || rect_containing_rela_type.contains(ImgProcessor.reachPt(solid_rela.poly_line.pt1, solid_rela.poly_line.l1, 5, true))) { // 包络矩形确实包含关系线的端点pt1 + // If so belong_to_this_solid_rela = true; - // 重新设置虚线的source和target。由于目前虚线的端点pt1必须对应target类,所以如果虚线的原source类对应了pt1,则需对换source和target + // 重新设置实线的source和target。由于目前实线的端点pt1必须对应target类,所以如果实线的原source类对应了pt1,则需对换source和target + // re-set the source and target. swap them if needed if (solid_rela.source.whole.contains(ImgProcessor.reachPt(solid_rela.poly_line.pt1, solid_rela.poly_line.l1, 35, false))) { UMLClass temp = solid_rela.source; solid_rela.source = solid_rela.target; @@ -477,7 +559,8 @@ public class ClassRelationDetector { } else if (rect_containing_rela_type.contains(solid_rela.poly_line.pt2) || rect_containing_rela_type.contains(ImgProcessor.reachPt(solid_rela.poly_line.pt2, solid_rela.poly_line.l2, 5, false)) || rect_containing_rela_type.contains(ImgProcessor.reachPt(solid_rela.poly_line.pt2, solid_rela.poly_line.l2, 5, true))) { belong_to_this_solid_rela = true; - // 重新设置虚线的source和target。由于目前虚线的端点pt2必须对应target类,所以如果虚线的原source类对应了pt2,则需对换source和target + // 重新设置实线的source和target。由于目前实线的端点pt2必须对应target类,所以如果实线的原source类对应了pt2,则需对换source和target + // re-set the source and target. swap them if needed if (solid_rela.source.whole.contains(ImgProcessor.reachPt(solid_rela.poly_line.pt2, solid_rela.poly_line.l2, 35, false))) { UMLClass temp = solid_rela.source; solid_rela.source = solid_rela.target; @@ -486,35 +569,44 @@ public class ClassRelationDetector { solid_rela.source_pt_index = 1; } // 目前该关系符号确实属于当前实线关系solid_rela,所以需检查其是聚合还是继承 + // now the relationship type symbol belongs to the solid_rela. 
check to see whether it is generalization if (belong_to_this_solid_rela) { String type = "依赖"; // 裁剪原图中包含关系符号的矩形区域 + // cut the original area containing the relationship type symbol Mat cutted_origin_rela_type_area = ImgProcessor.cutImage(origin_cls_diagram, rect_containing_rela_type); Imgproc.erode(cutted_origin_rela_type_area, cutted_origin_rela_type_area, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2)), new Point(-1, -1), 2); Imgproc.dilate(cutted_origin_rela_type_area, cutted_origin_rela_type_area, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1, 1)), new Point(-1, -1), 2); // 针对包含关系符号的区域,重新识别各种“轮廓”,从而发现真正的关系符号 + // re-detect all "contours" to find real relationship type symbol List origin_contours = new ArrayList<>(); Imgproc.findContours(cutted_origin_rela_type_area, origin_contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE); // 针对识别出来的各种“轮廓”做筛选 + // filter the "contours" for (MatOfPoint origin_contour : origin_contours) { MatOfPoint2f origin_curve = new MatOfPoint2f(origin_contour.toArray()); MatOfPoint2f origin_approx_curve = new MatOfPoint2f(); // 对“轮廓”做二次逼近,获取关系符号形状。第一次逼近可以误差大一点,得到大致形状;第二次逼近误差要小一点,得到精确形状 + // the double-approximation, to get the shape of the relationship type symbol. 
The first approximation stretches the shape; the second approximation is more delicate Imgproc.approxPolyDP(origin_curve, origin_approx_curve, 0.05 * Imgproc.arcLength(origin_curve, false), true); Imgproc.approxPolyDP(origin_approx_curve, origin_approx_curve, 0.01 * Imgproc.arcLength(origin_approx_curve, true), true); double threshold = BizaareSituations.getThreshold(1, 1, 0, 1, 1, 1, 0); // double threshold = BizaareSituations.getThreshold(1, 1, 0, 1, 1, 0, 0); // double threshold = BizaareSituations.getThreshold(0.2, 0.3, 0.5, 0.55, 0.6, 0.1, 0.03); // 如果逼近得到三角形,则基本就是继承了,再检查一下位置即可(其实检不检查作用不大) + // if we get a triangle, then it is a generalization if (origin_approx_curve.toArray().length <= 5) { // 获取三角形的包络矩形 + // get the bounding rect of the triangle Rect rect_possible_containing_ext = Imgproc.boundingRect(new MatOfPoint(origin_approx_curve.toArray())); // 检查包络矩形rect_possible_containing_ext是否真的包含当前关系线solid_rela的target端点的延长线。如果是的话,则确定是继承关系 // 先找到关系线solid_rela的target端点和对应线段。然后检查target端点的延长与包络矩形的关系 + // check to see if the rect_possible_containing_ext contains the extension of the solid_rela's endpoint Point target_point = solid_rela.poly_line.pt1; Line target_line = solid_rela.poly_line.l1; if (solid_rela.source_pt_index == 1) { // 如果source端点是1,则target端点是pt2 + // if the endpoint of source is 1, then the endpoint of target is pt2 target_point = solid_rela.poly_line.pt2; target_line = solid_rela.poly_line.l2; } @@ -534,6 +626,7 @@ public class ClassRelationDetector { temp_pt2.y -= rect_containing_rela_type.y; if (rect_possible_containing_ext.contains(temp_pt0) || rect_possible_containing_ext.contains(temp_pt1) || rect_possible_containing_ext.contains(temp_pt2)) { // 看图是否异常 + // some unexpected situations may influence the recognition if (threshold <= BizaareSituations.rect_containing_tri_to_be_ext) { type = "继承"; } else { diff --git a/src/main/java/com/hy/java/uct/util/sd/Message.java b/src/main/java/com/hy/java/uct/util/sd/Message.java index 
b95b47bf497e7dcb438e20b76b09918cc91e5960..75f939e50c7f0fa3938e319cee209414ab2373da 100644 --- a/src/main/java/com/hy/java/uct/util/sd/Message.java +++ b/src/main/java/com/hy/java/uct/util/sd/Message.java @@ -37,7 +37,7 @@ public class Message { * * 每条链路有个总体的追踪概率 */ - List traced_path_ls = new ArrayList<>(); + public List traced_path_ls = new ArrayList<>(); public Message() { }