// File: src/main/java/com/hy/java/uct/umlrecog/SequenceDiagramRecognizer.java
package com.hy.java.uct.umlrecog;

import java.util.List;

import org.opencv.core.Core;
import org.opencv.core.Mat;

import com.hy.java.uct.umlrecog.cddetector.ClassDetector;
import com.hy.java.uct.umlrecog.cddetector.ClassRelationDetector;
import com.hy.java.uct.umlrecog.util.Relation;
import com.hy.java.uct.umlrecog.util.UMLClass;
import com.hy.java.utility.common.FileEditor;
import com.hy.java.utility.common.Pair;
import com.hy.java.utility.common.Traverser;
import com.hy.java.utility.common.Traverser.FileNode;

/**
 * Entry point that locates the diagram image of a repository, runs class and relation detection on it and
 * dumps the recognised classes to a text file.
 *
 * NOTE(review): despite the class name, the body still drives the class-diagram detectors
 * ({@link ClassDetector}, {@link ClassRelationDetector}) and looks for "cd-" images; it looks like a copy of
 * the class-diagram recognizer that has not been adapted to sequence diagrams yet - confirm.
 */
public class SequenceDiagramRecognizer {
	// Only referenced by debugging code that is currently disabled.
	private static String temp_res_path = null;

	/**
	 * Recognises the diagram belonging to {@code repo_name} and writes one record per class to
	 * {@code cd_dir + repo_name (with '/' replaced by '_') + ".txt"}.
	 *
	 * @param cd_dir    directory holding the diagram images; it is concatenated directly with file names, so
	 *                  it must end with a path separator
	 * @param repo_name repository name, e.g. "owner/project"
	 */
	public static void recog(String cd_dir, String repo_name) {
		// Look through cd_dir for the image that belongs to repo_name.
		String repo_cd_path = findCD(cd_dir, repo_name);
		if (repo_cd_path == null) {
			System.err.println("不存在" + repo_name + "的类图");
			return;
		}
		// The OpenCV native library has to be loaded before any detector touches a Mat.
		System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

		// Step 1: recognise the classes in the diagram.
		ClassDetector cls_detector = new ClassDetector(repo_cd_path);
		cls_detector.recog();
		Pair<Mat, List<UMLClass>> classes = cls_detector.getResult();

		// Step 2: recognise the relations between those classes.
		ClassRelationDetector cls_relation_detector = new ClassRelationDetector(repo_cd_path, classes);
		cls_relation_detector.recog();
		Pair<Mat, List<UMLClass>> classes_with_relations = cls_relation_detector.getResult();

		// Step 3: dump every class (title, attributes, methods; incoming/outgoing relations) into one text
		// file named <image dir><repo name>.txt.
		FileEditor fe = new FileEditor(cd_dir + repo_name.replace('/', '_') + ".txt");
		List<UMLClass> class_list = classes_with_relations.getRight();
		for (UMLClass UML_class : class_list) {
			// TODO: the output format is not designed yet; the idea is a hierarchy of separators such as
			// '@', '#', '¥', '%'. For now only the three text sections are written, each followed by '@'.
			// presumably the second argument of FileEditor.write means "append" - verify against FileEditor
			fe.write(UML_class.getTitle() + "@", true);
			fe.write(UML_class.getAttrisStr() + "@", true);
			fe.write(UML_class.getMethodsStr() + "@", true);
			for (Relation out_rel : UML_class.out_relas) {
				// TODO: serialise outgoing relations once the format is decided.
			}
			for (Relation in_rel : UML_class.in_relas) {
				// TODO: serialise incoming relations once the format is decided.
			}
		}
	}

	/**
	 * Finds the image in {@code cd_dir} whose path without extension equals
	 * {@code cd_dir + "cd-" + repo_name} (with '/' in the repository name replaced by '_').
	 *
	 * @return the path of the first matching entry, or {@code null} when there is none
	 */
	private static String findCD(String cd_dir, String repo_name) {
		// The extension-less path an image must have to belong to repo_name.
		String search_string = cd_dir + "cd-" + repo_name.replace('/', '_');
		String result_file = cd_dir + "result.txt";
		List<FileNode> cd_dir_files = Traverser.traverseDir(cd_dir).children;
		for (FileNode cd_dir_file : cd_dir_files) {
			String path = cd_dir_file.path;
			// Skip the result.txt bookkeeping file; only images are of interest.
			if (path.equals(result_file)) {
				continue;
			}
			// Entries without any '.' (sub-directories, extension-less files) cannot be the image. Without
			// this guard substring(0, -1) throws StringIndexOutOfBoundsException and aborts the search.
			int ext_start = path.lastIndexOf('.');
			if (ext_start < 0) {
				continue;
			}
			if (search_string.equals(path.substring(0, ext_start))) {
				// First match wins; no need to look any further.
				return path;
			}
		}
		return null;
	}
}

// File: src/main/java/com/hy/java/uct/umlrecog/sddetector/MessageDetector.java
package com.hy.java.uct.umlrecog.sddetector;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Point;
import org.opencv.core.Rect;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;

import com.hy.java.uct.umlrecog.cddetector.ClassRelationDetector;
import com.hy.java.uct.umlrecog.util.ImgProcessor;
import com.hy.java.uct.umlrecog.util.Line;
import com.hy.java.uct.umlrecog.util.PolygonalLine;
import com.hy.java.uct.umlrecog.util.Relation;
import com.hy.java.uct.umlrecog.util.UMLClass;
import com.hy.java.utility.common.Pair;

/**
 * Detects relation lines (solid and dashed) between already recognised class boxes, then classifies the
 * symbol found at the end of each line and records the relation on both classes.
 *
 * NOTE(review): the logic mirrors a class-diagram relation detector (realization / aggregation /
 * inheritance); nothing here is specific to sequence-diagram messages yet.
 */
public class MessageDetector {
	// Sentinel coordinates: Line.notInPolyPt() apparently returns this point when BOTH ends of a segment are
	// still free, i.e. the segment is a stand-alone line - TODO confirm against Line.notInPolyPt().
	private static final double BOTH_ENDS_FREE_X = 23579;
	private static final double BOTH_ENDS_FREE_Y = 59873572;

	private String cd_path = null;
	private String temp_res_path = null;
	private Pair<Mat, List<UMLClass>> classes_in_cd = null;
	private Pair<Mat, List<UMLClass>> result = null;

	/**
	 * @param repo_cd_path path of the diagram image
	 * @param classes      working image (left) and the classes recognised in it (right)
	 */
	public MessageDetector(String repo_cd_path, Pair<Mat, List<UMLClass>> classes) {
		this.cd_path = repo_cd_path;
		this.temp_res_path = tempResultPath(repo_cd_path);
		this.classes_in_cd = classes;
	}

	/**
	 * Returns {@code image_path} with its file base name replaced by "temp result", keeping directory and
	 * extension.
	 *
	 * The previous implementation used String.replaceAll with the base name as pattern, which (a) interprets
	 * the name as a regular expression, (b) also rewrites directory components that contain the name,
	 * (c) only understood '\' separators and (d) threw when the name had no extension.
	 */
	private static String tempResultPath(String image_path) {
		int name_start = Math.max(image_path.lastIndexOf('\\'), image_path.lastIndexOf('/')) + 1;
		int ext_start = image_path.lastIndexOf('.');
		if (ext_start < name_start) {
			// No extension in the file name itself (a dot in a directory name does not count).
			ext_start = image_path.length();
		}
		return image_path.substring(0, name_start) + "temp result" + image_path.substring(ext_start);
	}

	/**
	 * Runs the whole pipeline: solid lines, dashed lines, then relation symbols. The outcome is available
	 * through {@link #getResult()}.
	 */
	public void recog() {
		// Solid lines first; candidates are filtered by their position relative to the class boxes.
		Set<Relation> solid_lines = detectLines(classes_in_cd, 0.0504, true);
		// Dashed lines: identical procedure, only the allowed pixel gap differs.
		Set<Relation> dash_lines = detectLines(classes_in_cd, 0.0504, false);
		// Classify the symbols; this also stores every relation on its source and target class.
		result = detectRelationType(classes_in_cd, solid_lines, dash_lines, 0.0001);
	}

	/**
	 * Detects relation lines in the working image and erases the accepted ones from it.
	 *
	 * @param classes_in_cd working image and recognised classes; the image is modified in place
	 * @param ratio         currently unused
	 * @param detect_solid  {@code true} for solid lines (max gap 2 px), {@code false} for dashed (10 px)
	 * @return relations whose two ends both reach a class box; source/target are provisional
	 */
	private Set<Relation> detectLines(Pair<Mat, List<UMLClass>> classes_in_cd, double ratio, boolean detect_solid) {
		Set<Relation> result = new HashSet<>();
		String line_kind = detect_solid ? "实线" : "虚线";
		System.out.println("开始识别" + cd_path + "中的" + line_kind);
		Mat cls_diagram = classes_in_cd.getLeft();
		List<UMLClass> UML_classes = classes_in_cd.getRight();

		/*
		 * Edge detection, then raw segments from the edge map.
		 */
		List<Line> line_segments = new ArrayList<>();
		Mat edges = new Mat();
		Imgproc.Canny(cls_diagram, edges, 50, 50 * 3, 3, true);
		// NOTE(review): dilate/erode run AFTER Canny, so they do not influence `edges`; they only change
		// cls_diagram for the later stages. The 1.7 and 1 px kernels are probably no-ops - confirm intent.
		Imgproc.dilate(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1.7, 1.7)), new Point(-1, -1), 2);
		Imgproc.erode(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1, 1)), new Point(-1, -1), 1);
		Mat lines = new Mat();
		// The smaller the allowed gap, the closer the result is to a truly solid line; the minimum length
		// (29) is tuned to the image resolution.
		double max_line_gap = detect_solid ? 2 : 10;
		Imgproc.HoughLinesP(edges, lines, 1, Math.PI / 180, 8, 29, max_line_gap);
		for (int i = 0; i < lines.rows(); i++) {
			double[] seg = lines.get(i, 0);
			line_segments.add(new Line(new Point(seg[0], seg[1]), new Point(seg[2], seg[3])));
		}

		/*
		 * Pass 1: collapse coinciding segments (both ends close; one end close and similar slope; or
		 * partially overlapping - as decided by Line.isCoincideWith).
		 */
		for (int i = 0; i < line_segments.size(); i++) {
			Line line = line_segments.get(i);
			for (int j = i + 1; j < line_segments.size(); j++) {
				Line other_line = line_segments.get(j);
				if (line.isCoincideWith(other_line)) {
					other_line.should_be_del = true;
				}
			}
			absorbFlaggedLines(line_segments, i);
		}

		/*
		 * Pass 2: segments with similar slope whose end points are merely near each other are merged too.
		 */
		for (int i = 0; i < line_segments.size(); i++) {
			Line line = line_segments.get(i);
			for (int j = i + 1; j < line_segments.size(); j++) {
				Line other_line = line_segments.get(j);
				if (Line.Knear(line, other_line)) {
					if (Line.ptNearPt(line.pt1, other_line.pt1, false, true, other_line) || Line.ptNearPt(line.pt1, other_line.pt2, false, true, other_line)
							|| Line.ptNearPt(line.pt2, other_line.pt1, false, true, other_line) || Line.ptNearPt(line.pt2, other_line.pt2, false, true, other_line)) {
						other_line.should_be_del = true;
					}
				}
			}
			absorbFlaggedLines(line_segments, i);
		}

		/*
		 * Join segments into polylines; each joined segment records the polyline it belongs to.
		 */
		Set<PolygonalLine> poly_lines = mergePolygonalLines(line_segments);

		/*
		 * Keep only polylines whose two ends, pushed 35 px along their end segment, land inside a class box.
		 */
		for (PolygonalLine pl : poly_lines) {
			Relation rela = new Relation(pl);
			for (UMLClass uml_class : UML_classes) {
				// Provisional assignment: class at pt1 = source, class at pt2 = target. It is corrected once
				// the relation symbol has been located.
				if (uml_class.whole.contains(ImgProcessor.reachPt(pl.pt1, pl.l1, 35, false))) {
					rela.source = uml_class;
				}
				if (uml_class.whole.contains(ImgProcessor.reachPt(pl.pt2, pl.l2, 35, false))) {
					rela.target = uml_class;
				}
			}
			if (rela.source != null && rela.target != null) {
				result.add(rela);
			}
		}

		/*
		 * Paint the accepted lines white so they cannot disturb the following detection steps.
		 */
		for (Relation rela : result) {
			for (Line segment : rela.poly_line.line_list) {
				Imgproc.line(cls_diagram, segment.pt1, segment.pt2, new Scalar(255, 255, 255), 5);
			}
		}
		Imgcodecs.imwrite(temp_res_path, cls_diagram);
		System.out.println("完成对" + cd_path + "中的" + line_kind + "识别,共" + result.size() + "条");
		return result;
	}

	/**
	 * Merges every segment after index {@code i} that is flagged {@code should_be_del} into the segment at
	 * {@code i} and removes it from the list.
	 *
	 * The merge is cumulative: each flagged segment is merged with the result of the previous merge.
	 * (Previously every merge restarted from the original, unmerged segment, so only the last flagged
	 * segment had any effect.) Removal is done by index, so an equal-but-different element can never be
	 * removed by mistake.
	 */
	private void absorbFlaggedLines(List<Line> line_segments, int i) {
		Line merged = line_segments.get(i);
		int j = i + 1;
		while (j < line_segments.size()) {
			Line other_line = line_segments.get(j);
			if (other_line.should_be_del) {
				merged = mergeCoincideLines(merged, other_line);
				line_segments.set(i, merged);
				line_segments.remove(j);
			} else {
				j++;
			}
		}
	}

	/**
	 * Merges two near-collinear segments into the longest segment that can be built from their four end
	 * points (the 4C2 = 6 candidates).
	 */
	private Line mergeCoincideLines(Line line, Line other_line) {
		List<Line> lines = new ArrayList<>();
		lines.add(new Line(line.pt1, line.pt2));
		lines.add(new Line(line.pt1, other_line.pt1));
		lines.add(new Line(line.pt1, other_line.pt2));
		lines.add(new Line(line.pt2, other_line.pt1));
		lines.add(new Line(line.pt2, other_line.pt2));
		lines.add(new Line(other_line.pt1, other_line.pt2));
		return MessageDetector.longestLine(lines);
	}

	/**
	 * Returns the longest line of the list (the last one among equally long lines). The list is sorted in
	 * place.
	 */
	private static Line longestLine(List<Line> lines) {
		// Compare the real lengths; truncating them to int made e.g. 10.2 and 10.9 compare as equal.
		lines.sort(Comparator.comparingDouble((Line l) -> l.length));
		return lines.get(lines.size() - 1);
	}

	/**
	 * Joins segments whose end points meet into polylines and determines the two free ends of every
	 * polyline.
	 */
	private Set<PolygonalLine> mergePolygonalLines(List<Line> lines) {
		Set<PolygonalLine> result = new HashSet<>();
		for (int i = 0; i < lines.size(); i++) {
			Line line = lines.get(i);
			// Both ends are already connected: nothing left to join.
			if (line.pt1_in_polyline && line.pt2_in_polyline) {
				continue;
			}
			if (line.belonged_poly == null) {
				// The segment starts a new polyline.
				line.belonged_poly = new PolygonalLine(line);
				result.add(line.belonged_poly);
			}
			// Try to attach every later segment to one end of the current segment.
			for (int j = i + 1; j < lines.size(); j++) {
				Line other_line = lines.get(j);
				Pair<Boolean, Point> pt1_other_line = Line.ptNearLine(line.pt1, other_line);
				Pair<Boolean, Point> pt2_other_line = Line.ptNearLine(line.pt2, other_line);
				if (pt1_other_line.getLeft() == true) {
					if (joinAt(line, line.pt1, other_line, pt1_other_line.getRight())) {
						line.pt1_in_polyline = true;
					}
				}
				// Same procedure for the other end.
				else if (pt2_other_line.getLeft() == true) {
					if (joinAt(line, line.pt2, other_line, pt2_other_line.getRight())) {
						line.pt2_in_polyline = true;
					}
				}
			}
		}
		// Determine both ends of every polyline, together with the segment each end lies on.
		for (PolygonalLine pl : result) {
			for (Line l : pl.line_list) {
				Point pt = l.notInPolyPt();
				if (pt == null) {
					continue;
				}
				if (pt.x == BOTH_ENDS_FREE_X && pt.y == BOTH_ENDS_FREE_Y) {
					// Stand-alone segment: its own ends are the ends of the polyline. Nothing else to check,
					// the polyline consists of this single segment.
					pl.pt1 = l.pt1;
					pl.pt2 = l.pt2;
					pl.l1 = l;
					pl.l2 = l;
					break;
				}
				if (pl.pt1 == null) {
					pl.pt1 = pt;
					pl.l1 = l;
				} else if (pl.pt2 == null) {
					pl.pt2 = pt;
					pl.l2 = l;
				}
			}
		}
		return result;
	}

	/**
	 * Attaches {@code other_line} to the polyline of {@code line} when {@code line_end} is close to the end
	 * of {@code other_line} identified by {@code near_end}.
	 *
	 * @param near_end the point reported by Line.ptNearLine; when it is the very same object as
	 *                 {@code other_line.pt1} that end is used, otherwise {@code other_line.pt2}
	 * @return {@code true} when the segment was attached (the caller marks its own end as connected)
	 */
	private static boolean joinAt(Line line, Point line_end, Line other_line, Point near_end) {
		// Identity comparison on purpose, exactly as the caller's contract describes.
		boolean joins_pt1 = near_end == other_line.pt1;
		Point other_end = joins_pt1 ? other_line.pt1 : other_line.pt2;
		if (!Line.ptNearPt(line_end, other_end, false, false, other_line)) {
			return false;
		}
		line.belonged_poly.line_list.add(other_line);
		if (joins_pt1) {
			other_line.pt1_in_polyline = true;
		} else {
			other_line.pt2_in_polyline = true;
		}
		// The attached segment shares the polyline.
		other_line.belonged_poly = line.belonged_poly;
		return true;
	}

	/**
	 * Locates relation symbols and uses their position to fix source/target of each relation line, sets the
	 * relation type and registers the relation on both classes.
	 *
	 * @param classes_in_cd working image and recognised classes; the image is modified in place
	 * @param solid_lines   solid relation lines; symbols on them are classified as aggregation or
	 *                      inheritance
	 * @param dash_lines    dashed relation lines; always typed as realization
	 * @param ratio         minimum contour area as a fraction of the image area
	 * @return the {@code classes_in_cd} pair that was passed in
	 */
	private Pair<Mat, List<UMLClass>> detectRelationType(Pair<Mat, List<UMLClass>> classes_in_cd, Set<Relation> solid_lines, Set<Relation> dash_lines, double ratio) {
		System.out.println("开始识别" + cd_path + "中所有关系符号");
		Mat cls_diagram = classes_in_cd.getLeft();
		Mat origin_cls_diagram = Imgcodecs.imread(cd_path, Imgcodecs.IMREAD_GRAYSCALE);
		/*
		 * Find all contours. Erode first (dark symbol pixels grow and connect), then dilate (isolated noise
		 * disappears).
		 */
		Imgproc.erode(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3.5, 3.5)), new Point(-1, -1), 4);
		Imgproc.dilate(cls_diagram, cls_diagram, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1.7, 1.7)), new Point(-1, -1), 5);
		List<MatOfPoint> contours = new ArrayList<>();
		Imgproc.findContours(cls_diagram, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
		// Area thresholds relative to the image size. Multiply in long: width * height overflows int for
		// very large images.
		long cd_area = (long) cls_diagram.width() * cls_diagram.height();
		double min_area = cd_area * ratio;
		double max_area = cd_area * 0.5;
		for (MatOfPoint contour : contours) {
			double contour_area = Imgproc.contourArea(contour);
			if (contour_area < min_area || contour_area > max_area) {
				// Too small or too large: treat as noise and paint it white.
				Imgproc.fillConvexPoly(cls_diagram, contour, new Scalar(255, 255, 255));
				continue;
			}
			// Plausible size: take the bounding box of the approximated contour.
			MatOfPoint2f curve = new MatOfPoint2f(contour.toArray());
			MatOfPoint2f approx_curve = new MatOfPoint2f();
			Imgproc.approxPolyDP(curve, approx_curve, 0.01 * Imgproc.arcLength(curve, true), true);
			Rect rect_containing_rela_type = Imgproc.boundingRect(new MatOfPoint(approx_curve.toArray()));

			// Dashed lines can only be realizations (or lifelines), so touching an end is sufficient; the
			// shape inside the box is not inspected.
			boolean belong_to_dash_rela = false;
			for (Relation dash_rela : dash_lines) {
				// source_pt_index != -1 means the line has already been matched with a symbol.
				if (dash_rela.source_pt_index != -1) {
					continue;
				}
				if (bindSymbolToRelation(rect_containing_rela_type, dash_rela)) {
					belong_to_dash_rela = true;
					dash_rela.type = "实现";
					dash_rela.source.out_relas.add(dash_rela);
					dash_rela.target.in_relas.add(dash_rela);
					break;
				}
			}
			if (belong_to_dash_rela) {
				continue;
			}
			// Not on a dashed line: try the solid lines, where the shape decides the relation type.
			for (Relation solid_rela : solid_lines) {
				if (solid_rela.source_pt_index != -1) {
					continue;
				}
				if (bindSymbolToRelation(rect_containing_rela_type, solid_rela)) {
					solid_rela.type = classifySolidSymbol(origin_cls_diagram, rect_containing_rela_type, solid_rela);
					solid_rela.source.out_relas.add(solid_rela);
					solid_rela.target.in_relas.add(solid_rela);
					// The region is done; no need to test the remaining solid lines.
					break;
				}
			}
		}
		System.out.println("完成对" + cd_path + "中识别关系符号的识别。");
		return classes_in_cd;
	}

	/**
	 * Tells whether the symbol box contains a line end itself, or that end moved 5 px along its segment in
	 * either direction.
	 */
	private static boolean symbolTouchesEnd(Rect symbol_rect, Point end, Line end_segment) {
		return symbol_rect.contains(end) || symbol_rect.contains(ImgProcessor.reachPt(end, end_segment, 5, false))
				|| symbol_rect.contains(ImgProcessor.reachPt(end, end_segment, 5, true));
	}

	/**
	 * Binds the symbol box to the relation when it touches one of the relation's ends.
	 *
	 * The touched end becomes the target end: when the current source class contains that end (pushed 35 px
	 * along its segment), source and target are swapped. {@code source_pt_index} is set to 2 when the symbol
	 * sits at pt1 and to 1 when it sits at pt2.
	 *
	 * @return {@code false} when the box touches neither end (the relation is left untouched)
	 */
	private static boolean bindSymbolToRelation(Rect symbol_rect, Relation rela) {
		Point symbol_end;
		Line symbol_segment;
		int source_pt_index;
		if (symbolTouchesEnd(symbol_rect, rela.poly_line.pt1, rela.poly_line.l1)) {
			symbol_end = rela.poly_line.pt1;
			symbol_segment = rela.poly_line.l1;
			source_pt_index = 2;
		} else if (symbolTouchesEnd(symbol_rect, rela.poly_line.pt2, rela.poly_line.l2)) {
			symbol_end = rela.poly_line.pt2;
			symbol_segment = rela.poly_line.l2;
			source_pt_index = 1;
		} else {
			return false;
		}
		if (rela.source.whole.contains(ImgProcessor.reachPt(symbol_end, symbol_segment, 35, false))) {
			UMLClass temp = rela.source;
			rela.source = rela.target;
			rela.target = temp;
		}
		rela.source_pt_index = source_pt_index;
		return true;
	}

	/**
	 * Classifies the symbol at the target end of a solid line by re-examining the symbol region in the
	 * untouched original image.
	 *
	 * @return "继承" (inheritance) when a triangle containing the target end is found, otherwise "聚合"
	 *         (aggregation)
	 */
	private static String classifySolidSymbol(Mat origin_cls_diagram, Rect symbol_rect, Relation solid_rela) {
		String type = "聚合";
		// Crop the symbol region from the original image and clean it up.
		Mat cutted_origin_rela_type_area = ImgProcessor.cutImage(origin_cls_diagram, symbol_rect);
		Imgproc.erode(cutted_origin_rela_type_area, cutted_origin_rela_type_area, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(2, 2)), new Point(-1, -1), 2);
		Imgproc.dilate(cutted_origin_rela_type_area, cutted_origin_rela_type_area, Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(1, 1)), new Point(-1, -1), 2);
		List<MatOfPoint> origin_contours = new ArrayList<>();
		Imgproc.findContours(cutted_origin_rela_type_area, origin_contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
		// The target end is pt1 unless the source end is index 1, in which case it is pt2.
		Point target_point = solid_rela.poly_line.pt1;
		Line target_line = solid_rela.poly_line.l1;
		if (solid_rela.source_pt_index == 1) {
			target_point = solid_rela.poly_line.pt2;
			target_line = solid_rela.poly_line.l2;
		}
		for (MatOfPoint origin_contour : origin_contours) {
			MatOfPoint2f origin_curve = new MatOfPoint2f(origin_contour.toArray());
			MatOfPoint2f origin_approx_curve = new MatOfPoint2f();
			// Two-step approximation: a coarse one for the rough shape, a finer one for the exact shape.
			Imgproc.approxPolyDP(origin_curve, origin_approx_curve, 0.05 * Imgproc.arcLength(origin_curve, false), true);
			Imgproc.approxPolyDP(origin_approx_curve, origin_approx_curve, 0.01 * Imgproc.arcLength(origin_approx_curve, true), true);
			if (origin_approx_curve.toArray().length != 3) {
				continue;
			}
			// A triangle almost certainly means inheritance; additionally require it to contain the target
			// end of the line.
			// NOTE(review): this box is computed on the cropped image while target_point is in full-image
			// coordinates; if ImgProcessor.cutImage returns a crop with its own origin, the two are in
			// different coordinate frames - confirm.
			Rect rect_possible_containing_ext = Imgproc.boundingRect(new MatOfPoint(origin_approx_curve.toArray()));
			if (symbolTouchesEnd(rect_possible_containing_ext, target_point, target_line)) {
				type = "继承";
			}
		}
		return type;
	}

	/**
	 * @return the pair produced by {@link #recog()}, or {@code null} before it has run
	 */
	public Pair<Mat, List<UMLClass>> getResult() {
		return result;
	}
}

// File: src/main/java/com/hy/java/uct/umlrecog/sddetector/ObjectDetector.java
package com.hy.java.uct.umlrecog.sddetector;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Scalar;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;

import com.hy.java.uct.umlrecog.UMLDiagramRecognizer;
import com.hy.java.uct.umlrecog.util.ImgProcessor;
import com.hy.java.uct.umlrecog.util.Rectangle;
import com.hy.java.uct.umlrecog.util.UMLClass;
import com.hy.java.utility.common.Pair;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

/**
 * Detects rectangular boxes in a diagram image, groups vertically stacked boxes into classes and reads the
 * text of every compartment with Tesseract.
 *
 * NOTE(review): the logic mirrors a class-diagram class detector (title / attributes / methods); nothing
 * here is specific to sequence-diagram objects yet.
 */
public class ObjectDetector {
	private String cd_path = null;
	private String temp_res_path = null;
	private Pair<Mat, List<UMLClass>> result = null;

	/**
	 * @param repo_cd_path path of the diagram image
	 */
	public ObjectDetector(String repo_cd_path) {
		this.cd_path = repo_cd_path;
		this.temp_res_path = tempResultPath(repo_cd_path);
	}

	/**
	 * Returns {@code image_path} with its file base name replaced by "temp result", keeping directory and
	 * extension.
	 *
	 * The previous implementation used String.replaceAll with the base name as pattern, which (a) interprets
	 * the name as a regular expression, (b) also rewrites directory components that contain the name,
	 * (c) only understood '\' separators and (d) threw when the name had no extension.
	 */
	private static String tempResultPath(String image_path) {
		int name_start = Math.max(image_path.lastIndexOf('\\'), image_path.lastIndexOf('/')) + 1;
		int ext_start = image_path.lastIndexOf('.');
		if (ext_start < name_start) {
			// No extension in the file name itself (a dot in a directory name does not count).
			ext_start = image_path.length();
		}
		return image_path.substring(0, name_start) + "temp result" + image_path.substring(ext_start);
	}

	/**
	 * Runs the whole pipeline. The outcome - the image with all boxes painted white plus the recognised
	 * classes - is available through {@link #getResult()}.
	 */
	public void recog() {
		// Pre-processing: load as greyscale, then binarise for the contour search.
		Mat mat = Imgcodecs.imread(cd_path, Imgcodecs.IMREAD_GRAYSCALE);
		Imgproc.threshold(mat, mat, 160, 255, Imgproc.THRESH_BINARY);
		// Find every rectangular region.
		Pair<Mat, List<Rectangle>> all_rect_areas_in_cd = detectRectArea(mat, 0.000555);
		// Group the rectangles into class boxes.
		List<UMLClass> classes = mergeIntoClass(all_rect_areas_in_cd);
		// OCR works on a fresh, unmodified greyscale copy of the image.
		result = Pair.createPair(all_rect_areas_in_cd.getLeft(), detectText(Imgcodecs.imread(cd_path, Imgcodecs.IMREAD_GRAYSCALE), classes));
	}

	/**
	 * Detects quadrilateral contours and paints them white in {@code cls_diagram}.
	 *
	 * @param cls_diagram binarised image; modified in place
	 * @param ratio       intended as the minimum box area relative to the image area (0..1); currently
	 *                    unused, a fixed minimum of 60 px² applies instead
	 * @return the modified image and all detected rectangles
	 */
	private Pair<Mat, List<Rectangle>> detectRectArea(Mat cls_diagram, double ratio) {
		System.out.println("开始识别" + cd_path + "中所有矩形");
		List<MatOfPoint> contours = new ArrayList<>();
		Imgproc.findContours(cls_diagram, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);

		List<Rectangle> all_rect_areas = new ArrayList<>();
		// Contours that turned out to be rectangles; their borders are painted white at the end.
		List<MatOfPoint> rect_contours = new ArrayList<>();
		// Multiply in long: width * height overflows int for very large images.
		long cd_area = (long) cls_diagram.width() * cls_diagram.height();
		double min_cls_area = 60;
		double max_cls_area = cd_area * 0.5;
		for (MatOfPoint contour : contours) {
			double contour_area = Imgproc.contourArea(contour);
			if (contour_area < min_cls_area || contour_area > max_cls_area) {
				continue;
			}
			// Polygon approximation; four vertices are taken to be a rectangle.
			MatOfPoint2f curve = new MatOfPoint2f(contour.toArray());
			MatOfPoint2f approx_curve = new MatOfPoint2f();
			Imgproc.approxPolyDP(curve, approx_curve, 0.01 * Imgproc.arcLength(curve, true), true);
			if (approx_curve.toArray().length != 4) {
				continue;
			}
			// Remember the rectangle (with a snapshot of the image as it is right now) for grouping and OCR,
			// then paint its interior white so it cannot disturb symbol and line detection.
			all_rect_areas.add(new Rectangle(cls_diagram.clone(), contour, approx_curve));
			rect_contours.add(contour);
			Imgproc.fillConvexPoly(cls_diagram, contour, new Scalar(255, 255, 255));
		}
		// Paint the borders white as well.
		Imgproc.drawContours(cls_diagram, rect_contours, -1, new Scalar(255, 255, 255), 5);
		return Pair.createPair(cls_diagram, all_rect_areas);
	}

	/**
	 * Groups vertically stacked rectangles into classes.
	 *
	 * A rectangle joins a class when its top-left x differs by at most 5 px from the class's running box
	 * and the vertical gap between the upper box's bottom edge and the lower box's top is at most 5 px. The
	 * first three rectangles (top to bottom) become top / mid / bottom of the class.
	 */
	private List<UMLClass> mergeIntoClass(Pair<Mat, List<Rectangle>> all_rect_areas_in_cd) {
		List<UMLClass> result = new ArrayList<>();
		List<Rectangle> rect_area_list = all_rect_areas_in_cd.getRight();
		for (int all_rect_index = 0; all_rect_index < rect_area_list.size(); all_rect_index++) {
			Rectangle current_rect = rect_area_list.get(all_rect_index);
			// Already part of a class.
			if (current_rect.within_class) {
				continue;
			}
			// Start a new class with this rectangle.
			UMLClass uml_class = new UMLClass();
			uml_class.list.add(current_rect);
			current_rect.within_class = true;
			// temp_rect is the running outline of the class; later rectangles are compared against it.
			uml_class.temp_rect = current_rect.clone();
			for (int j = all_rect_index + 1; j < rect_area_list.size(); j++) {
				Rectangle other_rect = rect_area_list.get(j);
				// Left edges must line up within 5 px.
				if (Math.abs(other_rect.tl().x - uml_class.temp_rect.tl().x) > 5) {
					continue;
				}
				if (uml_class.temp_rect.tl().y < other_rect.tl().y) {
					// temp_rect is the upper box.
					if (other_rect.tl().y - uml_class.temp_rect.tl().y - uml_class.temp_rect.height <= 5) {
						uml_class.list.add(other_rect);
						other_rect.within_class = true;
						uml_class.temp_rect.height += other_rect.height;
					}
				} else {
					// other_rect is the upper box: the outline now starts at its corner.
					if (uml_class.temp_rect.tl().y - other_rect.tl().y - other_rect.height <= 5) {
						uml_class.list.add(other_rect);
						other_rect.within_class = true;
						uml_class.temp_rect.x = other_rect.x;
						uml_class.temp_rect.y = other_rect.y;
						uml_class.temp_rect.height += other_rect.height;
					}
				}
			}
			// Order the compartments top to bottom and name the first three.
			uml_class.list.sort(Comparator.comparingInt((Rectangle r) -> r.y));
			int part_count = uml_class.list.size();
			uml_class.top = uml_class.list.get(0);
			if (part_count > 1) {
				uml_class.mid = uml_class.list.get(1);
			}
			if (part_count > 2) {
				uml_class.bottom = uml_class.list.get(2);
			}
			/*
			 * "top" may actually be the outline of the whole class (it reaches at least as far down as the
			 * last compartment). In that case shift the compartments up by one and derive the last one from
			 * the remaining height. The statement order below matters.
			 */
			if (uml_class.mid != null) {
				if (uml_class.bottom != null) {
					if (uml_class.top.y + uml_class.top.height >= uml_class.bottom.y + uml_class.bottom.height) {
						int top_old_height = uml_class.top.height;
						uml_class.top = uml_class.mid.clone();
						uml_class.mid = uml_class.bottom.clone();
						uml_class.bottom.y = uml_class.top.y + uml_class.top.height + uml_class.mid.height;
						uml_class.bottom.height = top_old_height - uml_class.top.height - uml_class.mid.height;
					}
				} else {
					if (uml_class.top.y + uml_class.top.height >= uml_class.mid.y + uml_class.mid.height) {
						int top_old_height = uml_class.top.height;
						uml_class.top = uml_class.mid.clone();
						uml_class.mid.y = uml_class.top.y + uml_class.top.height;
						uml_class.mid.height = top_old_height - uml_class.top.height;
					}
				}
			}
			// Overall box: position and width of top, summed height of all compartments.
			int whole_height = uml_class.top.height;
			if (uml_class.mid != null) {
				whole_height += uml_class.mid.height;
			}
			if (uml_class.bottom != null) {
				whole_height += uml_class.bottom.height;
			}
			uml_class.whole = new Rectangle(null, null, uml_class.top.x, uml_class.top.y, uml_class.top.width, whole_height);
			result.add(uml_class);
		}
		System.out.println("完成将" + cd_path + "中所有矩形整合为类的业务,共" + result.size() + "个类");
		return result;
	}

	/**
	 * Reads title, attributes and methods of every class via OCR.
	 *
	 * @param cls_diagram unmodified greyscale image the compartments are cropped from
	 * @param classes     classes to fill in; the same list is returned
	 */
	private List<UMLClass> detectText(Mat cls_diagram, List<UMLClass> classes) {
		System.out.println("开始识别" + cd_path + "中每个类的文字");
		// One engine is enough for all classes; it was needlessly re-created for every class before.
		ITesseract instance = new Tesseract();
		instance.setDatapath(UMLDiagramRecognizer.tessdata_path);
		for (UMLClass uc : classes) {
			try {
				if (uc.top != null) {
					uc.setTitle(ocrRegion(instance, cls_diagram, uc.top));
				}
				if (uc.mid != null) {
					uc.setAttrisStr(ocrRegion(instance, cls_diagram, uc.mid));
				}
				if (uc.bottom != null) {
					uc.setMethodsStr(ocrRegion(instance, cls_diagram, uc.bottom));
				}
			} catch (TesseractException e) {
				// Best effort: the remaining compartments of this class stay unset and processing continues
				// with the next class.
				e.printStackTrace();
			}
		}
		System.out.println("完成对" + cd_path + "中每个类的文字识别");
		return classes;
	}

	/**
	 * Crops {@code region} from the image, writes it to the temporary result file and returns the text
	 * Tesseract recognises in that file.
	 */
	private String ocrRegion(ITesseract instance, Mat cls_diagram, Rectangle region) throws TesseractException {
		Imgcodecs.imwrite(temp_res_path, ImgProcessor.cutImage(cls_diagram, region));
		return instance.doOCR(new File(temp_res_path));
	}

	/**
	 * @return the pair produced by {@link #recog()}, or {@code null} before it has run
	 */
	public Pair<Mat, List<UMLClass>> getResult() {
		return result;
	}
}