diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/dot/mod.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/dot/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..7be2148ecacea3ecae506b66e68290ee6ec51f2f --- /dev/null +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/dot/mod.rs @@ -0,0 +1,273 @@ +// Copyright (c) 2025, Huawei Technologies Co., Ltd. +// All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +use std::{ + fs::File, + io::{BufReader, Read}, +}; + +use ahash::{HashMap, HashMapExt}; +use anyhow::{Result, anyhow}; +use graphviz_rust::dot_structures::{ + Attribute, Edge as DotEdge, EdgeTy, Graph, GraphAttributes, Id, Node as DotNode, Stmt, + Subgraph as DotSubgraph, Vertex, +}; +use smartstring::alias::String; + +use crate::{AttrValue, Model, Node, StdString, Subgraph}; + +pub fn read_dot(path: &str) -> Result { + let file = File::open(path)?; + let mut reader = BufReader::new(file); + let mut content = StdString::new(); + reader.read_to_string(&mut content)?; + + Ok(content) +} + +struct DotParsingContext { + nodes: HashMap, + edges: Vec<(String, String)>, + graph_attributes: HashMap, + subgraphs: HashMap, + nesting_map: HashMap, + current_subgraph: Option, +} + +impl DotParsingContext { + fn new() -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + graph_attributes: HashMap::new(), + subgraphs: HashMap::new(), + nesting_map: HashMap::new(), + current_subgraph: None, + } + } + + fn into_model(self, name: String) -> Model { + Model { + name, + nodes: self.nodes, + edges: self.edges, + parameters: self.graph_attributes, + subgraphes: self.subgraphs, + nesting_map: self.nesting_map, + } + } + + fn process_statement(&mut self, stmt: &Stmt) -> Result<()> { + match stmt { + Stmt::Subgraph(sg) => self.process_subgraph(sg)?, + Stmt::Node(n) => self.process_node(n)?, + Stmt::Edge(e) => self.process_edge(e)?, + Stmt::GAttribute(ga) => self.process_graph_attribute(ga)?, + Stmt::Attribute(a) => self.process_attribute(a)?, + } + + Ok(()) + } + + fn process_subgraph(&mut self, sg: &DotSubgraph) -> Result<()> { + let name = plain_id(&sg.id)?; + + self.subgraphs + .entry(name.clone()) + .or_insert_with(|| Subgraph { name: name.clone() }); + + let prev_parent = self.current_subgraph.take(); + self.current_subgraph = Some(name); + + for stmt in &sg.stmts { + self.process_statement(stmt)?; + } + + self.current_subgraph = prev_parent; + + Ok(()) + } + + fn process_node(&mut self, n: &DotNode) -> Result<()> { + let node_id = plain_id(&n.id.0)?; + + let mut op_type = node_id.clone(); + let mut attributes = HashMap::new(); + + for attr in &n.attributes { + let key = plain_id(&attr.0)?; + if &key == "pos" { + return Ok(()); + } + + let value = plain_id(&attr.1)?; + + if key == "label" { + op_type = value.clone(); + } + + attributes.insert(key, AttrValue::StringLike(value)); + } + + let node = Node { + name: node_id.clone(), + opType: op_type, + input: Vec::new(), + output: Vec::new(), + attributes, + tensors: Vec::new(), + dynamic: false, + }; + + self.nodes.insert(node_id.clone(), node); + + if let Some(subgraph) = &self.current_subgraph { + self.nesting_map.insert(node_id, subgraph.clone()); + } + + Ok(()) + } + + fn ensure_node_exists(&mut self, node_id: String) { + if !self.nodes.contains_key(&node_id) { + let node = Node { + name: node_id.clone(), + opType: node_id.clone(), + input: Vec::new(), + output: Vec::new(), + attributes: HashMap::new(), + tensors: Vec::new(), + dynamic: false, + }; + self.nodes.insert(node_id.clone(), node); + + if let Some(subgraph) = &self.current_subgraph { + self.nesting_map.insert(node_id, subgraph.clone()); + } + } + } + + fn process_edge(&mut self, e: &DotEdge) -> Result<()> { + match &e.ty { + EdgeTy::Pair(source, target) => match (source, target) { + (Vertex::N(src), Vertex::N(tgt)) => { + let src_id = plain_id(&src.0)?; + let tgt_id = plain_id(&tgt.0)?; + + self.ensure_node_exists(src_id.clone()); + self.ensure_node_exists(tgt_id.clone()); + + self.edges.push((src_id, tgt_id)); + + Ok(()) + } + _ => Err(anyhow!("Subgraph edges are not supported")), + }, + EdgeTy::Chain(vertices) => { + for i in 0..vertices.len() - 1 { + let src = &vertices[i]; + let tgt = &vertices[i + 1]; + + if let (Vertex::N(src), Vertex::N(tgt)) = (src, tgt) { + let src_id = plain_id(&src.0)?; + let tgt_id = plain_id(&tgt.0)?; + + self.ensure_node_exists(src_id.clone()); + self.ensure_node_exists(tgt_id.clone()); + + self.edges.push((src_id, tgt_id)); + } + } + + Ok(()) + } + } + } + + fn process_attribute(&mut self, attr: &Attribute) -> Result<()> { + if self.current_subgraph.is_none() { + let key = plain_id(&attr.0)?; + if &key == "pos" { + return Ok(()); + } + + let value = plain_id(&attr.1)?; + self.graph_attributes.insert(key, value); + } + Ok(()) + } + + fn process_graph_attribute(&mut self, ga: &GraphAttributes) -> Result<()> { + if self.current_subgraph.is_none() { + let attributes = match ga { + GraphAttributes::Graph(attrs) => attrs, + _ => return Ok(()), + }; + + for attr in attributes { + let key = plain_id(&attr.0)?; + if &key == "pos" || &key == "bb" || &key == "size" { + continue; + } + + let value = plain_id(&attr.1)?; + self.graph_attributes.insert(key, value); + } + } + + Ok(()) + } + + fn populate_node_connections(&mut self) { + for (src, tgt) in &self.edges { + if let Some(src_node) = self.nodes.get_mut(src) { + src_node.output.push(tgt.clone()); + } + if let Some(tgt_node) = self.nodes.get_mut(tgt) { + tgt_node.input.push(src.clone()); + } + } + } +} + +fn plain_id(id: &Id) -> Result { + match id { + Id::Plain(p) => Ok(String::from(p)), + Id::Escaped(e) => Ok(String::new()), + _ => Err(anyhow!(format!("Only Plaint Text id is supported, actual: {id}"))), + } +} + +pub fn parse_dot(raw: &str) -> Result { + let graph = graphviz_rust::parse(raw).map_err(|e| anyhow!(e))?; + + match graph { + Graph::Graph { .. } => Err(anyhow!("Undirected graphs are not supported")), + Graph::DiGraph { id, stmts, .. } => { + let mut ctx = DotParsingContext::new(); + + for stmt in &stmts { + ctx.process_statement(stmt)?; + } + + ctx.populate_node_connections(); + + let name = plain_id(&id)?; + + Ok(ctx.into_model(name)) + } + } +} diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/lib.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/lib.rs index 9061b18a66d049978b1f67cfb3e4ba2eed3a55b4..aa6da760c41516f064a89fea969f2d60888b821a 100644 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/lib.rs +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/parser/src/lib.rs @@ -27,6 +27,9 @@ use processors::*; mod pbtxt; use pbtxt::*; +mod dot; +use dot::*; + mod dispatch; use dispatch::{FileType, FileType::*}; @@ -36,13 +39,16 @@ pub use str_ext::StrExt; pub mod string_ext; pub use string_ext::SmartStringExt; -mod reader; - pub type StdString = String; pub fn parse_bin(path: &str) -> Result { if path.ends_with(".pbtxt") { return parse_onnx_pbtxt(path) } + if path.ends_with(".dot") { + let content = read_dot(path)?; + return parse_dot(&content) + } + match FileType::from(path) { ONNX => parse_onnx(path), MindIR => parse_mindir(path), diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/gspan.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/gspan.rs index 34097fea44c648add8f717f909dd6f8031f6131a..7108b5c1f7268902911a3fd57eeefb1de8938e2b 100644 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/gspan.rs +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/gspan.rs @@ -45,11 +45,8 @@ impl GSpan { GSpan { trans: graphs, min_sup, inner_min_sup, max_pat_min, max_pat_max, directed } } - pub fn run( + pub fn run( &self, - out_type: OutType, // 输出类型 - out_source: Option>, // 输出源 - mut process: Option, // 过程数据输出位置 ) -> (usize, MaxDFSCodeGraphResult) { // 0. Prepare the Result let mut result = MaxDFSCodeGraphResult::default(); @@ -58,17 +55,8 @@ impl GSpan { self.inner_min_sup, self.max_pat_min, self.max_pat_max, - out_type, ); - if let Some(out_source) = out_source { - match out_source { - OutSource::Path(path) => - result.set_stream(BufWriter::new(File::create(path.to_string()).unwrap())), - OutSource::Stream(stream) => result.set_stream(stream), - OutSource::Channel(sender) => result.set_channel(true, Some(sender)), - } - } - + // 1. Find single node frequent subgraph, if requested let mut single_vertex_graph_map: BTreeMap< usize, @@ -89,7 +77,6 @@ impl GSpan { &mut single_vertex_graph_map, &mut single_vertex_label_frequent_map, &mut next_gid, - &mut process, ); // 3. Subgraphs > Vertices @@ -128,7 +115,6 @@ impl GSpan { to_label_value, &mut dfs_code, &mut next_gid, - &mut process, &mut result, ); dfs_code.pop_with_set_result(to_label_value, &mut result); @@ -163,12 +149,11 @@ impl GSpan { } } - fn print_frequent_single_vertex( + fn print_frequent_single_vertex( &self, single_vertex_graph_map: &BTreeMap, usize)>>, single_vertex_label_frequent_map: &BTreeMap, next_gid: &mut usize, - process: &mut Option, ) { for (frequent_label, sup) in single_vertex_label_frequent_map.iter() { // 判断图之间的支持度 @@ -194,38 +179,19 @@ impl GSpan { if max < self.inner_min_sup { continue; } - - if let Some(process) = process { - let gid = *next_gid; - - let mut g = Graph::new(gid, self.directed); - g.insert_vertex("result_0", frequent_label); - - let result = mapped.iter().map(|v| v.clone()).reduce(|mut acc, cur| { - acc.0.extend(cur.0); // 扩展集合 - acc.1 += cur.1; // 累加计数 - acc - }); - - if let Some((set, count)) = result { - self.report_single(process, &mut g, *sup, min, max, count, set); - } else { - println!("The map is empty or no matching labels.") - } - } + *next_gid += 1; } } - fn sub_mining( + fn sub_mining( &self, projected: &Projected, dfs_code: &mut DFSCode, next_gid: &mut usize, - process: &mut Option, result: &mut MaxDFSCodeGraphResult, ) { - if self.should_stop_mining(projected, dfs_code, next_gid, process) { + if self.should_stop_mining(projected, dfs_code, next_gid) { return; } @@ -251,7 +217,7 @@ impl GSpan { e_label_key.clone(), Vertex::NIL_V_LABEL.into(), ); - self.sub_mining(e_label_value, dfs_code, next_gid, process, result); + self.sub_mining(e_label_value, dfs_code, next_gid, result); dfs_code.pop_with_set_result(e_label_value, result); } } @@ -266,7 +232,7 @@ impl GSpan { e_label_key.clone(), to_label_key.clone(), ); - self.sub_mining(to_label_value, dfs_code, next_gid, process, result); + self.sub_mining(to_label_value, dfs_code, next_gid, result); dfs_code.pop_with_set_result(to_label_value, result); } } @@ -355,12 +321,11 @@ impl GSpan { (new_fwd_root, new_bck_root) } - fn should_stop_mining( + fn should_stop_mining( &self, projected: &Projected, dfs_code: &mut DFSCode, next_gid: &mut usize, - process: &mut Option, ) -> bool { // Check if the pattern is frequent enough, between graphs let sup: usize = support(projected); @@ -386,20 +351,7 @@ impl GSpan { if self.max_pat_max >= self.max_pat_min && dfs_code.count_node() > self.max_pat_max { return true; } - - // Output the frequent substructures - if let Some(process) = process { - let gid = next_gid.clone(); - self.report( - sup, - min_inner_sup, - max_inner_sup, - projected.projections.len(), - dfs_code, - gid, - process, - ); - } + *next_gid += 1; false @@ -414,7 +366,7 @@ impl GSpan { return true; } - let graph_is_min = dfs_code.to_graph(0, self.directed); + let graph_is_min = dfs_code.to_graph(0); let mut dfs_code_is_min = DFSCode::new(); @@ -661,61 +613,3 @@ impl GSpan { (e_label_map_entry.0, to_label_map_entry.0, to_label_map_value) } } - -impl GSpan { - fn report_single( - &self, - out: &mut W, - g: &mut Graph, - sup: usize, - min_inner_sup: usize, - max_inner_sup: usize, - total: usize, - names: FxHashSet, - ) { - if self.max_pat_max >= self.max_pat_min && g.vertices.len() >= self.max_pat_max { - return; - } - if self.max_pat_min > 0 && g.vertices.len() < self.max_pat_min { - return; - } - out.write( - &*g.to_str_repr(Some((sup, min_inner_sup, max_inner_sup, total))) - .to_string() - .into_bytes(), - ) - .unwrap(); - out.write(b"\n").unwrap(); - let vec: Vec = names.into_iter().collect(); - out.write(&vec.join(",").into_bytes()).unwrap(); - out.write(b"\n").unwrap(); - } - - fn report( - &self, - sup: usize, - min_inner_sup: usize, - max_inner_sup: usize, - total: usize, - dfs_code: &DFSCode, - gid: usize, - out: &mut W, - ) { - if self.max_pat_max >= self.max_pat_min && dfs_code.count_node() > self.max_pat_max { - return; - } - if self.max_pat_min > 0 && dfs_code.count_node() < self.max_pat_min { - return; - } - - let g = dfs_code.to_graph(gid, self.directed); - out.write(b"-------\n").unwrap(); - out.write( - &*g.to_str_repr(Some((sup, min_inner_sup, max_inner_sup, total))) - .to_string() - .into_bytes(), - ) - .unwrap(); - out.write(b"\n").unwrap(); - } -} diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/dfs_code.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/dfs_code.rs index 31fc79b004b057ef8d634b1d8d6e5bcc58076fa8..8f906a0409c5b919b539244d922e20cf3eb4080b 100644 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/dfs_code.rs +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/dfs_code.rs @@ -56,8 +56,8 @@ impl DFSCode { self.dfs_vec.get(index).unwrap() } - pub fn to_graph(&self, graph_id: usize, directed: bool) -> Graph { - let mut g = Graph::new(graph_id, directed); + pub fn to_graph(&self, graph_id: usize) -> Graph { + let mut g = Graph::new(graph_id); let mut edge_data = Vec::<(String, String, Option)>::with_capacity(8); for it in &self.dfs_vec { let from_name = String::from(format!("{}", it.from)); diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/graph.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/graph.rs index fad96f3e86fc164792f227fcda6a262a1b61be71..62fd7fc39acbeceb8757bee1775d6def59e21237 100644 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/graph.rs +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/models/graph.rs @@ -27,18 +27,16 @@ pub struct Graph { pub id: usize, pub name: String, pub edge_size: usize, - pub directed: bool, pub vertices: Vec, pub vertex_name_label_map: HashMap, } impl Graph { - pub fn new(id: usize, directed: bool) -> Graph { + pub fn new(id: usize) -> Graph { Graph { id, name: String::new(), edge_size: 0, - directed, vertices: Vec::with_capacity(32), vertex_name_label_map: HashMap::new(), } @@ -123,64 +121,15 @@ impl Graph { } impl Graph { - pub fn graph_from_file(filename: &str, directed: bool) -> Result { - //读取文件内容 - return match fs::read_to_string(filename) { - Ok(json_content) => Graph::graph_from_json_string(json_content.into(), directed), - Err(_) => Err(GraphSetParseError { - message: format!("Error reading file : {}", filename).into(), - }), - }; - } - - pub fn graph_from_json_string( - json_content: String, - directed: bool, - ) -> Result { - match serde_json::from_str::(&json_content) { - Ok(model_graph) => { - return Ok(Graph::graph_from_model_graph(model_graph, directed)); - } - Err(e) => { - return Err(GraphSetParseError { - message: format!("Error parsing json : {}", e.to_string()).into(), - }); - } - } - } - - pub fn graph_from_model_graph(model_graph: ModelGraph, directed: bool) -> Graph { + pub fn graph_from_model_graph(model_graph: ModelGraph) -> Graph { let node_map = model_graph.nodes; - let mut graph = Graph::new(0, directed); + let mut graph = Graph::new(0); graph.name = model_graph.name; for (_, val) in &node_map { graph.push_node(val); } - // for (_, val) in &node_map { - // graph.build_edge_by_node(val); - // } + graph.build_edges_for_nodes(model_graph.edges); return graph; } - - pub fn to_str_repr(&self, support: Option<(usize, usize, usize, usize)>) -> String { - let mut lines: Vec = Vec::new(); - let mut g_rep = format!("t # {}", self.id.to_string()); - if let Some(support) = support { - g_rep += &*format!( - " * btw({}) inn({}, {}) ttl({})", - support.0, support.1, support.2, support.3 - ); - } - lines.push(g_rep.into()); - let mut edges: Vec<&Edge> = Vec::new(); - for vertex in &self.vertices { - lines.push(vertex.to_str_repr()); - edges.extend(vertex.edges.iter()); - } - for edge in edges { - lines.push(edge.to_str_repr()); - } - lines.join("\n").into() - } } diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/result.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/result.rs index 4cb782e79df1e0c000bdb3da842a0a811a506aec..8a27bbf597f67735363f01e436d9f936b0f2cdde 100644 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/result.rs +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/gspan/result.rs @@ -17,13 +17,10 @@ use crate::{ #[derive(Debug, Clone)] pub enum OutType { - TXT, JSON, } -pub enum OutSource { - Channel(Sender), - Stream(W), +pub enum OutSource { Path(String), } @@ -39,10 +36,6 @@ pub struct JSONResult { #[derive(Default)] pub struct MaxDFSCodeGraphResult { - out: Option>, - out_type: Option, - channel: bool, // 如果 channel 为 true,则 out 流失效 - sender: Option>, min_sup: usize, inner_min_sup: usize, max_pat_min: usize, // Minimum number of vertices @@ -50,6 +43,11 @@ pub struct MaxDFSCodeGraphResult { value: Vec<(DFSCode, Vec>)>, } +// // 单例结构体 +// pub struct MaxDFSCodeGraphResult { +// inner: Arc>, +// } + impl MaxDFSCodeGraphResult { pub fn set_config( &mut self, @@ -57,13 +55,11 @@ impl MaxDFSCodeGraphResult { inner_min_sup: usize, max_pat_min: usize, // Minimum number of vertices max_pat_max: usize, // Maximum number of vertices - out_type: OutType, ) { self.min_sup = min_sup; self.inner_min_sup = inner_min_sup; self.max_pat_min = max_pat_min; self.max_pat_max = max_pat_max; - self.out_type = Some(out_type); } pub fn add_value(&mut self, dfs_code: &DFSCode, projected: &Projected) -> bool { @@ -86,19 +82,11 @@ impl MaxDFSCodeGraphResult { } let item = (dfs_code.clone(), projected.to_vertex_names_list()); let edges_list = projected.to_edges_list(); - if self.channel { - self.send_result(sup, _min_inner_sup, max_inner_sup, &item, edges_list); - } else if Option::is_some(&self.out) { - self.write_result(sup, _min_inner_sup, max_inner_sup, &item, edges_list); - } + self.value.push(item); true } - pub fn get_value_len(&self) -> usize { - self.value.len() - } - pub fn get_result(&self) -> Vec { self.value .iter() @@ -129,198 +117,3 @@ impl MaxDFSCodeGraphResult { self.value.iter().map(|e| e.1.len()).sum() } } - -impl MaxDFSCodeGraphResult { - pub fn set_channel(&mut self, channel: bool, sender: Option>) { - if channel { - self.out = None; - } else { - // take 方法会返回 Some(sender),并把原来的字段设置为 None - // _sender 离开作用域时会自动调用 drop,因此不需要显式调用 drop - let _sender = self.sender.take(); - } - self.sender = sender; - self.channel = channel; - } - - pub fn drop_sender(&mut self) { - if Option::is_some(&self.sender) { - // take 方法会返回 Some(sender),并把原来的字段设置为 None - // _sender 离开作用域时会自动调用 drop,因此不需要显式调用 drop - let _sender = self.sender.take(); - } - } - - fn send_result( - &mut self, - sup: usize, - min_inner_sup: usize, - max_inner_sup: usize, - item: &(DFSCode, Vec>), - edges_list: Vec>, - ) { - let id = self.value.len(); - if let Some(out_type) = &self.out_type { - match out_type { - OutType::TXT => - if let Some(sender) = &mut self.sender { - let line = - report_txt(id, sup, min_inner_sup, max_inner_sup, item, edges_list); - sender.send(line).unwrap(); - }, - OutType::JSON => - if let Some(sender) = &mut self.sender { - let line = - report_json(id, sup, min_inner_sup, max_inner_sup, item, edges_list); - sender.send(line).expect("ERR: MaxDFSCodeGraphResult Channel"); - }, - } - } - } -} - -impl MaxDFSCodeGraphResult { - pub fn set_stream(&mut self, out: W) { - self.channel = false; - self.out = Some(Box::new(out)); - } - - fn write_result( - &mut self, - sup: usize, - min_inner_sup: usize, - max_inner_sup: usize, - item: &(DFSCode, Vec>), - edges_list: Vec>, - ) { - let id = self.value.len(); - if let Some(out_type) = &self.out_type { - match out_type { - OutType::TXT => { - if let Some(out) = &mut self.out { - let line = - report_txt(id, sup, min_inner_sup, max_inner_sup, item, edges_list); - out.write(&*line.to_string().into_bytes()).expect("ERR: MaxDFSCodeGraphResult Stream"); - - // 刷新缓冲区,确保所有数据都被写出 - if let Err(e) = out.flush() { - eprintln!("Failed to flush output after writing lines: {}", e); - } - } - } - OutType::JSON => { - if let Some(out) = &mut self.out { - let line = - report_json(id, sup, min_inner_sup, max_inner_sup, item, edges_list); - out.write(&*line.to_string().into_bytes()).expect("ERR: MaxDFSCodeGraphResult Stream"); - out.write(b",\n").expect("ERR: MaxDFSCodeGraphResult Stream"); - - // 刷新缓冲区,确保所有数据都被写出 - if let Err(e) = out.flush() { - eprintln!("Failed to flush output after writing lines: {}", e); - } - } - } - } - } - } -} - -fn report_txt( - id: usize, - sup: usize, - min_inner_sup: usize, - max_inner_sup: usize, - item: &(DFSCode, Vec>), - edges_list: Vec>, -) -> String { - let mut lines: Vec = vec![]; - - let g = item.0.to_graph(id, false); - let total = item.1.len(); - lines.push("\n".into()); - lines.push(g.to_str_repr(Some((sup, min_inner_sup, max_inner_sup, total)))); - lines.push("\n".into()); - - for (index, line) in item.1.iter().enumerate() { - let vertex_content = line - .iter() - .map(|l| format!("{}/{}", l.0, &l.1).into()) - .collect::>() - .join(", "); - lines.push(format!("${}| {}\n", line.len(), vertex_content).into()); - - let edge_content = edges_list[index] - .iter() - .map(|e| { - format!( - " e| {}/{}-{}-{}/{}", - &e.from, &e.from_label, &e.e_label, &e.to, &e.to_label - ).into() - }) - .collect::>() - .join("\n").into(); - lines.push(edge_content); - lines.push("\n".into()); - } - - lines.join("").into() -} - -fn report_json( - id: usize, - sup: usize, - min_inner_sup: usize, - max_inner_sup: usize, - item: &(DFSCode, Vec>), - edges_list: Vec>, -) -> String { - let g = item.0.to_graph(id, false); - - let mut output_vertices: Vec = Vec::new(); - let mut output_edges: Vec = Vec::new(); - for vertex in &g.vertices { - output_vertices - .push(OutputVertex { name: vertex.name.clone(), label: vertex.label.clone() }); - output_edges.extend(vertex.edges.iter().map(|e| OutputEdge { - from: e.from.clone(), - to: e.to.clone(), - from_label: e.from_label.clone(), - to_label: e.to_label.clone(), - e_label: e.e_label.clone(), - })); - } - - let mut json_result = JSONResult { - between_sup: sup, - inner_min_sup: min_inner_sup, - inner_max_sup: max_inner_sup, - total: item.1.len(), - structure: Structure { tid: g.id, vertices: output_vertices, edges: output_edges }, - instances: vec![], - }; - - for (index, line) in item.1.iter().enumerate() { - let node_ids = line - .iter() - .map(|l| NodeId { gid: l.0, nid: l.1.clone() }) - .collect::>(); - - let edges = edges_list[index] - .iter() - .map(|e| OutputEdge { - from: e.from.clone(), - to: e.to.clone(), - from_label: e.from_label.clone(), - to_label: e.to_label.clone(), - e_label: e.e_label.clone(), - }) - .collect::>(); - - json_result - .instances - .push(Instance { node_num: node_ids.len(), node_ids, edges }); - } - - serde_json::to_string(&json_result).expect("Err: Serialization failed").into() -} diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/lib.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/lib.rs index b2501e61b037001fad81e8d3dca12129e0fd3787..f5d7e832ecb9163e2943c5fee9dc87edc1698b61 100644 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/lib.rs +++ b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/lib.rs @@ -4,23 +4,18 @@ pub mod gspan; use anyhow::Result; - pub use gspan::*; pub mod io; pub use io::*; -pub mod strategy; use parser::{Model, Node, parse_bin}; -use smartstring::alias::String; -pub use strategy::*; use crate::{ - gspan::result::OutType, + gspan::gspan::GSpan, io::{model_graph::ModelGraph, node::Node as ModelNode}, models::graph::Graph, result::JSONResult, - strategy::{config::Config, gspan_mining::GSpanMining, mining_strategy::MiningStrategy}, }; impl From for ModelGraph { @@ -52,27 +47,11 @@ pub fn subgraphs_bin( let raw = parse_bin(path)?; let model_graph = ModelGraph::from(raw); - let graph = Graph::graph_from_model_graph(model_graph, true); + let graph = Graph::graph_from_model_graph(model_graph); - let gspan_mining = GSpanMining; + let gspan = GSpan::new(vec![graph], 1, min_inner_support, 2, max_vertices, true); - match Config::new_from_graphs( - vec![graph], - None, - None, - OutType::JSON, - 1, - min_inner_support, - 2, - max_vertices, - ) { - Ok(config) => { - let result = gspan_mining.run(config); - Ok(result) - } - e => { - eprintln!("Failed to create config: {:?}", e); - unreachable!() - } - } + let (subgraphs, result) = gspan.run(); + + Ok(result.get_result()) } diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/config.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/config.rs deleted file mode 100644 index 61927ae9ca040be557cedd18566dcfc9235b48cd..0000000000000000000000000000000000000000 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/config.rs +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c), Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - */ -use std::{fs, path::Path}; -use smartstring::alias::String; - -use crate::gspan::{models::graph::Graph, result::OutType}; - -#[derive(Debug)] -#[allow(dead_code)] -pub enum ConfigError { - InvalidInputFile(String), - InvalidProcessFile(String), - InvalidOutputFile(String), - MinSupportTooSmall(usize), - MinInnerSupportTooSmall(usize), - MinVerticesGreaterThanMax(usize, usize), - NonNormalizedPath(String), - SymlinkNotAllowed(String), - IllegalCharacters(String), -} - -#[derive(Debug)] -pub enum InputSource { - File(String), - Graphs(Vec), -} - -#[derive(Debug)] -pub struct Config { - input_source: InputSource, - process_path: Option, - output_path: Option, - output_type: OutType, - min_support: usize, // 相同结构在不同图中出现的最小次数 - min_inner_support: usize, // 相同结构在图内部中出现的最小次数 - min_vertices: usize, // Minimum number of vertices - max_vertices: usize, // Maximum number of vertices -} - -impl Config { - pub fn get_input_source(&self) -> &InputSource { - &self.input_source - } - - pub fn get_process_path(&self) -> &Option { - &self.process_path - } - - pub fn get_output_path(&self) -> &Option { - &self.output_path - } - - pub fn get_output_type(&self) -> &OutType { - &self.output_type - } - - pub fn get_min_support(&self) -> usize { - self.min_support - } - - pub fn get_min_inner_support(&self) -> usize { - self.min_inner_support - } - - pub fn get_min_vertices(&self) -> usize { - self.min_vertices - } - - pub fn get_max_vertices(&self) -> usize { - self.max_vertices - } -} - -impl Config { - pub fn new( - input_file: &str, - process_file: Option<&str>, - output_file: Option<&str>, - output_type: OutType, - min_support: usize, - min_inner_support: usize, - min_vertices: usize, - max_vertices: usize, - ) -> Result { - // 校验文件路径是否为空 - if input_file.is_empty() { - return Err(ConfigError::InvalidInputFile( - "Input file path cannot be empty.".into(), - )); - } - - check_normalized_path(&input_file)?; - - // 校验 process_file 和 output_file 是否为空 - if let Some(ref process_file) = process_file { - if process_file.is_empty() { - return Err(ConfigError::InvalidProcessFile( - "Process file path cannot be empty.".into(), - )); - } - check_normalized_path(process_file)?; - } - - if let Some(ref output_file) = output_file { - if output_file.is_empty() { - return Err(ConfigError::InvalidOutputFile( - "Output file path cannot be empty.".into(), - )); - } - check_normalized_path(output_file)?; - } - - // 校验支持度阈值 - if min_support < 1 { - return Err(ConfigError::MinSupportTooSmall(min_support)); - } - if min_inner_support < 1 { - return Err(ConfigError::MinInnerSupportTooSmall(min_inner_support)); - } - - // 校验顶点数阈值 - if min_vertices > max_vertices { - return Err(ConfigError::MinVerticesGreaterThanMax(min_vertices, max_vertices)); - } - - Ok(Config { - input_source: InputSource::File(input_file.into()), - process_path: process_file.map_or(None, |file| Some(file.into())), - output_path: output_file.map_or(None, |file| Some(file.into())), - output_type, - min_support, - min_inner_support, - min_vertices, - max_vertices, - }) - } - - pub fn new_from_graphs( - graphs: Vec, - process_file: Option<&str>, - output_file: Option<&str>, - output_type: OutType, - min_support: usize, - min_inner_support: usize, - min_vertices: usize, - max_vertices: usize, - ) -> Result { - // 校验 process_file 和 output_file 是否为空 - if let Some(ref process_file) = process_file { - if process_file.is_empty() { - return Err(ConfigError::InvalidProcessFile( - "Process file path cannot be empty.".into(), - )); - } - check_normalized_path(process_file)?; - } - - if let Some(ref output_file) = output_file { - if output_file.is_empty() { - return Err(ConfigError::InvalidOutputFile( - "Output file path cannot be empty.".into(), - )); - } - check_normalized_path(output_file)?; - } - - // 校验支持度阈值 - if min_support < 1 { - return Err(ConfigError::MinSupportTooSmall(min_support)); - } - if min_inner_support < 1 { - return Err(ConfigError::MinInnerSupportTooSmall(min_inner_support)); - } - - // 校验顶点数阈值 - if min_vertices > max_vertices { - return Err(ConfigError::MinVerticesGreaterThanMax(min_vertices, max_vertices)); - } - - Ok(Config { - input_source: InputSource::Graphs(graphs), - process_path: process_file.map_or(None, |file| Some(file.into())), - output_path: output_file.map_or(None, |file| Some(file.into())), - output_type, - min_support, - min_inner_support, - min_vertices, - max_vertices, - }) - } -} - -// 校验路径是否已标准化(不允许是软链接,且没有非法字符) -fn check_normalized_path(path: &str) -> Result<(), ConfigError> { - let p = Path::new(path); - - // 检查是否为符号链接 - if fs::symlink_metadata(p) - .and_then(|metadata| Ok(metadata.file_type().is_symlink())) - .unwrap_or(false) - { - return Err(ConfigError::SymlinkNotAllowed(format!("Path '{}' is a symbolic link.", path).into())); - } - - // 检查是否存在非法字符 - if let Some(illegal_char) = find_illegal_characters(path) { - return Err(ConfigError::IllegalCharacters(format!( - "Path '{}' contains illegal character '{}'.", - path, illegal_char - ).into())); - } - - Ok(()) -} - -fn find_illegal_characters(path: &str) -> Option { - // 定义文件路径非法字符集合 - let illegal_chars = if cfg!(target_os = "windows") { - // Windows 非法字符 - vec!['<', '>', ':', '"', '|', '?', '*', '\0'] - .into_iter() - .chain((0x01..=0x1F).map(|c| c as u8 as char)) // 控制字符 - .collect() - } else { - // Unix-like 系统非法字符 - vec!['\0'] - .into_iter() - .chain((0x01..=0x1F).map(|c| c as u8 as char)) // 控制字符 - .collect::>() - }; - - for c in path.chars() { - if illegal_chars.contains(&c) { - return Some(c); - } - } - - None -} diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/gspan_mining.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/gspan_mining.rs deleted file mode 100644 index 681e50c871157678d2d768e28479e2424471d360..0000000000000000000000000000000000000000 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/gspan_mining.rs +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c), Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - */ -use std::{ - fs::{File, OpenOptions}, - io::{BufWriter, Read, Seek, Write}, - sync::mpsc::{self, Receiver, Sender}, - thread, - time::Instant, -}; - -use super::mining_strategy::MiningStrategy; -use crate::{ - gspan::{ - gspan::GSpan, - models::graph::Graph, - result::{JSONResult, OutType}, - }, - result::OutSource, - strategy::config::InputSource, -}; -use smartstring::alias::String; - -pub struct GSpanMining; - -impl MiningStrategy for GSpanMining { - fn run(&self, args: super::Config) -> Vec { - let now = Instant::now(); - let graphs = match args.get_input_source() { - InputSource::File(input_file) => { - let graph = Graph::graph_from_file(&input_file, false); - match graph { - Ok(graph) => vec![graph], - Err(err) => panic!("{}", err.to_string()), - } - } - InputSource::Graphs(graphs) => graphs.to_vec(), - }; - for graph in graphs.iter() { - println!( - "All good parsing input file. vertex: {}, edge: {}.", - graph.vertices.len(), - graph.edge_size - ); - } - let alpha = now.elapsed().as_millis(); - println!("Took {}ms", alpha); - - println!("Mining subgraphs.."); - let gspan = GSpan::new( - graphs, - args.get_min_support(), - args.get_min_inner_support(), - args.get_min_vertices(), - args.get_max_vertices(), - true, - ); - - let process_writer: Option> = match args.get_process_path() { - Some(file) => Some(BufWriter::new(File::create(file.to_string()).unwrap())), - None => None, - }; - - let output_source = match args.get_output_path() { - Some(file) => Some(OutSource::Path(file.clone())), - None => None, - }; - - let (subgraphs, result) = - gspan.run(args.get_output_type().clone(), output_source, process_writer); - let delta = now.elapsed().as_millis(); - println!("Finished."); - println!("Found {} subgraphs", subgraphs); - println!( - "Found {}/{} subgraphs (Only Max)", - result.get_value_len(), - result.get_sum_subgraphs() - ); - println!("Took {}ms", delta - alpha); - println!("Total Took {}ms", delta); - - fix_json_file(args.get_output_path(), args.get_output_type()); - result.get_result() - } - - fn run_channel(&self, args: super::Config) -> Receiver { - let now = Instant::now(); - let graphs = match args.get_input_source() { - InputSource::File(file) => { - let graph = Graph::graph_from_file(&file, false); - match graph { - Ok(graph) => vec![graph], - Err(err) => panic!("{}", err.to_string()), - } - } - InputSource::Graphs(graphs) => graphs.to_vec(), - }; - for graph in graphs.iter() { - println!( - "All good parsing input file. vertex: {}, edge: {}.", - graph.vertices.len(), - graph.edge_size - ); - } - let alpha = now.elapsed().as_millis(); - println!("Took {}ms", alpha); - - println!("Mining subgraphs.."); - let gspan = GSpan::new( - graphs, - args.get_min_support(), - args.get_min_inner_support(), - args.get_min_vertices(), - args.get_max_vertices(), - true, - ); - - let (tx, rx): (Sender, Receiver) = mpsc::channel(); - - let process_path = (*args.get_process_path()).clone(); - let output_type = args.get_output_type().clone(); - thread::spawn(move || { - let process_writer: Option> = match process_path { - None => None, - Some(file) => Some(BufWriter::new(File::create(file.to_string()).unwrap())), - }; - - let (subgraphs, mut result) = - gspan.run(output_type, Some(OutSource::Channel(tx)), process_writer); - let delta = now.elapsed().as_millis(); - println!("Finished."); - println!("Found {} subgraphs", subgraphs); - println!( - "Found {}/{} subgraphs (Only Max)", - result.get_value_len(), - result.get_sum_subgraphs() - ); - println!("Took {}ms", delta - alpha); - println!("Total Took {}ms", delta); - result.drop_sender(); - }); - - fix_json_file(args.get_output_path(), args.get_output_type()); - rx - } -} - -fn fix_json_file(output_path: &Option, output_type: &OutType) { - match &output_type { - OutType::JSON => { - if let Some(filename) = &output_path { - // 打开文件并读取内容 - let mut file = OpenOptions::new().read(true).write(true).open(filename.to_string()).unwrap(); - - let mut contents = String::new().to_string(); - file.read_to_string(&mut contents).unwrap(); - - // 修改内容 - contents.pop(); - contents.pop(); - let new_contents = format!("[{}]", contents); - - // 清空文件并重置文件位置 - file.set_len(0).unwrap(); - file.seek(std::io::SeekFrom::Start(0)).unwrap(); - - file.write_all(new_contents.as_bytes()).unwrap(); - } - } - _ => {} - } -} diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/mining_strategy.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/mining_strategy.rs deleted file mode 100644 index a73e33570ac0c1c314ed7a06f03e63c77b51c609..0000000000000000000000000000000000000000 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/mining_strategy.rs +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c), Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - */ -use std::sync::mpsc::Receiver; - -use super::Config; -use crate::gspan::result::JSONResult; -use smartstring::alias::String; - -pub trait MiningStrategy { - fn run(&self, config: Config) -> Vec; - - fn run_channel(&self, config: Config) -> Receiver; -} diff --git a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/mod.rs b/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/mod.rs deleted file mode 100644 index 5442b281a4802de4913124d3f4b980bff5de2a92..0000000000000000000000000000000000000000 --- a/plugins/mindstudio-insight-plugins/ModelVis/rust/subgraph/src/strategy/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c), Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. - */ -pub mod config; -pub mod gspan_mining; -pub mod mining_strategy; -use smartstring::alias::String; - -use std::sync::mpsc::Receiver; - -use config::Config; - -use self::mining_strategy::MiningStrategy; -use crate::gspan::result::JSONResult; - -pub struct MiningContext { - strategy: Box, -} - -impl MiningContext { - pub fn new(strategy: Box) -> Self { - MiningContext { strategy } - } - - pub fn run(&self, config: Config) -> Vec { - self.strategy.run(config) - } - - pub fn run_channel(&self, config: Config) -> Receiver { - self.strategy.run_channel(config) - } -}