diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/.gitignore b/plugins/tensorboard-plugins/tb_graph_ascend/.gitignore
index 70f4e767811d0d93c25fbb8ce2d2b29c4ba3b6e6..2ae952e7aced2c5fe1b74606444d92498196c7d2 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/.gitignore
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/.gitignore
@@ -5,7 +5,7 @@ dist/
build/
tb_graph_ascend.egg-info/
__pycache__/
-/server/static/index.html
+**/server/static/index.html
report.html
assets/
/htmlcov/
\ No newline at end of file
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/README.md b/plugins/tensorboard-plugins/tb_graph_ascend/README.md
index cd578e3f4d7d597229854e1362afe05753600f03..7cd935265554d493dc47472811cebbf1619c9a83 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/README.md
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/README.md
@@ -2,13 +2,13 @@
## 一、 介绍
-此工具是将模型结构进行分级可视化展示的 Tensorboard 插件。可将模型的层级关系、精度性能数据进行可视化,并支持将调试模型和标杆模型进行分视图展示和关联比对,方便用户快速定位精度问题。
+此工具是将模型结构进行分级可视化展示的 Tensorboard 插件。可将模型的层级关系、精度数据进行可视化,并支持将调试模型和标杆模型进行分视图展示和关联比对,方便用户快速定位精度问题。
## 二、快速安装
### 1. 相关依赖
-`python >= 3.7 ,tensorboard >= 2.11.2,numpy <= 1.26.3`
+`python >= 3.7, tensorboard >= 2.11.2`
### 2. 安装方式
@@ -160,7 +160,7 @@
打开本工具时,本工具会对 logdir 目录下的 vis 文件以及其父目录进行安全检查,如果存在安全风险,本工具会展示如下提示信息,询问用户是否继续执行,用户选择继续执行后,可以操作未通过安全检查的文件和目录,用户需要自行承担操作风险。如果用户选择不继续执行,则用户只能操作通过安全检查的文件。
-
+
#### 4.1.2 TensorBoard 版本说明
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/constant.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/constant.ts
index 145aa943805e35341a4453b44b7ae6495c27fb64..986d12c65fb4265a0f06bb3534862e977d656b02 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/constant.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/constant.ts
@@ -20,6 +20,9 @@ export const BENCH_PREFIX = 'B___';
// 未匹配节点颜色
export const UNMATCHED_COLOR = '#C7C7C7';
+// Graph data source kinds; these are the two values allowed for `SelectionType.type`.
+export const JSON_TYPE = 'json';
+export const DB_TYPE = 'db';
+
// 双图下单个图形的最小宽度
export const MIN_GRAPG_WIDTH = 200;
@@ -44,7 +47,7 @@ export enum NODE_TYPE {
// 渲染信息
export const DURATION_TIME = 160; // 动画时间
export const SELECTED_STROKE_COLOR = 'rgb(31, 63, 207)'; // 选中节点颜色
-export const BENCH_NODE_COLOR = 'rgb(236, 235, 235)'; // 基准模型节点颜色
+export const BENCH_NODE_COLOR = 'rgba(255, 255, 255, 1)'; // 基准模型节点颜色
export const BENCH_STROKE_COLOR = 'rgb(161, 161, 161)'; // 基准模型边框颜色
export const NO_MATCHED_NODE_COLOR = 'rgb(199, 199, 199)'; // 未匹配节点颜色
export const BASE_NODE_COLOR = 'rgb(255, 255, 255)'; // 基准节点颜色,没有精度信息、API、FUSION的填充色
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/i18n.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/i18n.ts
index 1f0e5ca5a3940526b0ac32b7950fd2343487fce9..87fabb2ff4a75a547f5ae17448455348ca1e8110 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/i18n.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/common/i18n.ts
@@ -25,7 +25,7 @@ i18next
translation: {
fit: "Fit Screen",
settings: "Settings",
- match: 'Matching',
+ function: 'function',
show_debug_minimap: "show debug minimap",
show_bench_minimap: "show bench minimap",
run: "Run",
@@ -48,15 +48,15 @@ i18next
},
node_match: "Node Match",
select_match_config_file: "Select Match Config File",
- select_match_config_file_desc: "Select the corresponding configuration file, read the matching node information, and match the corresponding node."
-
+ select_match_config_file_desc: "Select the corresponding configuration file, read the matching node information, and match the corresponding node.",
+ node_search: "Node Search"
}
},
'zh-CN': {
translation: {
fit: "自适应屏幕",
settings: "设置",
- match: '匹配',
+ function: "功能",
show_debug_minimap: "调试侧缩略图",
show_bench_minimap: "标杆侧缩略图",
run: "目录",
@@ -70,8 +70,8 @@ i18next
accuracy_error: "精度误差",
overflow: "精度溢出",
match_accuracy_error: "符合精度误差节点",
- overflow_filter_node: "溢出筛选节点",
- no_matching_nodes: "无匹配节点11",
+ overflow_filter_node: "符合溢出筛选节点",
+ no_matching_nodes: "无匹配节点",
precision_desc: {
"summary": "节点中调试侧和标杆侧输出的统计量相对误差,值越大精度差距越大,颜色标记越深,相对误差指标(RelativeErr):| (调试值 - 标杆值) / 标杆值 |",
"all": "节点中所有输入的最小双千指标和所有输出的最小双千分之一指标的差值,反映了双千指标的下降情况,值越大精度差距越大,颜色标记越深,双千分之一精度指标(One Thousandth Err Ratio):Tensor中的元素逐个与对应的标杆数据对比,相对误差小于千分之一的比例占总元素个数的比例,比例越接近1越好",
@@ -79,7 +79,8 @@ i18next
},
node_match: "节点匹配",
select_match_config_file: "选择匹配配置文件",
- select_match_config_file_desc: "选择对应配置文件,会读取匹配节点信息,并将对应节点进行匹配。"
+ select_match_config_file_desc: "选择对应配置文件,会读取匹配节点信息,并将对应节点进行匹配。",
+ node_search: "节点搜索"
}
}
},
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/index.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/index.ts
index 3d895cea6fcf654bad4d5967636a103f42148b94..b085bee4647da0d1445d88f06077e6c9e2466973 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/index.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/index.ts
@@ -27,8 +27,8 @@ import '../graph_controls_board/index';
import '../common/graph-board-layout';
import '@vaadin/confirm-dialog'
import { Notification } from '@vaadin/notification';
-
-import type { SelectionType, ProgressType, GraphConfigType, GraphAllNodeType, NodeListType, UnmatchedNodeType } from './type';
+import request from '../utils/request';
+import type { SelectedItemType, SelectionType, ProgressType, GraphConfigType, GraphAllNodeType, NodeListType, UnmatchedNodeType } from './type';
@customElement('graph-ascend')
class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
@@ -51,19 +51,22 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
colors="{{colors}}"
colorset="[[colorset]]"
overflowcheck="[[overflowcheck]]"
+ steps="[[steps]]"
+ ranks="[[ranks]]"
microsteps="[[microsteps]]"
npu-match-nodes="[[npuMatchNodes]]"
bench-match-nodes="[[benchMatchNodes]]"
matched-config-files="[[matchedConfigFiles]]"
nodelist="[[nodelist]]"
unmatched="[[unmatched]]"
- matchedlist="[[matchedlist]]"
minimap-vis="{{minimapVis}}"
is-sync-expand="{{isSyncExpand}}"
is-single-graph="{{isSingleGraph}}"
task="[[task]]"
is-overflow-filter="{{isOverflowFilter}}"
on-fit-tap="onFitTap"
+ load-all-node-list="{{loadGraphAllNodeList}}"
+ need-load-all-node-list="{{needLoadAllNodeList}}"
>
@@ -160,10 +163,10 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
selection: SelectionType | null = null;
@property({ type: Object, notify: true })
- nodelist: any;
+ nodelist: NodeListType = {} as NodeListType;
@property({ type: Object, notify: true })
- unmatched: any;
+ unmatched: UnmatchedNodeType = {} as UnmatchedNodeType;
@property({ type: Object, notify: true })
matchedlist: any;
@@ -186,11 +189,18 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
@property({ type: Boolean })
isSingleGraph: boolean = false;
- @property({ type: Object })
- microsteps: any;
+  // Options of the micro-step selector. loadGraphConfig fills this with
+  // `{ label, value }` items (label 'ALL' / value -1 first), not with plain numbers.
+  @property({ type: Array })
+  microsteps: Array<SelectedItemType> = [];
+
+  // Options of the step selector; defaults to a single step 0 until a config is loaded.
+  @property({ type: Array })
+  steps: Array<SelectedItemType> = [{ value: 0, label: '0' }];
+
+  // Options of the rank selector; defaults to a single rank 0 until a config is loaded.
+  @property({ type: Array })
+  ranks: Array<SelectedItemType> = [{ value: 0, label: '0' }];
+
- @property({ type: Array })
- overflowcheck;
+  // Declared as a boolean flag, so the Polymer property type must be Boolean as well.
+  @property({ type: Boolean })
+  overflowcheck: boolean = false;
@property({ type: Object })
tooltips: object = {};
@@ -215,6 +225,9 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
@property({ type: Array })
fileListError: Array = [];
+  // Boolean flag bound two-way in this element's template (`need-load-all-node-list`);
+  // it is set back to true every time the selection observer accepts a change.
+  @property({ type: Boolean })
+  needLoadAllNodeList: boolean = true;
+
private currentSelection: SelectionType | null = null;
private useGraphAscend = useGraphAscend();
private eventSource: EventSource | null = null;
@@ -224,12 +237,30 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
if (!this.selection?.run || !this.selection?.tag) {
return;
}
- if (this.currentSelection?.run !== this.selection?.run || this.currentSelection?.tag !== this.selection?.tag) {
- this.loadGraphData(this.selection);
- } else if (this.currentSelection?.microStep !== this.selection?.microStep) {
+ const isFileChange = this.currentSelection?.run !== this.selection?.run || this.currentSelection?.tag !== this.selection?.tag;
+ const isDBChange = this.currentSelection?.rank !== this.selection?.rank || this.currentSelection?.step !== this.selection?.step;
+ if (isFileChange) {
+ switch (this.selection?.type) {
+ case 'json':
+ this.loadJSONGraphData(this.selection);
+ break;
+ case 'db':
+ this.loadDBGraphData(this.selection, true);
+ break;
+ default:
+ break;
+ }
+ }
+ else if (isDBChange) {
+ this.loadDBGraphData(this.selection, false);
+ }
+ else if (this.currentSelection?.microStep !== this.selection?.microStep) {
this.initGraphBoard(); // 只改变microsteps时,不重新加载图数据
- this.loadGraphAllNodeList(this.selection);
}
+ else {
+ return
+ }
+ this.set('needLoadAllNodeList', true);
this.currentSelection = this.selection;
};
@@ -262,13 +293,25 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
}
this.set('metaDir', data);
}
+  /**
+   * Loads a graph for a `db` selection and reports progress while doing so.
+   *
+   * @param metaData  Current selection; sent as query parameters to `loadGraphData`
+   *                  and passed on to `loadGraphConfig`.
+   * @param isInitDB  When true, requests `loadGraphData` and reloads the graph
+   *                  configuration before the board is initialised. When false,
+   *                  only the graph board is re-initialised.
+   */
+  loadDBGraphData = async (metaData: SelectionType, isInitDB: boolean = false) => {
+    // Small local helper: every progress update uses the same text and mirrors
+    // the percentage into both `progress` and `progressValue`.
+    const reportProgress = (title: string, percent: number, done: boolean): void => {
+      this.progreesLoading(title, '请稍后', { progress: percent, progressValue: percent, done });
+    };
+
+    if (isInitDB) {
+      reportProgress('正在初始化数据库', 10, false);
+      await request({ url: 'loadGraphData', method: 'GET', params: metaData });
+      await this.loadGraphConfig(metaData);
+    }
+    reportProgress('正在初始化图', 90, false);
+    // Read the configuration first, then initialise the graph: the order matters.
+    this.initGraphBoard();
+    reportProgress('初始化完成', 100, true);
+  }
+
- loadGraphData = (metaData: SelectionType) => {
+ loadJSONGraphData = async (metaData: SelectionType) => {
if (this.eventSource) {
this.eventSource.close();
this.eventSource = null;
}
- this.eventSource = new EventSource(`loadGraphData?run=${metaData.run}&tag=${metaData.tag}`);
+
+ this.eventSource = new EventSource(`loadGraphData?run=${metaData.run}&tag=${metaData.tag}&type=${metaData.type}`);
this.eventSource.onmessage = async (e) => {
const data = safeJSONParse(e.data);
if (data?.error) {
@@ -282,10 +325,7 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
this.eventSource?.close();
this.eventSource = null;
try {
- await Promise.all([
- this.loadGraphConfig(metaData),
- this.loadGraphAllNodeList(metaData),
- ]);
+ await this.loadGraphConfig(metaData)
this.initGraphBoard(); // 先读取配置,再加载图,顺序很重要
this.progreesLoading('初始化完成', '请稍后', data);
} catch (error) {
@@ -303,7 +343,7 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
}
this.eventSource?.close();
};
- };
+ }
loadGraphConfig = async (metaData) => {
const { success, data, error } = await this.useGraphAscend.loadGraphConfig(metaData);
@@ -317,14 +357,28 @@ class TfGraphDashboard extends LegacyElementMixin(PolymerElement) {
this.set('task', config.task);
this.set('matchedConfigFiles', ['未选择', ...config.matchedConfigFiles]);
const microstepsCount = Number(config.microSteps);
+ const ranks = config.ranks || [0];
+ const steps = config.steps || [0];
if (microstepsCount) {
const microstepsArray = Array.from({ length: microstepsCount + 1 }, (_, index) => ({
label: index === 0 ? 'ALL' : String(index - 1),
value: index - 1,
}));
this.set('microsteps', microstepsArray);
- } else {
- this.set('microsteps', []);
+ }
+ if (ranks.length > 0) {
+ const ranksArray = ranks.map((rank) => ({
+ label: rank,
+ value: rank,
+ }))
+ this.set('ranks', ranksArray);
+ }
+ if (steps.length > 0) {
+ const stepsArray = steps.map((step) => ({
+ label: step,
+ value: step,
+ }))
+ this.set('steps', stepsArray);
}
} else {
Notification.show(`图配置加载失败:${error}`, {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/type/index.d.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/type/index.d.ts
index 04066670d861cb2c4f9ece15ce037617afe5b121..dc9b7455e37f692a940c49f2fc79a3c4e7998982 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/type/index.d.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/type/index.d.ts
@@ -21,13 +21,18 @@ export interface ProgressType {
done?: boolean;
}
+export interface SelectedItemType { // One selectable option, shaped like `{ value: 0, label: '0' }`
+ value: number; // numeric value carried by the option
+ label: string; // text of the option — presumably what the selector displays; confirm against the template
+}
+
export interface SelectionType {
run: string;
tag: string;
- type: string;
+ type: 'json' | 'db';
microStep?: number;
- step?: string;
- rank?: string;
+ step?: number;
+ rank?: number;
}
export interface GraphConfigType {
@@ -41,6 +46,8 @@ export interface GraphConfigType {
isSingleGraph: boolean;
matchedConfigFiles: string[];
task: string;
+ ranks: number[];
+ steps: number[]
}
export interface GraphAllNodeType {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/useGraphAscend.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/useGraphAscend.ts
index b83d74219a19b6440e00f9a2412c2dd67338e84f..d340d8a6fb9562de017ff6301f7497291f13c38f 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/useGraphAscend.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_ascend/useGraphAscend.ts
@@ -38,12 +38,12 @@ const useGraphAscend = () => {
}
};
const loadGraphConfig = async (metaData: SelectionType): Promise => {
- const result = await request({ url: 'loadGraphConfigInfo', method: 'POST', data: { metaData }}); // 获取异步的 ArrayBuffer
+ const result = await request({ url: 'loadGraphConfigInfo', method: 'POST', data: { metaData } }); // 获取异步的 ArrayBuffer
return result;
};
const loadGraphAllNodeList = async (metaData: SelectionType): Promise => {
- const result = await request({ url: 'loadGraphAllNodeList', method: 'POST', data: { metaData }}); // 获取异步的 ArrayBuffer
+ const result = await request({ url: 'loadGraphAllNodeList', method: 'POST', data: { metaData } }); // 获取异步的 ArrayBuffer
return result;
};
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/index.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/index.ts
index 692c4d0893533389d2181e40be9068c77062b782..111ea4f6b16b368d0b110db346ff4358169cf905 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/index.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/index.ts
@@ -38,6 +38,7 @@ import { Notification } from '@vaadin/notification';
import type { UseGraphType } from '../../type';
import type { HierarchyNodeType, ContextMenuItem, PreProcessDataConfigType, GraphType } from '../../type';
import type { ContextMenuItemSelectedEvent } from '@vaadin/context-menu';
+import type { SelectionType } from '../../../graph_ascend/type';
const EXPAND_MATCHED_NODE = 1;
const DATA_COMMUNICATION = 2;
@@ -131,7 +132,7 @@ class Hierarchy extends PolymerElement {
isOverflowFilter: boolean = false;
@property({ type: Object })
- selection = {};
+ selection: SelectionType = {} as SelectionType;
@property({ type: Boolean, notify: true })
selectedNode = '';
@@ -356,7 +357,7 @@ class Hierarchy extends PolymerElement {
bindUpdateHierarchyDataEvent() {
const onUpdateHierarchyDataEvent = async () => {
this.set('loading', true);
- const { success, data, error } = await this.useGraph.updateHierarchyData(this.graphType);
+ const { success, data, error } = await this.useGraph.updateHierarchyData(this.graphType, this.selection);
this.set('loading', false);
if (success) {
const hierarchyObject = data;
@@ -435,12 +436,13 @@ class Hierarchy extends PolymerElement {
if (target.tagName.toLowerCase() !== 'rect' && target.tagName.toLowerCase() !== 'text') {
event.stopPropagation();
} else {
- const contextMenuItems: Array = [
- {
+ const contextMenuItems: Array = [];
+ if (this.graphType != 'Single') {
+ contextMenuItems.push({
text: '展开对应侧节点',
type: EXPAND_MATCHED_NODE,
- },
- ];
+ })
+ }
const selectedNode = target.getAttribute('name');
const nodeName = selectedNode?.replace(new RegExp(`^(${NPU_PREFIX}|${BENCH_PREFIX})`), '') ?? '';
const nodeData = this.hierarchyObject[nodeName];
@@ -534,6 +536,7 @@ class Hierarchy extends PolymerElement {
this.dispatchEvent(changeMatchNodeExpandState);
}
const transform = this.changeNodeCenter(nodeName);
+ this.set('needChangeNodeCenter', true);
this.renderGraph(this.hierarchyData, this.hightLightNodeName, transform);
};
const onDoubleClickGraphEvent = (event) => {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/useGraph.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/useGraph.ts
index 415eac9a993ead40c1e2864e7d21b9b50014b3a7..0a874861e08774901475bf98b4096699030dcd2e 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/useGraph.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/components/hierarchy/useGraph.ts
@@ -18,7 +18,7 @@ import { maybeTruncateString, darkenColor, safeJSONParse } from '../../../utils/
import request from '../../../utils/request';
import { isEmpty } from 'lodash';
import { HierarchyNodeType, PreProcessDataConfigType, GraphType } from '../../type';
-
+import { SelectionType } from '../../../graph_ascend/type';
import { UseGraphType } from '../../type';
import {
DURATION_TIME,
@@ -99,11 +99,16 @@ const useGraph = (): UseGraphType => {
if (isEmpty(node.matchedNodeLink)) {
return Object.keys(colors).find((color) => colors[color].value === '无匹配节点') ?? NO_MATCHED_NODE_COLOR;
}
+ if (graphType === 'Bench') {
+ return BENCH_NODE_COLOR;
+ }
+
const precisionValue = parseFloat(node.precisionIndex);
return calcClolorByPrecision(precisionValue, colors);
};
const calcClolorByPrecision = (precisionValue: number, colors: PreProcessDataConfigType['colors']) => {
+
if (isNaN(precisionValue)) {
return BASE_NODE_COLOR; // 默认返回灰色
}
@@ -275,7 +280,7 @@ const useGraph = (): UseGraphType => {
texts.order();
};
- const changeNodeExpandState: UseGraphType['changeNodeExpandState'] = async (nodeInfo: any, metaData: any): Promise => {
+ const changeNodeExpandState: UseGraphType['changeNodeExpandState'] = async (nodeInfo: any, metaData: SelectionType): Promise => {
try {
const metaDataSafe = safeJSONParse(JSON.stringify(metaData));
const params = {
@@ -297,10 +302,14 @@ const useGraph = (): UseGraphType => {
}
};
- const updateHierarchyData = async (graphType: string): Promise => {
- const params = { graphType };
+ const updateHierarchyData = async (graphType: string, metaData: SelectionType): Promise => {
+
try {
- const result = await request({ url: 'updateHierarchyData', method: 'GET', params: params });
+ const params = {
+ metaData,
+ graphType
+ };
+ const result = await request({ url: 'updateHierarchyData', method: 'POST', data: params });
return result;
} catch (err) {
return {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/type/index.d.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/type/index.d.ts
index 47db174ac24e1c03e886ee563ea1e730ed115343..299a7c4fd81c992c4bae41d86680c896daff1ae8 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/type/index.d.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_board/type/index.d.ts
@@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+import { SelectionType } from "../../graph_ascend/type";
export interface HierarchyNodeType {
x: number;
y: number;
@@ -59,9 +61,9 @@ export interface UseGraphType {
config: PreProcessDataConfigType,
transform: { x: number; y: number; scale: number },
) => Array;
- changeNodeExpandState: (nodeInfo: any, metaData: any) => Promise;
+ changeNodeExpandState: (nodeInfo: any, metaData: SelectionType) => Promise;
createComponent: (text, precision, colors: PreProcessDataConfigType['colors']) => any;
- updateHierarchyData: (graphType: string) => Promise;
+ updateHierarchyData: (graphType: string, metaData: SelectionType) => Promise;
}
export interface TransformType {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_color_select/index.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_color_select/index.ts
index 07601d2c6dcd86cb8ce114f3dedb575d83e7be88..1bb53b352ad9959955f6531147ce19349ac8c7e3 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_color_select/index.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_color_select/index.ts
@@ -20,7 +20,6 @@ import * as _ from 'lodash';
import { PolymerElement, html } from '@polymer/polymer';
import { Notification } from '@vaadin/notification';
import { customElement, property, observe } from '@polymer/decorators';
-import { fetchPbTxt, safeJSONParse } from '../../../utils';
import { NPU_PREFIX, UNMATCHED_COLOR, defaultColorSetting, defaultColorSelects } from '../../../common/constant';
import request from '../../../utils/request';
import { DarkModeMixin } from '../../../polymer/dark_mode_mixin';
@@ -246,7 +245,7 @@ class Legend extends LegacyElementMixin(DarkModeMixin(PolymerElement)) {
- ([[precisionmenu.length]])
+ ([[overflowmenu.length]])
{
- const { run, tag, microStep } = this.selection;
const item = event.model.item;
let checkbox;
let overflowCheckbox;
@@ -823,14 +830,6 @@ class Legend extends LegacyElementMixin(DarkModeMixin(PolymerElement)) {
} else {
overflowCheckbox = this.shadowRoot?.getElementById(`overflowCheckbox-${event.model.index}`) as HTMLInputElement;
}
- const params = new URLSearchParams();
- if (run) {
- params.set('run', run);
- }
- if (tag) {
- params.set('tag', tag);
- }
- params.set('microStep', String(microStep));
// 更新 selectColor 数组
if (checkbox) {
if (checkbox.checked) {
@@ -845,29 +844,34 @@ class Legend extends LegacyElementMixin(DarkModeMixin(PolymerElement)) {
}
if (this.selectColor.length === 0) {
this.precisionmenu = [];
+ this.set('selectedPrecisionNode', '');
return;
}
- params.set('precision_index', this.selectColor.join(','));
- const screenPath = `screen?${String(params)}`;
- try {
- const screenStr = fetchPbTxt(screenPath);
- const precisionmenu = safeJSONParse(new TextDecoder().decode(await screenStr).replace(/'/g, '"')) as object;
- this.set('precisionmenu', precisionmenu);
+ const params = {
+ metaData: this.selection,
+ type: 'precision',
+ values: this.selectColor,
+ };
+ const { success, data, error } = await request({ url: 'screen', method: 'POST', data: params });
+
+ if (success) {
+ this.set('precisionmenu', data);
// 更新数据绑定
this.notifyPath(`menu.${event.model.index}.checked`, checkbox.checked);
// 清除精度筛选输入框
- this.set('selectedPrecisionNode', precisionmenu?.[0] || '');
+ this.set('selectedPrecisionNode', data?.[0] || '');
+ // 选中第一个选项
setTimeout(() => {
this._observePrecsionNode();
}, 200)
- } catch (e) {
- Notification.show(`获取精度菜单失败,请检查 toggleCheckbox 和 vis 文件中的数据。`, {
+ }
+ else {
+ Notification.show(`Error:${error}`, {
position: 'middle',
duration: 4000,
theme: 'error',
});
}
-
} else {
if (overflowCheckbox.checked) {
this.overflowLevel.push(item[1]); // 添加选中的颜色
@@ -881,23 +885,31 @@ class Legend extends LegacyElementMixin(DarkModeMixin(PolymerElement)) {
this.overflowmenu = [];
return;
}
- params.set('overflow_level', this.overflowLevel.join(','));
- const screenPath = `screen?${String(params)}`;
-
- try {
- const screenStr = fetchPbTxt(screenPath);
- this.overflowmenu = safeJSONParse(new TextDecoder().decode(await screenStr).replace(/'/g, '"')) as object;
- } catch (e) {
- Notification.show(`获取溢出菜单失败,请检查 toggleCheckbox 和 vis 文件中的数据。`, {
+
+ const params = {
+ metaData: this.selection,
+ type: 'overflow',
+ values: this.overflowLevel,
+ };
+ const { success, data, error } = await request({ url: 'screen', method: 'POST', data: params });
+ if (success) {
+ this.set('overflowmenu', data);
+ // 更新数据绑定
+ this.notifyPath(`menu.${event.model.index}.checked`, overflowCheckbox.checked);
+ // 清除精度筛选输入框
+ this.set('selectedOverflowNode', data?.[0] || '');
+ // 选中第一个选项
+ setTimeout(() => {
+ this._observeOverFlowNode();
+ }, 200)
+ }
+ else {
+ Notification.show(`Error:${error}`, {
position: 'middle',
duration: 4000,
theme: 'error',
});
}
- // 更新数据绑定
- this.notifyPath(`menu.${event.model.index}.checked`, overflowCheckbox.checked);
- // 清除精度溢出输入框
- this.set('selectedOverflowNode', '');
}
}
@@ -915,6 +927,9 @@ class Legend extends LegacyElementMixin(DarkModeMixin(PolymerElement)) {
};
_observeOverFlowNode = () => {
+ if (!this.selectedOverflowNode) {
+ return;
+ }
const prefix = this.isSingleGraph ? '' : NPU_PREFIX;
const node = prefix + this.selectedOverflowNode;
this.set('selectedNode', node);
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_main_controler/index.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_main_controler/index.ts
index f6088d8d23c2569906891f90c8dbdd1f4e42a126..d49d9185c12b65b2aa3cdd1c384f0ea159da8fc3 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_main_controler/index.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_main_controler/index.ts
@@ -23,6 +23,7 @@ import { customElement, property, observe } from '@polymer/decorators';
import { isEmpty } from 'lodash';
import type { SelectionType } from '../../../graph_ascend/type';
import type { MetaDirType } from '../../type';
+import { DB_TYPE } from '../../../common/constant';
@customElement('tf-main-controler')
class MainController extends PolymerElement {
// 定义模板
@@ -63,6 +64,10 @@ class MainController extends PolymerElement {
+
+
+
+
`;
@@ -82,20 +87,36 @@ class MainController extends PolymerElement {
@property({ type: Array })
tags = [];
+ @property({ type: Boolean })
+ isDBType = false;
+
@property({ type: Array })
microsteps = [];
+ @property({ type: Array })
+ steps: any[] = [{ value: 0, label: '0' }];
+
+ @property({ type: Array })
+ ranks: any[] = [{ value: 0, label: '0' }];
+
@property({ type: String })
selectedRun = '';
@property({ type: String })
selectedTag = '';
+ @property({ type: Number })
+ selectedRank = 0;
+
+ @property({ type: Number })
+ selectedStep = 0;
+
@property({ type: Number })
selectedMicroStep = -1;
@observe('metaDir')
_metaDirChanged(): void {
+
if (isEmpty(this.metaDir)) {
return;
}
@@ -111,7 +132,6 @@ class MainController extends PolymerElement {
}
const { type, tags } = this.metaDir[this.selectedRun];
this.set('tags', tags);
- this.set('selectedTag', tags[0]);
const selection = {
...this.selection,
run: this.selectedRun,
@@ -119,9 +139,18 @@ class MainController extends PolymerElement {
microStep: -1,
type
};
+ const isDBType = type == DB_TYPE;
+
+ if (isDBType) {
+ selection['step'] = 0;
+ selection['rank'] = 0;
+ this.set('selectedStep', 0);
+ this.set('selectedRank', 0);
+ }
+ this.set('selection', selection);
+ this.set('isDBType', isDBType);
this.set('selectedTag', tags[0]);
this.set('selectedMicroStep', -1);
- this.set('selection', selection);
}
@observe('selectedTag')
@@ -134,8 +163,42 @@ class MainController extends PolymerElement {
tag: this.selectedTag,
microStep: -1,
};
+ if (this.isDBType) {
+ selection['step'] = 0;
+ selection['rank'] = 0;
+ this.set('selectedStep', 0);
+ this.set('selectedRank', 0);
+ }
+ this.set('selection', selection);
this.set('selectedMicroStep', -1);
+ }
+
+  /**
+   * Observer for `selectedStep`.
+   * Once `metaDir` is populated, publishes a new `selection` carrying the chosen
+   * step and resets the micro-step (in both `selection` and `selectedMicroStep`) to -1.
+   */
+  @observe('selectedStep')
+  _selectedStepChanged(): void {
+    const hasMetaDir = !isEmpty(this.metaDir);
+    if (hasMetaDir) {
+      // Copy instead of mutating so that `set` sees a new object reference.
+      const nextSelection = Object.assign({}, this.selection, {
+        step: this.selectedStep,
+        microStep: -1,
+      });
+      this.set('selection', nextSelection);
+      this.set('selectedMicroStep', -1);
+    }
+  }
+
+ @observe('selectedRank')
+ _selectedRankChanged(): void { // Observer: runs whenever `selectedRank` changes
+ if (isEmpty(this.metaDir)) { // nothing to update while `metaDir` is still empty
+ return;
+ }
+ const selection = { // new object built from the current selection rather than mutating it
+ ...this.selection,
+ rank: this.selectedRank, // carry the newly chosen rank
+ microStep: -1, // a rank change always resets the micro-step to -1
+ };
this.set('selection', selection);
+ this.set('selectedMicroStep', -1); // keep the selector property in line with the reset stored in `selection`
}
@observe('selectedMicroStep')
@@ -157,14 +220,22 @@ class MainController extends PolymerElement {
+  /**
+   * Handles an event asking to switch to the rank given by `event.detail.rankId`.
+   * - DB data (`isDBType`): the id is stored directly in `selectedRank`.
+   * - Otherwise: the id is used as an index into `tags` and that tag is selected.
+   * Every other case (missing id, index past the end of `tags`) shows the
+   * `invalid_rank_id` warning and changes nothing.
+   */
_getTagChanged(event): void {
const detail = event.detail;
- if (!detail?.rankId || detail?.rankId >= this.tags.length) {
+    // `!= undefined` is deliberate: it rejects null/undefined but accepts rank 0.
+    if (detail?.rankId != undefined && this.isDBType) {
+      setTimeout(() => {
+        this.set('selectedRank', detail?.rankId);
+      });
+    }
+    // `rankId` indexes `tags`, so it has to be strictly smaller than the length;
+    // `<=` would let `tags[tags.length]` (undefined) become the selected tag.
+    else if (detail?.rankId != undefined && !this.isDBType && detail?.rankId < this.tags.length) {
+      setTimeout(() => {
+        this.set('selectedTag', this.tags[detail?.rankId]);
+      });
+    }
+    else {
Notification.show(this.t('invalid_rank_id'), {
position: 'middle',
duration: 2000,
theme: 'warning',
});
- return;
}
- this.set('selectedTag', this.tags[detail?.rankId]);
}
}
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_manual_match/index.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_manual_match/index.ts
index 982614d57c732d8ebfa06b1602e43d3ac8a2cd25..046baadc003f2db5dbdf242ee2e9311f0de8046d 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_manual_match/index.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/components/tf_manual_match/index.ts
@@ -24,12 +24,14 @@ import { isEmpty } from 'lodash';
import { Notification } from '@vaadin/notification';
import { PolymerElement, html } from '@polymer/polymer';
import { customElement, property, observe } from '@polymer/decorators';
-import { NPU_PREFIX, BENCH_PREFIX } from '../../../common/constant';
+import { NPU_PREFIX, BENCH_PREFIX, DB_TYPE } from '../../../common/constant';
import useMatched from './useMatched';
+import '../tf_search_combox/index';
import i18next from '../../../common/i18n'
import type { UseMatchedType } from '../../type';
+import type { SelectionType } from '../../../graph_ascend/type';
+
-import '../tf_search_combox/index';
@customElement('tf-manual-match')
class Legend extends PolymerElement {
// 定义模板
@@ -209,21 +211,23 @@ class Legend extends PolymerElement {
text="手动匹配结束后,点击保存匹配节点信息,会将已匹配的节点对应关系保存到配置文件中,不会持久原始文件,如果是初次保存,会新建一个文件,文件名称为:[当前文件名].vis.config。"
position="end"
>
- 保存
-
-
-
-
+
+ 保存
+
+
+
+
+
@@ -235,8 +239,11 @@ class Legend extends PolymerElement {
@property({ type: Object })
unmatched: any = [];
+ @property({ type: Boolean })
+ isDBType = false;
+
@property({ type: Object })
- selection: any;
+ selection: SelectionType = {} as SelectionType;
@property({ type: Boolean })
isCompareGraph: boolean = true;
@@ -305,6 +312,13 @@ class Legend extends PolymerElement {
npuMatchedNodeList = {};
benchMatchedNodeList = {};
+  /** Observer for `selection`: keeps `isDBType` in sync with the selection's `type`. */
+  @observe('selection')
+  _observeSelection(): void {
+    // Strict comparison: both sides are strings, no coercion is wanted.
+    this.set('isDBType', this.selection?.type === DB_TYPE);
+  }
+
+
+
@observe('t')
_observeT(): void {
if (this.t) {
@@ -323,7 +337,7 @@ class Legend extends PolymerElement {
}
@observe('npuMatchNodes', 'benchMatchNodes')
- _observeSelection(): void {
+ _observeMatchNodes(): void {
if (!this.isCompareGraph) {
return;
}
@@ -449,7 +463,7 @@ class Legend extends PolymerElement {
const benchUnMatchNodes = data?.benchUnMatchNodes || [];
// 更新节点之间的匹配关系,更新匹配精度,节点重新上色
const updateHierarchyData = new CustomEvent('updateHierarchyData', { bubbles: true, composed: true });
- const porcessedNodeNum = Math.abs(npuUnMatchNodes?.length - this.npuUnMatchedNodes?.length);
+ const processedNodeNum = Math.abs(npuUnMatchNodes?.length - this.npuUnMatchedNodes?.length);
this.dispatchEvent(updateHierarchyData);
// 更新匹配关系
this.npuMatchedNodeList = npuMatchNodes;
@@ -466,7 +480,7 @@ class Legend extends PolymerElement {
// 已匹配列表清空选中的节点
this.set('selectedNpuMatchedNode', '');
this.set('selectedBenchMatchedNode', '');
- Notification.show(`取消成功:取消节点数 ${porcessedNodeNum} 个,对应节点状态已更新`, {
+ Notification.show(`取消成功:取消节点数 ${processedNodeNum} 个,对应节点状态已更新`, {
position: 'middle',
duration: 4000,
theme: 'success',
@@ -499,16 +513,16 @@ class Legend extends PolymerElement {
);
this.set('matchConfigLoading', false);
if (success) {
- const matchReslut = data?.matchReslut || [];
+ const matchResult = data?.matchResult || [];
const npuMatchNodes = data?.npuMatchNodes || {};
const benchMatchNodes = data?.benchMatchNodes || {};
const npuUnMatchNodes = data?.npuUnMatchNodes;
const benchUnMatchNodes = data?.benchUnMatchNodes;
// 更新节点之间的匹配关系,更新匹配精度,节点重新上色
const updateHierarchyData = new CustomEvent('updateHierarchyData', { bubbles: true, composed: true });
- const porcessedNodeNum = matchReslut?.length;
- const matchSuccessNum = matchReslut?.filter(Boolean).length;
- const matchFailedNum = porcessedNodeNum - matchSuccessNum;
+ const processedNodeNum = matchResult?.length;
+ const matchSuccessNum = matchResult?.filter(Boolean).length;
+ const matchFailedNum = processedNodeNum - matchSuccessNum;
// 更新匹配关系
this.npuMatchedNodeList = npuMatchNodes;
this.benchMatchedNodeList = benchMatchNodes;
@@ -520,7 +534,7 @@ class Legend extends PolymerElement {
this.set('npuUnMatchedNodes', npuUnMatchNodes);
this.set('benchUnMatchedNodes', benchUnMatchNodes);
Notification.show(
- `匹配成功:匹配节点数 ${porcessedNodeNum} 个,其中成功 ${matchSuccessNum} 个,失败 ${matchFailedNum} 个`,
+ `匹配成功:匹配节点数 ${processedNodeNum} 个,其中成功 ${matchSuccessNum} 个,失败 ${matchFailedNum} 个`,
{
position: 'middle',
duration: 4000,
@@ -570,7 +584,7 @@ class Legend extends PolymerElement {
const benchUnMatchNodes = data?.benchUnMatchNodes || [];
// 更新节点之间的匹配关系,更新匹配精度,节点重新上色
const updateHierarchyData = new CustomEvent('updateHierarchyData', { bubbles: true, composed: true });
- const porcessedNodeNum = Math.abs(npuUnMatchNodes?.length - this.npuUnMatchedNodes.length);
+ const processedNodeNum = Math.abs(npuUnMatchNodes?.length - this.npuUnMatchedNodes.length);
// 更新匹配关系
this.npuMatchedNodeList = npuMatchNodes;
this.benchMatchedNodeList = benchMatchNodes;
@@ -587,7 +601,7 @@ class Legend extends PolymerElement {
// 未匹配列表清空选中的节点
this.set('selectedNpuUnMatchedNode', '');
this.set('selectedBenchUnMatchedNode', '');
- Notification.show(`匹配成功:匹配节点数 ${porcessedNodeNum} 个,对应节点状态已更新`, {
+ Notification.show(`匹配成功:匹配节点数 ${processedNodeNum} 个,对应节点状态已更新`, {
position: 'middle',
duration: 4000,
theme: 'success',
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/index.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/index.ts
index 4c129f688ce20f8dc9bcdbe5faf24b07f465e584..c115f93b13025d6c76966033194532b7f26d3a25 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/index.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/index.ts
@@ -86,8 +86,6 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
margin-right: 12px;
}
-
-
.icon-button {
font-size: var(--tb-graph-controls-title-font-size);
}
@@ -134,6 +132,7 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
}
.tab-content {
+ position: relative;
background: rgb(255, 255, 255);
padding: 0 20px;
flex-grow: 1;
@@ -174,6 +173,38 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
font-size: var(--tb-graph-controls-title-font-size);
margin-left: -4px;
}
+
+ .vaadin-details-selected {
+ display: flex;
+ padding-top: 0;
+ }
+
+ .vaadin-details-title {
+ font-size: 14px;
+ color: #333333;
+ font-weight: 600;
+ margin-bottom: 0;
+ }
+
+ .vaadin-details vaadin-details-summary {
+ font-size: 15px;
+ color: #333333;
+ font-weight: 600;
+ }
+
+ .loading-wrapper{
+ position: absolute;
+ width: 90%;
+ height: 90%;
+ z-index: 999;
+ color: rgba(37, 37, 37, 0.8);
+ background-color: rgba(255, 255, 255, 0.76);
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ font-size: 20px;
+ font-weight: 600;
+ }
paper-checkbox {
--paper-checkbox-unchecked-color: gray; /* 选中时的颜色 */
user-select: none;
@@ -182,7 +213,7 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
-
+
@@ -205,17 +236,11 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
meta-dir="[[metaDir]]"
selection="{{selection}}"
microsteps="[[microsteps]]"
+ ranks="[[ranks]]"
+ steps="[[steps]]"
>
-
-
-
-
+
+
+
+ Loading......
+
+
+
+
+
+
+
`;
@property({ type: Object })
t: Function = (key) => i18next.t(key);
+ @property({ type: Boolean })
+ loading: boolean = false;
+
@property({ type: Object })
metaDir: MetaDirType = {} as MetaDirType;
@@ -287,6 +330,14 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
@property({ type: Object, notify: true })
colors: any;
+ @property({ type: Object, notify: true })
+ loadAllNodeList: Function = () => { };
+
+ @property({ type: Object, notify: true })
+ needLoadAllNodeList: boolean = true;
+
+
+
override ready(): void {
super.ready();
this._showTabContent(this.t('settings'), 'nodes-content');
@@ -334,8 +385,14 @@ class TfGraphControls extends LegacyElementMixin(DarkModeMixin(PolymerElement))
this._showTabContent(this.t('settings'), 'nodes-content');
}
- _showMatch(): void {
- this._showTabContent(this.t('match'), 'match-content');
+ async _showMatch(): Promise {
+ this._showTabContent(this.t('function'), 'match-content');
+ if (this.loadAllNodeList && this.needLoadAllNodeList) {
+ this.set('loading', true)
+ await this.loadAllNodeList(this.selection);
+ this.set('needLoadAllNodeList', false) //已经加载过一次,不需要再加载
+ this.set('loading', false)
+ }
}
_fit(): void {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/type/index.d.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/type/index.d.ts
index 06146458b7169680177bfe5e248dfaa752f581d3..be538a873c11b4ea292aa37623dfd9b2d020d181 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/type/index.d.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/graph_controls_board/type/index.d.ts
@@ -41,6 +41,6 @@ export interface MatchResultType {
benchMatchNodes: Record;
npuUnMatchNodes: string[];
benchUnMatchNodes: string[];
- matchReslut?: boolean[];
+ matchResult?: boolean[];
};
};
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/utils/request.ts b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/utils/request.ts
index 0f5d6b13308aa9e851894ab35cb7e7729e74fbae..0232936edc143c39fcc019c2788777bdac1903b3 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/utils/request.ts
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/fe/src/utils/request.ts
@@ -29,16 +29,17 @@ interface ApiResponse {
}
export default async function request(options: RequestOptions): Promise> {
const { url, method = 'GET', data = null, params = null, headers = {}, timeout = 60000 * 3 } = options;
-
+ const controller = new AbortController();
+ const signal = controller.signal;
+ const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
- const controller = new AbortController();
- const signal = controller.signal;
- const timeoutId = setTimeout(() => controller.abort(), timeout);
+
if (typeof params === 'object' && params !== null) {
if ('metaData' in params) {
params.metaData.type = 'rank' in params.metaData ? 'db' : 'json';
}
}
+
const response: AxiosResponse = await axios({
url,
method,
@@ -62,6 +63,7 @@ export default async function request(options: RequestOptions): Promise
};
}
} catch (error) {
+ clearTimeout(timeoutId);
if (axios.isAxiosError(error)) {
const axiosError = error as AxiosError;
if (axiosError.code === 'ECONNABORTED') {
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/__init__.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/__init__.py
similarity index 100%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/__init__.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/__init__.py
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/hierarchy.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/hierarchy.py
similarity index 77%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/hierarchy.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/hierarchy.py
index 33bd1b7bae092c2c538d5837a64fe8bb7ed78dad..72510fbd005076e93f2d2b3e4e7d5a45ac90c8cd 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/hierarchy.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/hierarchy.py
@@ -1,3 +1,4 @@
+
# Copyright (c) 2025, Huawei Technologies.
# All Rights Reserved.
#
@@ -14,7 +15,8 @@
# limitations under the License.
# ==============================================================================
from tensorboard.util import tb_logging
-from ..utils.global_state import NPU_PREFIX, BENCH_PREFIX, NPU, SINGLE, UNEXPAND_NODE, MODULE
+from ..utils.global_state import GraphState
+from ..utils.global_state import NPU_PREFIX, BENCH_PREFIX, NPU, SINGLE, UNEXPAND_NODE, MODULE, DataType
logger = tb_logging.get_logger()
@@ -23,40 +25,50 @@ INNER_HIGHT = 15 # 内部节点高度
HORIZONTAL_SPACING = 5 # 横向排列间距
VERTICAL_SPACING = 10 # 纵向排列间距
MAX_PER_ROW = 5 # 横向每行最大数
+DB_TYPE = DataType.DB.value
+JSON_TYPE = DataType.JSON.value
class Hierarchy:
- def __init__(self, graph_type, graph, micro_step):
- root_node_name = graph.get('root')
- node_info = graph.get('node', {}).get(root_node_name, {})
+ def __init__(self, graph_type, repo, micro_step, rank, step):
+ # DB:查询根节点信息
+ node_info = repo.query_root_nodes(graph_type, rank, step)
+ if(not node_info):
+ logger.error("No root node info found in database.")
+ return
+ root_node_name = node_info.get('node_name')
name_prefix = NPU_PREFIX if graph_type == NPU else BENCH_PREFIX
name_prefix = '' if graph_type == SINGLE else name_prefix
+ self.repo = repo
self.name_prefix = name_prefix
self.root_name = root_node_name
self.graph_type = graph_type
- self.graph = graph
+ self.rank = rank
+ self.step = step
self.micro_step_id = micro_step
self.current_hierarchy = {
root_node_name: self.get_basic_rende_info(root_node_name, node_info)
}
# 默认展开根节点
- self.update_graph_data(self.root_name, graph)
+ self.update_graph_data(self.root_name)
self.update_graph_shape()
self.update_graph_position()
- @staticmethod
- def measure_text_width(text):
+ @classmethod
+ def measure_text_width(cls, text):
return len(text) * 6 # 假设每个字符宽度为6
- @staticmethod
- def extract_label_name(node_name, node_type):
+ @classmethod
+ def extract_label_name(cls, node_name, node_type):
+ if not node_name:
+ return ''
splited_subnode_name = node_name.split('.')
splited_label = []
# 在展开层级时,将父级层级名称相关去除,仅保留子节点本身名称信息
# 如Module.layer1.1.relu.ReLU.forward.0中的父级名称Module.layer1.1去除,仅保留子级的relu.ReLU.forward.1
# 如Module.layer4.0.BasicBlock.forward.0中的父级名称Module.1去除,仅保留子级的layer4.0.BasicBlock.forward.0
- if node_type == MODULE:
+ if int(node_type) == MODULE:
if len(splited_subnode_name) < 4:
return node_name
splited_label = splited_subnode_name[-4:] if not splited_subnode_name[
@@ -71,14 +83,14 @@ class Hierarchy:
-2].isdigit() else splited_subnode_name[-3:]
return ('.').join(splited_label)
- def update_graph_data(self, node_name, graph):
+ def update_graph_data(self, node_name):
target_node = self.current_hierarchy.get(node_name, {})
if node_name == self.root_name or target_node:
- self.process_click_expand(node_name, graph)
+ self.process_click_expand(node_name)
else: # 如果图中不存在该节点,说明是选中节点,需要递归展开该节点的所有父节点
- self.process_select_expand(node_name, graph)
+ self.process_select_expand(node_name)
- def process_click_expand(self, node_name, graph):
+ def process_click_expand(self, node_name):
target_node = self.current_hierarchy.get(node_name, {})
target_node_children = target_node.get("children", [])
if not target_node or not target_node_children:
@@ -86,34 +98,40 @@ class Hierarchy:
if not target_node.get('expand', False):
# 1.将target_node的expand置为true
# 2.将node_name的子节点信息初始化,并添加到current_hierarchy中
- for subnode_name in target_node_children:
+ # DB: 查询所有以当前为父节点的子节点
+ sub_nodes = self.repo.query_sub_nodes(node_name, self.graph_type, self.rank, self.step)
+ for subnode_name, node_info in sub_nodes.items():
if self.current_hierarchy.get(subnode_name):
continue
- node_info = graph.get('node', {}).get(subnode_name, {})
render_info = self.get_basic_rende_info(subnode_name, node_info)
self.current_hierarchy[subnode_name] = render_info
target_node['expand'] = True
else:
target_node['expand'] = False if node_name != self.root_name else True # 根节点默认展开
- def process_select_expand(self, node_name, graph):
- parent_node_name = graph.get('node', {}).get(node_name, {}).get("upnode")
- parent_node = self.current_hierarchy.get(parent_node_name)
+ def process_select_expand(self, node_name):
+ # DB:逻辑
+ # 1.查询当前节点及其所有父节点,一直到没有父节点为止{}
+
+ # DB:查询当前节点的父节点信息
+ up_nodes = self.repo.query_up_nodes(node_name, self.graph_type, self.rank, self.step)
+ parent_node_name = up_nodes.get(node_name, {}).get('upnode')
+ parent_node = self.current_hierarchy.get(parent_node_name, '')
# 递归展开父节点
while not parent_node or not parent_node.get('expand', False):
- if not parent_node: # 如果父节点不存在,则初始化父节点
- node_info = graph.get('node', {}).get(parent_node_name)
- render_info = self.get_basic_rende_info(parent_node_name, node_info)
+ if not parent_node: # 如果父节点不存在图中,则初始化父节点
+
+ render_info = self.get_basic_rende_info(parent_node_name, up_nodes.get(parent_node_name))
self.current_hierarchy[parent_node_name] = render_info
try:
- self.process_click_expand(parent_node_name, graph) # 展开父节点
+ self.process_click_expand(parent_node_name) # 展开父节点
except Exception as e:
logger.error(f"Failed to expand parent node {parent_node_name}: {e}")
break
if parent_node_name == self.root_name:
break
- parent_node_name = graph.get('node', {}).get(parent_node_name, {}).get("upnode")
- parent_node = self.current_hierarchy.get(parent_node_name)
+ parent_node_name = up_nodes.get(parent_node_name, {}).get('upnode')
+ parent_node = self.current_hierarchy.get(parent_node_name, '')
def update_graph_shape(self):
self.resize_hierarchy(self.root_name)
@@ -132,7 +150,7 @@ class Hierarchy:
return
if not node.get('expand', False):
# 未展开的父节点按文字宽度
- node['width'] = Hierarchy.measure_text_width(node.get('label', '')) + HORIZONTAL_SPACING * 2 # 文字宽度 + 边距
+ node['width'] = self.measure_text_width(node.get('label', '')) + HORIZONTAL_SPACING * 2 # 文字宽度 + 边距
node['height'] = INNER_HIGHT
return
for child_name in node.get('children', []):
@@ -166,7 +184,7 @@ class Hierarchy:
# 最终尺寸计算
node['width'] = max(
max_child_width + HORIZONTAL_SPACING * 2, # 子节点最大宽度 + 边距
- Hierarchy.measure_text_width(node.get('label', '')) + HORIZONTAL_SPACING * 2 # 保证文字可见
+ self.measure_text_width(node.get('label', '')) + HORIZONTAL_SPACING * 2 # 保证文字可见
)
node['height'] = total_height + VERTICAL_SPACING # 总高度 + 边距
@@ -241,10 +259,11 @@ class Hierarchy:
return {}
label = node_name
children = []
+ precision_index = None,
if node_name == self.root_name: # 根节点,根据micro_step获取子节点
- target_node_children = node_info.get('subnodes', [])
- for subnode_name in target_node_children:
- child_node = self.graph.get('node', {}).get(subnode_name, {})
+ # DB:查询以根节点为父节点的所有子节点
+ sub_nodes = self.repo.query_sub_nodes(node_name, self.graph_type, self.rank, self.step)
+ for subnode_name, child_node in sub_nodes.items():
child_micro_step_id = child_node.get('micro_step_id', -1) # 如果子节点不包含micro_step_id,则默认为-1,直接添加
is_append_all_node = int(self.micro_step_id) == -1 or child_micro_step_id == -1
is_append_split_node = int(self.micro_step_id) != -1 and int(child_micro_step_id) == int(
@@ -253,12 +272,18 @@ class Hierarchy:
children.append(subnode_name)
else:
children = node_info.get('subnodes', [])
+ # precisionIndex 先从缓存中获取(更新误差可能会更新缓存),如果不存在,则从node_info中获取
+ update_precision_cache = GraphState.get_global_value('update_precision_cache', {})
+ if update_precision_cache.get(node_name) is not None:
+ precision_index = update_precision_cache.get(node_name, {}).get('precision_index')
+ else:
+ precision_index = node_info.get('data', {}).get('precision_index', "NaN")
if node_info.get('upnode', '') != self.root_name: # 首层节点不处理显示内容
- label = Hierarchy.extract_label_name(node_name, node_info.get('node_type'))
+ label = self.extract_label_name(node_name, node_info.get('node_type'))
render_info = {
'x': 0,
'y': 0,
- 'width': Hierarchy.measure_text_width(node_name) + HORIZONTAL_SPACING * 2,
+ 'width': self.measure_text_width(node_name) + HORIZONTAL_SPACING * 2,
'height': INNER_HIGHT,
'expand': False,
'isRoot': node_name == self.root_name,
@@ -268,11 +293,10 @@ class Hierarchy:
'children': children,
'nodeType': node_info.get('node_type') if node_info.get("subnodes") else UNEXPAND_NODE,
'matchedNodeLink': node_info.get('matched_node_link', []),
- 'precisionIndex': node_info.get('data', {}).get('precision_index', "NaN"), # 精度
+ 'precisionIndex': precision_index, # 精度
'overflowLevel': node_info.get('data', {}).get('overflow_level', "NaN"), # 溢出
'matchedDistributed': node_info.get('matched_distributed', {}),
}
-
return render_info
# 获取连通图
@@ -289,12 +313,24 @@ class Hierarchy:
self.get_connected_graph(child_name, result, new_hierarchy)
def update_hierarchy_data(self):
+ if(self.repo.repo_type == DB_TYPE):
+ return self.current_hierarchy
+ # 处理JSON
for node_name, node_info in self.current_hierarchy.items():
- graph_node_info = self.graph.get('node', {}).get(node_name, {})
+ graph_node_info = self.repo.query_node_info(node_name, self.graph_type)
node_info['matchedNodeLink'] = graph_node_info.get('matched_node_link', [])
node_info['precisionIndex'] = graph_node_info.get('data', {}).get('precision_index', "NaN")
return self.current_hierarchy
+ def update_current_hierarchy_data(self, data):
+ for node_info in data:
+ node_name = node_info.get('node_name')
+ if node_name in self.current_hierarchy:
+ current_node_info = self.current_hierarchy[node_name]
+ current_node_info['matchedNodeLink'] = node_info.get('matched_node_link')
+ current_node_info['precisionIndex'] = node_info.get('precision_index')
+ return True
+
def get_hierarchy(self):
result = {}
new_hierarchy = {}
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/layout_hierarchy_controller.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/layout_hierarchy_model.py
similarity index 35%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/layout_hierarchy_controller.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/layout_hierarchy_model.py
index a88f09474b7526fe72b037f7a57b2dbb663ffb93..cd8dc97be7242803055ff8e316f776e21ad12c1d 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/layout_hierarchy_controller.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/layout_hierarchy_model.py
@@ -14,34 +14,54 @@
# limitations under the License.
# ==============================================================================
from .hierarchy import Hierarchy
+from ..utils.global_state import GraphState, NPU, BENCH, SINGLE
-class LayoutHierarchyController:
+class LayoutHierarchyModel:
npu_hierarchy = None
bench_hierarchy = None
single_hierarchy = None
-
+
hierarchy = {
- 'NPU': npu_hierarchy,
- 'Bench': bench_hierarchy,
- 'Single': single_hierarchy
- }
+ NPU: npu_hierarchy,
+ BENCH: bench_hierarchy,
+ SINGLE: single_hierarchy
+ }
@staticmethod
- def change_expand_state(node_name, graph_type, graph, micro_step):
+ def change_expand_state(node_name, graph_type, repo, micro_step, rank_step):
if node_name == 'root':
- LayoutHierarchyController.hierarchy[graph_type] = Hierarchy(graph_type, graph, micro_step)
- elif LayoutHierarchyController.hierarchy.get(graph_type, None):
- LayoutHierarchyController.hierarchy[graph_type].update_graph_data(node_name, graph)
- LayoutHierarchyController.hierarchy[graph_type].update_graph_shape()
- LayoutHierarchyController.hierarchy[graph_type].update_graph_position()
+ rank = rank_step.get('rank')
+ step = rank_step.get('step')
+ GraphState.set_global_value("update_precision_cache", {}) # 切换图清缓存
+ LayoutHierarchyModel.hierarchy[graph_type] = Hierarchy(graph_type, repo, micro_step, rank, step)
+
+ elif LayoutHierarchyModel.hierarchy.get(graph_type, None):
+ LayoutHierarchyModel.hierarchy[graph_type].update_graph_data(node_name)
+ LayoutHierarchyModel.hierarchy[graph_type].update_graph_shape()
+ LayoutHierarchyModel.hierarchy[graph_type].update_graph_position()
else:
return {}
- return LayoutHierarchyController.hierarchy[graph_type].get_hierarchy()
+ return LayoutHierarchyModel.hierarchy[graph_type].get_hierarchy()
@staticmethod
def update_hierarchy_data(graph_type):
- if LayoutHierarchyController.hierarchy.get(graph_type, None):
- return LayoutHierarchyController.hierarchy[graph_type].update_hierarchy_data()
+ if LayoutHierarchyModel.hierarchy.get(graph_type, None):
+ return LayoutHierarchyModel.hierarchy[graph_type].update_hierarchy_data()
else:
return {}
+
+ @staticmethod
+ def update_current_hierarchy_data(data):
+ npu_update_data = []
+ bench_update_data = []
+ for node in data:
+ if node['graph_type'] == NPU:
+ npu_update_data.append(node)
+ elif node['graph_type'] == BENCH:
+ bench_update_data.append(node)
+ if LayoutHierarchyModel.hierarchy.get(NPU, None) and npu_update_data:
+ LayoutHierarchyModel.hierarchy[NPU].update_current_hierarchy_data(npu_update_data)
+ if LayoutHierarchyModel.hierarchy.get(BENCH, None) and bench_update_data:
+ LayoutHierarchyModel.hierarchy[BENCH].update_current_hierarchy_data(bench_update_data)
+
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/match_nodes_controller.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/match_nodes_model.py
similarity index 72%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/match_nodes_controller.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/match_nodes_model.py
index 588dcb16e0d8f12c8bc6e4ff77ffedec433615fe..51a92bccaaccf02db3be6a5584a15c46f7e15240 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/controllers/match_nodes_controller.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/model/match_nodes_model.py
@@ -15,7 +15,7 @@
# ==============================================================================
from ..utils.graph_utils import GraphUtils
-from ..utils.global_state import ADD_MATCH_KEYS, MODULE
+from ..utils.global_state import ADD_MATCH_KEYS, MODULE, NPU, BENCH
from ..utils.global_state import GraphState
@@ -25,110 +25,77 @@ class MatchNodesController:
def is_same_node_type(graph_data, npu_node_name, bench_node_name):
npu_node_type = graph_data.get('NPU', {}).get('node', {}).get(npu_node_name, {}).get('node_type')
bench_node_type = graph_data.get('Bench', {}).get('node', {}).get(bench_node_name, {}).get('node_type')
-
if npu_node_type is None or bench_node_type is None or npu_node_type != bench_node_type:
return False
return True
@staticmethod
- def get_opposite_node_name(node_name):
- opposite_node_name = ''
- # 如果npu_node_name包含forward,则opposite_npu_node_name为npu_node_name替换forward为backward
- if 'forward' in node_name:
- opposite_node_name = node_name.replace('forward', 'backward')
- else:
- opposite_node_name = node_name.replace('backward', 'forward')
- return opposite_node_name
-
- @staticmethod
- def process_task_add(graph_data, npu_node_name, bench_node_name, task):
+ def process_task_add(graph_data, npu_node_name, bench_node_name, task):
if not MatchNodesController.is_same_node_type(graph_data, npu_node_name, bench_node_name):
- return {
+ return [{
'success': False,
'error': '节点类型不一致,无法添加匹配关系'
- }
-
- result = {}
- opposite_result = {}
- opposite_npu_node_name = MatchNodesController.get_opposite_node_name(npu_node_name)
- opposite_bench_node_name = MatchNodesController.get_opposite_node_name(bench_node_name)
+ }]
+ opposite_npu_node_name = GraphUtils.get_opposite_node_name(npu_node_name)
+ opposite_bench_node_name = GraphUtils.get_opposite_node_name(bench_node_name)
if task == 'md5':
- result = MatchNodesController.process_md5_task_add(graph_data, npu_node_name, bench_node_name)
+ match_result = MatchNodesController.process_md5_task_add(graph_data, npu_node_name, bench_node_name)
opposite_result = MatchNodesController.process_md5_task_add(graph_data, opposite_npu_node_name,
opposite_bench_node_name)
elif task == 'summary':
- result = MatchNodesController.process_summary_task_add(graph_data, npu_node_name, bench_node_name)
+ match_result = MatchNodesController.process_summary_task_add(graph_data, npu_node_name, bench_node_name)
opposite_result = MatchNodesController.process_summary_task_add(graph_data, opposite_npu_node_name,
opposite_bench_node_name)
else:
- result = {
- 'success': False,
- 'error': 'task类型错误'
- }
- result['success'] = result.get('success') or opposite_result.get('success')
- if not result.get('success'):
- result['error'] = f'当前节点:{result.get("error","")}。对侧节点:{opposite_result.get("error")}'
- return result
+ return [{'success': False, 'error': 'task类型错误'},
+ {'success': False, 'error': 'task类型错误'}]
+ return [match_result, opposite_result]
@staticmethod
def process_task_delete(graph_data, npu_node_name, bench_node_name, task):
- result = {}
- opposite_result = {}
- opposite_npu_node_name = MatchNodesController.get_opposite_node_name(npu_node_name)
- opposite_bench_node_name = MatchNodesController.get_opposite_node_name(bench_node_name)
+ opposite_npu_node_name = GraphUtils.get_opposite_node_name(npu_node_name)
+ opposite_bench_node_name = GraphUtils.get_opposite_node_name(bench_node_name)
if task == 'md5':
- result = MatchNodesController.process_md5_task_delete(graph_data, npu_node_name, bench_node_name)
+ match_result = MatchNodesController.process_md5_task_delete(graph_data, npu_node_name, bench_node_name)
opposite_result = MatchNodesController.process_md5_task_delete(graph_data, opposite_npu_node_name,
opposite_bench_node_name)
elif task == 'summary':
- result = MatchNodesController.process_summary_task_delete(graph_data, npu_node_name, bench_node_name)
+ match_result = MatchNodesController.process_summary_task_delete(graph_data, npu_node_name, bench_node_name)
opposite_result = MatchNodesController.process_summary_task_delete(graph_data, opposite_npu_node_name,
opposite_bench_node_name)
else:
- result = {
- 'success': False,
- 'error': 'task类型错误'
- }
- result['success'] = result.get('success') or opposite_result.get('success')
- if not result.get('success'):
- result['error'] = f'当前节点:{result.get("error","")}。对侧节点:{opposite_result.get("error")}'
- return result
+ return [{'success': False, 'error': 'task类型错误'},
+ {'success': False, 'error': 'task类型错误'}]
+ return [match_result, opposite_result]
@staticmethod
def process_task_add_child_layer_by_config(graph_data, match_node_links, task):
# 根据配置文件中的匹配关系,批量调用 process_task_add
- result = {}
- match_reslut = []
+ match_results = []
for npu_node_name, bench_node_name in match_node_links.items():
- res = MatchNodesController.process_task_add(graph_data, npu_node_name, bench_node_name, task)
- match_reslut.append(res.get('success'))
-
- config_data = GraphState.get_global_value("config_data")
-
- result['success'] = True
- result['data'] = {
- 'matchReslut': match_reslut,
- 'npuMatchNodes': config_data.get('npuMatchNodes', {}),
- 'benchMatchNodes': config_data.get('benchMatchNodes', {}),
- 'npuUnMatchNodes': config_data.get('npuUnMatchNodes', []),
- 'benchUnMatchNodes': config_data.get('benchUnMatchNodes', [])
- }
- return result
+ if task == 'md5':
+ match_result = MatchNodesController.process_md5_task_add(graph_data, npu_node_name, bench_node_name)
+ elif task == 'summary':
+ match_result = MatchNodesController.process_summary_task_add(graph_data, npu_node_name, bench_node_name)
+ match_results.append(match_result)
+ return match_results
@staticmethod
def process_task_add_child_layer(graph_data, npu_node_name, bench_node_name, task):
if not all([graph_data, npu_node_name, bench_node_name, task]):
- return {'success': False, 'error': '参数错误'}
+ return [{'success': False, 'error': '参数错误'}]
if not MatchNodesController.is_same_node_type(graph_data, npu_node_name, bench_node_name):
- return {
+ return [{
'success': False,
'error': '节点类型不一致,无法添加匹配关系'
- }
+ }]
+ # DB:找到目标节点所有子节点和标杆侧所有子节点
npu_nodes = graph_data.get('NPU', {}).get('node', {})
bench_nodes = graph_data.get('Bench', {}).get('node', {})
+ result = []
# 1. 选中的目标节点和标杆侧节点添加匹配关系
- result = MatchNodesController.process_task_add(graph_data, npu_node_name, bench_node_name, task)
+ match_result = MatchNodesController.process_task_add(graph_data, npu_node_name, bench_node_name, task)
# 2. 目标节点的子节点和标杆侧的子节点添加匹配关系
@@ -164,13 +131,15 @@ class MatchNodesController:
bench_subnode_list = bench_match_names.get(key, [])
# 多个节点可能有一个module name
for npu_subnode_name, bench_subnode_name in zip(npu_subnode_list, bench_subnode_list):
- result = MatchNodesController.process_task_add(graph_data, npu_subnode_name, bench_subnode_name,
- task)
+ match_result = MatchNodesController.process_task_add(graph_data, npu_subnode_name,
+ bench_subnode_name, task)
npu_subnodes = npu_nodes.get(npu_subnode_name, {}).get('subnodes', [])
bench_subnodes = bench_nodes.get(bench_subnode_name, {}).get('subnodes', [])
# 2.4 如果有子节点,递归调用2.1-2.4
- if result.get('success') and npu_subnodes and bench_subnodes:
- process_child_layer(npu_subnodes, bench_subnodes)
+ if len(match_result) > 0 and match_result[0].get('success'):
+ result.extend(match_result)
+ if npu_subnodes and bench_subnodes:
+ process_child_layer(npu_subnodes, bench_subnodes)
def extract_module_name(subnode_name):
splited_subnode_name = subnode_name.split('.')
@@ -189,27 +158,22 @@ class MatchNodesController:
npu_subnodes = npu_nodes.get(npu_node_name, {}).get('subnodes', [])
bench_subnodes = bench_nodes.get(bench_node_name, {}).get('subnodes', [])
- if result.get('success') and npu_subnodes and bench_subnodes:
- process_child_layer(npu_subnodes, bench_subnodes)
- if result.get('success'):
- config_data = GraphState.get_global_value("config_data")
- result['data'] = {
- 'npuMatchNodes': config_data.get('npuMatchNodes', {}),
- 'benchMatchNodes': config_data.get('benchMatchNodes', {}),
- 'npuUnMatchNodes': config_data.get('npuUnMatchNodes', []),
- 'benchUnMatchNodes': config_data.get('benchUnMatchNodes', [])
- }
+ # 如果当前节点,也就是第一个节点匹配成功,就尝试匹配子节点
+ if len(match_result) > 0 and match_result[0].get('success'):
+ result.extend(match_result)
+ if npu_subnodes and bench_subnodes:
+ process_child_layer(npu_subnodes, bench_subnodes)
return result
@staticmethod
def process_task_delete_child_layer(graph_data, npu_node_name, bench_node_name, task):
if not all([graph_data, npu_node_name, bench_node_name, task]):
- return {'success': False, 'error': '参数错误'}
-
+ return [{'success': False, 'error': '参数错误'}]
+ result = []
npu_nodes = graph_data.get('NPU', {}).get('node', {})
bench_nodes = graph_data.get('Bench', {}).get('node', {})
# 1. 选中的目标节点和标杆侧节点添加匹配关系
- result = MatchNodesController.process_task_delete(graph_data, npu_node_name, bench_node_name, task)
+ match_result = MatchNodesController.process_task_delete(graph_data, npu_node_name, bench_node_name, task)
# 2. 目标节点的子节点和标杆侧的子节点添加匹配关系
def process_child_layer(npu_child_nodes):
@@ -219,28 +183,23 @@ class MatchNodesController:
if not matched_node_link:
continue
bench_subnode_name = matched_node_link[-1]
- result = MatchNodesController.process_task_delete(graph_data, npu_subnode_name, bench_subnode_name,
- task)
+ match_result = MatchNodesController.process_task_delete(graph_data, npu_subnode_name,
+ bench_subnode_name, task)
npu_subnodes = npu_nodes.get(npu_subnode_name, {}).get('subnodes', [])
bench_subnodes = bench_nodes.get(bench_subnode_name, {}).get('subnodes', [])
# 2.4 如果有子节点,递归调用2.1-2.4
- if result.get('success') and npu_subnodes and bench_subnodes:
- process_child_layer(npu_subnodes)
+ if len(match_result) > 0 and match_result[0].get('success'):
+ result.extend(match_result)
+ if npu_subnodes and bench_subnodes:
+ process_child_layer(npu_subnodes)
npu_subnodes = npu_nodes.get(npu_node_name, {}).get('subnodes', [])
bench_subnodes = bench_nodes.get(bench_node_name, {}).get('subnodes', [])
-
- if result.get('success') and npu_subnodes and bench_subnodes:
- process_child_layer(npu_subnodes)
- if result.get('success'):
- config_data = GraphState.get_global_value("config_data")
-
- result['data'] = {
- 'npuMatchNodes': config_data.get('npuMatchNodes', {}),
- 'benchMatchNodes': config_data.get('benchMatchNodes', {}),
- 'npuUnMatchNodes': config_data.get('npuUnMatchNodes', []),
- 'benchUnMatchNodes': config_data.get('benchUnMatchNodes', [])
- }
+ # 如果当前节点,也就是第一个节点取消匹配成功,就尝试取消匹配子节点
+ if len(match_result) > 0 and match_result[0].get('success'):
+ result.extend(match_result)
+ if npu_subnodes and bench_subnodes:
+ process_child_layer(npu_subnodes)
return result
@staticmethod
@@ -248,7 +207,7 @@ class MatchNodesController:
npu_node_data = graph_data.get('NPU', {}).get('node', {}).get(npu_node_name)
bench_node_data = graph_data.get('Bench', {}).get('node', {}).get(bench_node_name)
if not npu_node_data or not bench_node_data:
- return {'success': False, 'error': '节点不存在'}
+ return {'success': False, 'error': f'{npu_node_name, bench_node_name}节点不存在'}
# 去除节点名称前缀
npu_input_data = GraphUtils.remove_prefix(npu_node_data.get('input_data', {}), npu_node_name + '.')
bench_input_data = GraphUtils.remove_prefix(bench_node_data.get('input_data', {}), bench_node_name + '.')
@@ -259,15 +218,33 @@ class MatchNodesController:
precision_output_error = MatchNodesController.calculate_md5_diff(npu_output_data, bench_output_data)
precision_error = precision_input_error and precision_output_error
# 在原始数据上,添加匹配节点,和匹配节点信息
-
+ # JSON:处理JSON更新
npu_graph_data = graph_data.get('NPU', {})
bench_graph_data = graph_data.get('Bench', {})
npu_node_data['matched_node_link'] = GraphUtils.get_parent_node_list(bench_graph_data, bench_node_name)
bench_node_data['matched_node_link'] = GraphUtils.get_parent_node_list(npu_graph_data, npu_node_name)
npu_node_data.setdefault('data', {})['precision_index'] = precision_error
-
+ # DB: data只有DB会用到
+ data = [
+ {
+ "node_name": npu_node_name,
+ "matched_node_link": [bench_node_name],
+ "precision_index": precision_error,
+ "input_data": npu_node_data.get('input_data'),
+ "output_data": npu_node_data.get('output_data'),
+ "graph_type": NPU
+ },
+ {
+ "node_name": bench_node_name,
+ "matched_node_link": [npu_node_name],
+ "precision_index": None,
+ "input_data": bench_node_data.get('input_data'),
+ "output_data": bench_node_data.get('output_data'),
+ "graph_type": BENCH,
+ }
+ ]
MatchNodesController.add_config_match_nodes(npu_node_name, bench_node_name)
- return {'success': True}
+ return {'success': True, "data": data}
@staticmethod
def process_summary_task_add(graph_data, npu_node_name, bench_node_name):
@@ -287,15 +264,15 @@ class MatchNodesController:
if not intput_statistical_diff or not output_statistical_diff:
return {
'success': False,
- 'error': '输入或输出统计误差值为空(Input and output statistical error calculation failed)',
+ 'error': f'{npu_node_name, bench_node_name}输入或输出统计误差值为空',
}
-
if precision_error == -1:
return {
'success': False,
- 'error': '输出统计误差值为空,计算精度误差失败(Calculation of precision error failed)',
+ 'error': f'{npu_node_name, bench_node_name}输出统计误差值为空,计算精度误差失败',
}
# 在原始数据上,添加匹配节点,和匹配节点信息
+ # JSON:处理JSON更新
npu_graph_data = graph_data.get('NPU', {})
bench_graph_data = graph_data.get('Bench', {})
npu_node_data['matched_node_link'] = GraphUtils.get_parent_node_list(bench_graph_data, bench_node_name)
@@ -303,8 +280,27 @@ class MatchNodesController:
npu_node_data.setdefault('data', {})['precision_index'] = precision_error
MatchNodesController.update_graph_node_data(npu_node_data.get('input_data'), intput_statistical_diff)
MatchNodesController.update_graph_node_data(npu_node_data.get('output_data'), output_statistical_diff)
+ # DB: data只有DB会用到
+ data = [
+ {
+ "node_name": npu_node_name,
+ "matched_node_link": [bench_node_name],
+ "precision_index": precision_error,
+ "input_data": npu_node_data.get('input_data'),
+ "output_data": npu_node_data.get('output_data'),
+ "graph_type": NPU
+ },
+ {
+ "node_name": bench_node_name,
+ "matched_node_link": [npu_node_name],
+ "precision_index": None,
+ "input_data": bench_node_data.get('input_data'),
+ "output_data": bench_node_data.get('output_data'),
+ "graph_type": BENCH,
+ }
+ ]
MatchNodesController.add_config_match_nodes(npu_node_name, bench_node_name)
- return {'success': True}
+ return {'success': True, "data": data}
@staticmethod
def process_md5_task_delete(graph_data, npu_node_name, bench_node_name):
@@ -315,28 +311,48 @@ class MatchNodesController:
bench_node_name) != npu_node_name:
return {
'success': False,
- 'error': "操作失败:节点未匹配,请先匹配节点",
+ 'error': f"操作失败:{npu_node_name}或{bench_node_name}节点未匹配,请先匹配节点",
}
npu_node_data = graph_data.get('NPU', {}).get('node', {}).get(npu_node_name)
bench_node_data = graph_data.get('Bench', {}).get('node', {}).get(bench_node_name)
if not npu_node_data or not bench_node_data:
return {
'success': False,
- 'error': "操作失败:节点不存在",
+ 'error': "f操作失败:{npu_node_name}或{bench_node_name}节点不存在",
}
# 在原始数据上,删除匹配节点,和匹配节点信息
+ # JSON:处理JSON更新
npu_node_data['matched_node_link'] = []
bench_node_data['matched_node_link'] = []
- # 后端维护一个匹配节点列表,前端展示
del npu_node_data['data']['precision_index']
+ # DB: data只有DB会用到
+ data = [
+ {
+ "node_name": npu_node_name,
+ "matched_node_link": [],
+ "precision_index": None,
+ "input_data": npu_node_data.get('input_data'),
+ "output_data": npu_node_data.get('output_data'),
+ "graph_type": NPU
+ },
+ {
+ "node_name": bench_node_name,
+ "matched_node_link": [],
+ "precision_index": None,
+ "input_data": bench_node_data.get('input_data'),
+ "output_data": bench_node_data.get('output_data'),
+ "graph_type": BENCH,
+ }
+ ]
MatchNodesController.delete_config_match_nodes(npu_node_name, bench_node_name)
return {
'success': True,
- 'data': {},
+ 'data': data,
}
@staticmethod
def process_summary_task_delete(graph_data, npu_node_name, bench_node_name):
+
config_data = GraphState.get_global_value("config_data")
npu_match_nodes_list = config_data.get('npuMatchNodes', {})
bench_match_nodes_list = config_data.get('benchMatchNodes', {})
@@ -344,27 +360,44 @@ class MatchNodesController:
bench_node_name) != npu_node_name:
return {
'success': False,
- 'error': "操作失败:节点未匹配,请先匹配节点",
+ 'error': f"操作失败:{npu_node_name,bench_node_name}节点未匹配,请先匹配节点",
}
npu_node_data = graph_data.get('NPU', {}).get('node', {}).get(npu_node_name)
bench_node_data = graph_data.get('Bench', {}).get('node', {}).get(bench_node_name)
if not npu_node_data or not bench_node_data:
return {
'success': False,
- 'error': "操作失败:节点不存在",
+ 'error': f"操作失败:{npu_node_name}或{bench_node_name}节点不存在",
}
- # 在原始数据上,删除匹配节点,和匹配节点信息
+ # DB:更新数据的节点信息,直接返回新的结果,此处暂时不做更新
+ # JSON:处理JSON更新
npu_node_data['matched_node_link'] = []
bench_node_data['matched_node_link'] = []
- MatchNodesController.delete_matched_node_data(npu_node_data.get('input_data'))
- MatchNodesController.delete_matched_node_data(npu_node_data.get('output_data'))
+ input_data = MatchNodesController.delete_matched_node_data(npu_node_data.get('input_data'))
+ output_data = MatchNodesController.delete_matched_node_data(npu_node_data.get('output_data'))
# 防止 KeyError 或 TypeError
npu_node_data.get('data', {}).pop('precision_index', None)
+ # DB: data只有DB会用到
+ data = [
+ {
+ "node_name": npu_node_name,
+ "matched_node_link": [],
+ "precision_index": None,
+ "input_data": input_data,
+ "output_data": output_data,
+ "graph_type": NPU
+ },
+ {
+ "node_name": bench_node_name,
+ "matched_node_link": [],
+ "precision_index": None,
+ "input_data": bench_node_data.get('input_data'),
+ "output_data": bench_node_data.get('output_data'),
+ "graph_type": BENCH,
+ }
+ ]
MatchNodesController.delete_config_match_nodes(npu_node_name, bench_node_name)
- return {
- 'success': True,
- 'data': {},
- }
+ return {'success': True, 'data': data}
@staticmethod
def add_config_match_nodes(npu_node_name, bench_node_name):
@@ -489,7 +522,6 @@ class MatchNodesController:
# 格式化相对误差字段
for field in ['MaxRelativeErr', 'MinRelativeErr', 'NormRelativeErr', 'MeanRelativeErr']:
diff_values[field] = GraphUtils.format_relative_err(diff_values.get(field, float('nan')))
-
# 转换 absErr 为 NaN 字符串
for field in ['MaxAbsErr', 'MinAbsErr', 'MeanAbsErr', 'NormAbsErr']:
diff_values[field] = GraphUtils.nan_to_str(diff_values.get(field, float('nan')))
@@ -511,3 +543,4 @@ class MatchNodesController:
for sub_key, value in fild_obj.items()
if sub_key not in keys_to_remove
}
+ return graph_npu_node_data
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/__init__.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/__init__.py
index ee251c4084d7ed8afdd796c08983d4893df4a530..8157f715f5d559191757d67386f884050288f9bd 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/__init__.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/__init__.py
@@ -13,6 +13,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-from .base_graph_service import GraphServiceStrategy
-from .file_check_wrapper import check_file_type
+from .graph_service_base import GraphServiceStrategy
from .graph_service_factory import ServiceFactory
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/base_graph_service.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_base.py
similarity index 91%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/base_graph_service.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_base.py
index bf927f67a21a112166a26c8cb0cf36c1625529fb..32fa67389bcf4da0b2e29bd3ef2f63cbfa15f39c 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/base_graph_service.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_base.py
@@ -17,10 +17,8 @@ import os
from abc import ABC, abstractmethod
from tensorboard.util import tb_logging
-
from ..utils.graph_utils import GraphUtils
from ..utils.global_state import GraphState, NPU, BENCH, Extension, DataType
-from ..controllers.layout_hierarchy_controller import LayoutHierarchyController
logger = tb_logging.get_logger()
DB_EXT = Extension.DB.value
@@ -94,14 +92,6 @@ class GraphServiceStrategy(ABC):
}
return result
- @staticmethod
- def update_hierarchy_data(graph_type):
- if graph_type == NPU or graph_type == BENCH:
- hierarchy = LayoutHierarchyController.update_hierarchy_data(graph_type)
- return {'success': True, 'data': hierarchy}
- else:
- return {'success': False, 'error': '节点类型错误'}
-
@abstractmethod
def load_graph_data(self):
pass
@@ -117,6 +107,12 @@ class GraphServiceStrategy(ABC):
@abstractmethod
def change_node_expand_state(self, node_info, meta_data):
pass
+
+ def search_node_by_precision(self, meta_data, values):
+ # Optional hook: it carries no @abstractmethod decorator, so subclasses are not forced to override it.
+ # This base version does nothing and returns None; JsonGraphService provides a real implementation.
+ pass
+
+ def search_node_by_overflow(self, meta_data, values):
+ # Optional hook: it carries no @abstractmethod decorator, so subclasses are not forced to override it.
+ # This base version does nothing and returns None; JsonGraphService provides a real implementation.
+ pass
@abstractmethod
def get_node_info(self, node_info, meta_data):
@@ -133,10 +129,10 @@ class GraphServiceStrategy(ABC):
@abstractmethod
def delete_match_nodes(self, npu_node_name, bench_node_name, meta_data, is_unmatch_children):
pass
-
+
@abstractmethod
- def save_data(self, meta_data):
- pass
+ def update_precision_error(self, meta_data, filter_value):
+ pass
@abstractmethod
def update_colors(self, colors):
@@ -145,3 +141,4 @@ class GraphServiceStrategy(ABC):
@abstractmethod
def save_matched_relations(self):
pass
+
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_factory.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_factory.py
index 18ff06a64f502033a5789a97820e3f37a4ff94f4..bd9f017c3a116391df6a9d2420541a91d2173a0b 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_factory.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_factory.py
@@ -13,12 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-from .db_graph_service import DbGraphService
-from .json_graph_service import JsonGraphService
+from .graph_service_db import DbGraphService
+from .graph_service_vis import JsonGraphService
from ..utils.global_state import GraphState, DataType
class ServiceFactory:
+
def __init__(self):
self.run = ''
self.tag = ''
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/json_graph_service.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_vis.py
similarity index 72%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/json_graph_service.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_vis.py
index 32dda659276f5697a04b22ba063e402c22afb9d7..9c3dfa33ae91aa92d8723bff98f80a0e672f6e24 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/json_graph_service.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/graph_service_vis.py
@@ -16,16 +16,16 @@
import os
import time
import json
-
from tensorboard.util import tb_logging
-from .db_graph_service import DbGraphService
-from ..utils.graph_utils import GraphUtils
+
+from .graph_service_base import GraphServiceStrategy
+from ..repositories.graph_repo_vis import GraphRepoVis
from ..utils.global_state import GraphState
-from ..controllers.match_nodes_controller import MatchNodesController
-from ..controllers.layout_hierarchy_controller import LayoutHierarchyController
+from ..utils.graph_utils import GraphUtils
+from ..model.layout_hierarchy_model import LayoutHierarchyModel
+from ..model.match_nodes_model import MatchNodesController
from ..utils.global_state import NPU_PREFIX, BENCH_PREFIX, NPU, BENCH, SINGLE
from ..utils.global_state import MAX_RELATIVE_ERR, MIN_RELATIVE_ERR, MEAN_RELATIVE_ERR, NORM_RELATIVE_ERR
-from .base_graph_service import GraphServiceStrategy
logger = tb_logging.get_logger()
@@ -34,6 +34,7 @@ class JsonGraphService(GraphServiceStrategy):
def __init__(self, run_path, tag):
super().__init__(run_path, tag)
+ self.repo = None
def load_graph_data(self):
runs = GraphState.get_global_value('runs')
@@ -74,6 +75,8 @@ class JsonGraphService(GraphServiceStrategy):
GraphState.set_global_value('current_file_data', json_data)
GraphState.set_global_value('current_tag', self.tag)
GraphState.set_global_value('current_run', run_path)
+ # 初始化GraphJson
+ self.repo = GraphRepoVis(json_data)
yield f"data: {json.dumps({'done': True, 'progress': 100, 'status': 'loading'})}\n\n"
else:
yield f"data: {json.dumps({'progress': current_progress, 'error': 'Failed to parse JSON'})}\n\n"
@@ -171,24 +174,25 @@ class JsonGraphService(GraphServiceStrategy):
return {'success': False, 'error': '获取节点列表失败:' + str(e)}
def change_node_expand_state(self, node_info, meta_data):
- graph_data, error_message = GraphUtils.get_graph_data(meta_data)
- if error_message or not graph_data:
- return {'success': False, 'error': error_message}
- graph_type = node_info.get('nodeType')
- node_name = node_info.get('nodeName')
- micro_step = meta_data.get('microStep')
+
try:
+ graph_data, error_message = GraphUtils.get_graph_data(meta_data)
+ if error_message:
+ return {'success': False, 'error': error_message}
+ if self.repo is None:
+ return {'success': False, 'error': 'initlize graph json failed'}
+ graph_type = node_info.get('nodeType')
+ node_name = node_info.get('nodeName')
+ micro_step = meta_data.get('microStep')
# 单图
if not graph_data.get(NPU):
- hierarchy = LayoutHierarchyController.change_expand_state(node_name, SINGLE, graph_data, micro_step)
+ hierarchy = LayoutHierarchyModel.change_expand_state(node_name, SINGLE, self.repo, micro_step, {})
# NPU
elif graph_type == NPU:
- hierarchy = LayoutHierarchyController.change_expand_state(node_name, graph_type,
- graph_data.get(NPU, {}), micro_step)
+ hierarchy = LayoutHierarchyModel.change_expand_state(node_name, NPU, self.repo, micro_step, {})
# 标杆
elif graph_type == BENCH:
- hierarchy = LayoutHierarchyController.change_expand_state(node_name, graph_type,
- graph_data.get(BENCH, {}), micro_step)
+ hierarchy = LayoutHierarchyModel.change_expand_state(node_name, BENCH, self.repo, micro_step, {})
else:
return {'success': True, 'data': {}}
return {'success': True, 'data': hierarchy}
@@ -197,7 +201,64 @@ class JsonGraphService(GraphServiceStrategy):
node_type_name = ""
if graph_data.get(NPU):
node_type_name = '调试侧' if graph_type == NPU else '标杆侧'
- return {'success': False, 'error': f'{node_type_name}节点展开或收起发生错误', 'data': None}
+ return {'success': False, 'error': f'{node_type_name}节点展开或收起发生错误', 'data': None}
+
+    def search_node_by_precision(self, meta_data, values):
+        """Return the names of childless nodes selected by the precision filters in ``values``.
+
+        Args:
+            meta_data: Request metadata passed to ``GraphUtils.get_graph_data``; its
+                "microStep" entry (default -1) is forwarded to
+                ``GraphUtils.split_graph_data_by_microstep``.
+            values: Filter entries. The literal '无匹配节点' selects nodes whose
+                ``matched_node_link`` is missing or empty; every other entry is unpacked as a
+                ``(low, high)`` pair and selects nodes with ``low <= precision_index < high``.
+                ``None`` is treated as "no filters". The caller's list is never modified.
+
+        Returns:
+            dict: ``{'success': True, 'data': [node_name, ...]}`` on success, otherwise
+            ``{'success': False, 'error': message}``.
+        """
+        graph_data, error_message = GraphUtils.get_graph_data(meta_data)
+        if error_message:
+            return {'success': False, 'error': error_message}
+
+        unmatched_label = '无匹配节点'
+        # Work on a private copy: the original removed the label from the caller's list in place
+        # and raised outside the try block when ``values`` was None.
+        filters = list(values or [])
+        is_filter_unmatch_nodes = unmatched_label in filters
+        ranges = [item for item in filters if item != unmatched_label]
+        precision = []
+        try:
+            # Single graph: the nodes are read from the 'node' entry
+            if not graph_data.get(NPU):
+                node_list = GraphUtils.split_graph_data_by_microstep(graph_data.get('node', {}),
+                                                                     meta_data.get("microStep", -1))
+            # Multiple graphs: the NPU entry is searched
+            else:
+                node_list = GraphUtils.split_graph_data_by_microstep(graph_data.get(NPU),
+                                                                     meta_data.get("microStep", -1))
+            for node_name, node in node_list.items():
+                subnodes = node.get("subnodes", None)
+                # Nodes that still have children are never reported.
+                if subnodes is not None and subnodes != []:
+                    continue
+                matched_node_link = node.get('matched_node_link', None)
+                if is_filter_unmatch_nodes and (matched_node_link is None or matched_node_link == []):
+                    precision.append(node_name)
+                    # Already selected; skip the range test so the name is not listed twice.
+                    continue
+                if any(low <= node.get('data', {}).get("precision_index", -1) < high for low, high in ranges):
+                    precision.append(node_name)
+            return {'success': True, 'data': precision}
+        except Exception as e:
+            logger.error('search precision node failed:' + str(e))
+            return {'success': False, 'error': '获取符合精度误差节点失败:' + str(e)}
+
+    def search_node_by_overflow(self, meta_data, values):
+        """List the childless nodes of a single graph whose overflow level is contained in ``values``.
+
+        Args:
+            meta_data: Request metadata passed to ``GraphUtils.get_graph_data``; its
+                "microStep" entry (default -1) selects the micro step to search.
+            values: Container of accepted ``overflow_level`` values.
+
+        Returns:
+            dict: ``{'success': True, 'data': [node_name, ...]}`` for single-graph data, or
+            ``{'success': False, 'error': message}`` for comparison data and on any failure.
+        """
+        graph_data, error_message = GraphUtils.get_graph_data(meta_data)
+        if error_message:
+            return {'success': False, 'error': error_message}
+        overflow = []
+        try:
+            # Data that carries an NPU entry is a comparison view; overflow search is rejected there.
+            if graph_data.get(NPU):
+                return {'success': False, 'error': '多图模式下不支持溢出检测'}
+            # Single graph
+            all_nodes = GraphUtils.split_graph_data_by_microstep(graph_data.get('node', {}),
+                                                                 meta_data.get("microStep", -1))
+            for name, info in all_nodes.items():
+                children = info.get("subnodes", None)
+                has_no_children = children is None or children == []
+                if has_no_children and info.get('data', {}).get("overflow_level", -1) in values:
+                    overflow.append(name)
+            return {'success': True, 'data': overflow}
+        except Exception as e:
+            logger.error('search overflow node failed:' + str(e))
+            return {'success': False, 'error': '获取符合溢出检测节点失败:' + str(e)}
def update_precision_error(self, meta_data, filter_value):
try:
@@ -239,7 +300,7 @@ class JsonGraphService(GraphServiceStrategy):
def update_hierarchy_data(self, graph_type):
if (graph_type == NPU or graph_type == BENCH):
- hierarchy = LayoutHierarchyController.update_hierarchy_data(graph_type)
+ hierarchy = LayoutHierarchyModel.update_hierarchy_data(graph_type)
return {'success': True, 'data': hierarchy}
else:
return {'success': False, 'error': '节点类型错误'}
@@ -281,66 +342,53 @@ class JsonGraphService(GraphServiceStrategy):
# 根据任务类型计算误差
if task == 'md5' or task == 'summary':
if is_match_children:
- result = MatchNodesController.process_task_add_child_layer(graph_data,
- npu_node_name, bench_node_name, task)
- return result
+ match_result = MatchNodesController.process_task_add_child_layer(graph_data,
+ npu_node_name, bench_node_name, task)
else:
- result = MatchNodesController.process_task_add(graph_data, npu_node_name, bench_node_name, task)
- if result.get('success'):
- config_data = GraphState.get_global_value("config_data")
- result['data'] = {
- 'npuMatchNodes': config_data.get('npuMatchNodes', {}),
- 'benchMatchNodes': config_data.get('benchMatchNodes', {}),
- 'npuUnMatchNodes': config_data.get('npuUnMatchNodes', []),
- 'benchUnMatchNodes': config_data.get('benchUnMatchNodes', [])
- }
- return result
+ match_result = MatchNodesController.process_task_add(graph_data,
+ npu_node_name, bench_node_name, task)
+ return self._generate_matched_result(match_result)
else:
return {'success': False, 'error': '任务类型不支持(Task type not supported) '}
except Exception as e:
return {'success': False, '操作失败': str(e), 'data': None}
- def add_match_nodes_by_config(self, config_file, meta_data):
+ def add_match_nodes_by_config(self, config_file_name, meta_data):
graph_data, error_message = GraphUtils.get_graph_data(meta_data)
if error_message:
return {'success': False, 'error': '读取文件失败'}
- match_node_links, error = GraphUtils.safe_load_data(meta_data.get('run'), config_file)
+ match_node_links, error = GraphUtils.safe_load_data(meta_data.get('run'), config_file_name)
if error:
- return {'success': False, 'error': '配置文件失败'}
+ return {'success': False, 'error': '读取配置文件失败'}
task = graph_data.get('task')
try:
# 根据任务类型计算误差
if task == 'md5' or task == 'summary':
- result = MatchNodesController.process_task_add_child_layer_by_config(graph_data, match_node_links, task)
- return result
+ match_result = MatchNodesController.process_task_add_child_layer_by_config(graph_data,
+ match_node_links, task)
+ return self._generate_matched_result(match_result)
else:
- return {'success': False, 'error': '任务类型不支持(Task type not supported) '}
+ return {'success': False, 'error': '任务类型不支持(Task type not supported)'}
except Exception as e:
- return {'success': False, 'error': '操作失败', 'data': None}
+ logger.error(str(e))
+ return {'success': False, 'error': str(e), 'data': None}
def delete_match_nodes(self, npu_node_name, bench_node_name, meta_data, is_unmatch_children):
graph_data, error_message = GraphUtils.get_graph_data(meta_data)
if error_message:
return {'success': False, 'error': error_message}
task = graph_data.get('task')
- result = {}
try:
# 根据任务类型计算误差
if task == 'md5' or task == 'summary':
if is_unmatch_children:
- result = MatchNodesController.process_task_delete_child_layer(graph_data, npu_node_name,
- bench_node_name, task)
+ match_result = MatchNodesController.process_task_delete_child_layer(graph_data, npu_node_name,
+ bench_node_name, task)
else:
- result = MatchNodesController.process_task_delete(graph_data, npu_node_name, bench_node_name, task)
- if result.get('success'):
- config_data = GraphState.get_global_value("config_data")
- result['data'] = {
- 'npuMatchNodes': config_data.get('npuMatchNodes', {}),
- 'benchMatchNodes': config_data.get('benchMatchNodes', {}),
- 'npuUnMatchNodes': config_data.get('npuUnMatchNodes', []),
- 'benchUnMatchNodes': config_data.get('benchUnMatchNodes', [])
- }
- return result
+ match_result = MatchNodesController.process_task_delete(graph_data, npu_node_name,
+ bench_node_name, task)
+
+ return self._generate_matched_result(match_result)
else:
return {'success': False, 'error': '任务类型不支持(Task type not supported) '}
except Exception as e:
@@ -370,6 +418,7 @@ class JsonGraphService(GraphServiceStrategy):
first_run_tag = first_run_tags.get(self.run)
first_file_data, error = GraphUtils.safe_load_data(self.run, f"{first_run_tag}.vis")
if error:
+ logger.error(f"Error loading data: {error}")
return {'success': False, 'error': '获取配置信息失败,请检查目录中第一个文件'}
first_file_data['Colors'] = colors
config_data_run['colors'] = colors
@@ -380,9 +429,9 @@ class JsonGraphService(GraphServiceStrategy):
except Exception as e:
return {'success': False, 'error': str(e), 'data': None}
- def save_matched_relations(self):
- run = self.run
- tag = self.tag
+ def save_matched_relations(self, meta_data):
+ run = meta_data.get('run')
+ tag = meta_data.get('tag')
config_data = GraphState.get_global_value("config_data")
# 匹配列表和未匹配列表
npu_match_nodes_list = config_data.get('manualMatchNodes', {})
@@ -390,6 +439,29 @@ class JsonGraphService(GraphServiceStrategy):
_, error = GraphUtils.safe_save_data(npu_match_nodes_list, run, f"{tag}.vis.config")
if error:
return {'success': False, 'error': error}
+ else:
+ return {'success': True, 'data': f"{tag}.vis.config"}
except (ValueError, IOError, PermissionError) as e:
return {'success': False, 'error': f"Error: {e}"}
- return {'success': True, 'data': f"{tag}.vis.config"}
+
+    def _generate_matched_result(self, match_result):
+        """Turn the per-node outcomes of a match/unmatch task into the response for the client.
+
+        Args:
+            match_result: Outcome dicts of the form ``{'success': bool, 'data': [...]}``, given
+                either as a list or as one single dict. ``None`` or an empty list means that
+                nothing was changed.
+
+        Returns:
+            dict: ``{'success': True, 'data': {...}}`` holding the four match/unmatch
+            collections read from the global "config_data" when at least one successful
+            outcome carries node data; otherwise a failure dict with a fixed error message.
+        """
+        # A single outcome dict is wrapped first; iterating it directly would walk its keys.
+        if isinstance(match_result, dict):
+            match_result = [match_result]
+        # Only the presence of updated nodes matters, so nothing is accumulated.
+        # ``item.get('data')`` also covers an outcome whose data is None.
+        has_update = any(
+            item.get('success') is True and item.get('data')
+            for item in (match_result or [])
+        )
+        if not has_update:
+            return {'success': False, 'error': '选择的节点不可匹配(Selected nodes do not match) '}
+
+        config_data = GraphState.get_global_value("config_data")
+        return {
+            'success': True,
+            'data': {
+                'npuMatchNodes': config_data.get('npuMatchNodes', {}),
+                'benchMatchNodes': config_data.get('benchMatchNodes', {}),
+                'npuUnMatchNodes': config_data.get('npuUnMatchNodes', []),
+                'benchUnMatchNodes': config_data.get('benchUnMatchNodes', [])
+            }
+        }
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/file_check_wrapper.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/file_check_wrapper.py
similarity index 100%
rename from plugins/tensorboard-plugins/tb_graph_ascend/server/app/service/file_check_wrapper.py
rename to plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/file_check_wrapper.py
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/global_state.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/global_state.py
index 02db7d15b63fc863a0ad3b251376344b481e034b..b4cdcbe024a18168009be0e7cd7048d2d4c88254 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/global_state.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/global_state.py
@@ -17,7 +17,6 @@
# 全局常量
from enum import Enum
-
ADD_MATCH_KEYS = [
'MaxAbsErr',
'MinAbsErr',
@@ -28,7 +27,7 @@ ADD_MATCH_KEYS = [
'MeanRelativeErr',
'NormRelativeErr',
]
-MAX_FILE_SIZE = 3 * 1024 * 1024 * 1024 # 最大文件大小限制
+MAX_FILE_SIZE = 15 * 1024 * 1024 * 1024 # 最大文件大小限制
NPU_PREFIX = 'N___'
BENCH_PREFIX = 'B___'
FILE_NAME_REGEX = r'^[a-zA-Z0-9_\-\.]+$' # 文件名正则表达式
@@ -75,6 +74,8 @@ class GraphState:
},
'first_run_tags': {},
'runs': {},
+ 'update_precision_cache': {}, # {node_name{precision,...}},方便查询精度,提高性能
+ 'all_node_info_cache': {}, # {rank_step_micro_step:{node_name:node_info,...}},方便查询节点信息,提高性能
}
@staticmethod
@@ -100,6 +101,8 @@ class GraphState:
},
'first_run_tags': {},
'runs': {},
+ 'update_precision_cache': {},
+ 'all_node_info_cache': {},
}
@staticmethod
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/graph_utils.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/graph_utils.py
index 3e4e0436926333f253c1dacf930abeab5ac253d5..2d335948708023f9219565d201e069b36deb66d7 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/graph_utils.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/utils/graph_utils.py
@@ -56,6 +56,16 @@ class GraphUtils:
logger.error(f'Error: fail to get graph data by {meta_data}, error: {e}')
return None, 'Error: fail to get graph data'
+    @staticmethod
+    def get_opposite_node_name(node_name):
+        """Swap the direction keyword inside a node name.
+
+        A name containing 'forward' has every 'forward' replaced by 'backward'; any other name
+        has every 'backward' replaced by 'forward'. A name with neither keyword is returned
+        unchanged.
+        """
+        old, new = ('forward', 'backward') if 'forward' in node_name else ('backward', 'forward')
+        return node_name.replace(old, new)
+
@staticmethod
def get_parent_node_list(graph_data, node_name):
"""获取父节点列表"""
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/views/graph_views.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/views/graph_views.py
index af14d915112808f1850bae2274122aff483bd7a2..0253876433dd926a8c2704a77861a0e363bfecd0 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/app/views/graph_views.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/app/views/graph_views.py
@@ -19,8 +19,10 @@ import json
from pathlib import Path
from werkzeug import wrappers, Response, exceptions
from tensorboard.backend import http_util
-from ..service import ServiceFactory, GraphServiceStrategy, check_file_type
+from ..service import ServiceFactory, GraphServiceStrategy
+from ..utils.file_check_wrapper import check_file_type
from ..utils.graph_utils import GraphUtils
+from ..utils.global_state import DataType
class GraphView:
@@ -63,18 +65,25 @@ class GraphView:
def load_graph_data(request):
meta_data = {
'run': request.args.get('run'),
- 'tag': request.args.get('tag')
+ 'tag': request.args.get('tag'),
+ 'type': request.args.get('type'),
}
strategy = GraphView._get_strategy(meta_data)
- return Response(
- strategy.load_graph_data(),
- mimetype='text/event-stream',
- headers={
- 'Cache-Control': 'no-cache',
- 'Connection': 'close', # TCP链接不复用,请求结束释放资源
- "X-Content-Type-Options": "nosniff",
- }
- )
+ if meta_data.get('type') == DataType.DB.value:
+ result = strategy.load_graph_data()
+ return http_util.Respond(request, result, "application/json")
+ elif meta_data.get('type') == DataType.JSON.value:
+ return Response(
+ strategy.load_graph_data(),
+ mimetype='text/event-stream',
+ headers={
+ 'Cache-Control': 'no-cache',
+ 'Connection': 'close', # TCP链接不复用,请求结束释放资源
+ "X-Content-Type-Options": "nosniff",
+ }
+ )
+ else:
+ return http_util.Respond(request, {'success': False, 'message': 'type error'}, "application/json")
# 获取当前图数据配置信息
@staticmethod
@@ -101,6 +110,26 @@ class GraphView:
response = http_util.Respond(request, result, "application/json")
return response
+    # Search nodes by precision error or by overflow level
+    @staticmethod
+    @wrappers.Request.application
+    def search_node(request):
+        """Handle a node-search request.
+
+        The JSON request body is read for "metaData" (used to pick the service strategy),
+        "type" ('precision' or 'overflow') and "values" (the filter passed to the strategy).
+        Any other "type" yields ``{'success': False, 'message': "type error"}``.
+        """
+        # Pass {} as the fallback, like the other views in this class, so an unparsable body
+        # does not end in an attribute lookup on a non-dict result.
+        data = GraphUtils.safe_json_loads(request.get_data().decode('utf-8'), {})
+        meta_data = data.get("metaData")
+        search_type = data.get("type")
+        values = data.get("values")
+        strategy = GraphView._get_strategy(meta_data)
+        if search_type == 'precision':
+            result = strategy.search_node_by_precision(meta_data, values)
+        elif search_type == 'overflow':
+            result = strategy.search_node_by_overflow(meta_data, values)
+        else:
+            result = {
+                'success': False,
+                'message': "type error"
+            }
+        return http_util.Respond(request, result, "application/json")
+
# 更新误差节点
@staticmethod
@wrappers.Request.application
@@ -132,9 +161,13 @@ class GraphView:
# 更新当前图节点信息
@staticmethod
@wrappers.Request.application
+ @check_file_type
def update_hierarchy_data(request):
- graph_type = request.args.get("graphType")
- hierarchy = GraphServiceStrategy.update_hierarchy_data(graph_type)
+ data = GraphUtils.safe_json_loads(request.get_data().decode('utf-8'), {})
+ graph_type = data.get("graphType")
+ meta_data = data.get('metaData')
+ strategy = GraphView._get_strategy(meta_data)
+ hierarchy = strategy.update_hierarchy_data(graph_type)
return http_util.Respond(request, json.dumps(hierarchy), "application/json")
# 获取当前节点对应节点的信息看板数据
@@ -207,10 +240,12 @@ class GraphView:
# 更新颜色信息
@staticmethod
@wrappers.Request.application
+ @check_file_type
def update_colors(request):
- colors = GraphUtils.safe_json_loads(request.args.get('colors'))
- run = request.args.get('run')
- strategy = GraphView._get_strategy({'run': run}, no_tag=True)
+ data = GraphUtils.safe_json_loads(request.get_data().decode('utf-8'), {})
+ meta_data = data.get('metaData')
+ colors = GraphUtils.safe_json_loads(data.get('colors'))
+ strategy = GraphView._get_strategy(meta_data, no_tag=True)
update_result = strategy.update_colors(colors)
return http_util.Respond(request, json.dumps(update_result), "application/json")
@@ -222,7 +257,7 @@ class GraphView:
data = GraphUtils.safe_json_loads(request.get_data().decode('utf-8'), {})
meta_data = data.get('metaData')
strategy = GraphView._get_strategy(meta_data)
- save_result = strategy.save_matched_relations()
+ save_result = strategy.save_matched_relations(meta_data)
return http_util.Respond(request, json.dumps(save_result), "application/json")
@staticmethod
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/constants.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/constants.py
deleted file mode 100644
index 111a41071945675a6db83318b9de3499ac56ec8e..0000000000000000000000000000000000000000
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/constants.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2025, Huawei Technologies.
-# All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-SCREEN_MAP = {'precision_index': 'precision_index', 'overflow_level': 'overflow_level'}
-UNMATCHED_NODE_NAME = '无匹配节点'
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/server/plugin.py b/plugins/tensorboard-plugins/tb_graph_ascend/server/plugin.py
index 21c5df98856789dfd6808e8d737c817a8f849f23..b60ec18e5da44564393bd60a8ab1408998f8c0e8 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/server/plugin.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/server/plugin.py
@@ -18,12 +18,9 @@
"""The TensorBoard Graphs plugin."""
import os
-from werkzeug import wrappers
-from tensorboard.backend import http_util
from tensorboard.plugins import base_plugin
from tensorboard.util import tb_logging
-from . import constants
from .app.views.graph_views import GraphView
from .app.utils.graph_utils import GraphUtils
from .app.utils.global_state import GraphState, Extension
@@ -40,7 +37,6 @@ class GraphsPlugin(base_plugin.TBPlugin):
"""Graphs Plugin for TensorBoard."""
plugin_name = PLUGIN_NAME
- headers = [('X-Content-Type-Options', 'nosniff')]
def __init__(self, context):
"""Instantiates GraphsPlugin via TensorBoard core.
@@ -50,26 +46,16 @@ class GraphsPlugin(base_plugin.TBPlugin):
"""
super().__init__(context)
GraphState.reset_global_state()
- self._data_provider = context.data_provider
self.logdir = os.path.abspath(os.path.expanduser(context.logdir.rstrip('/')))
# 将logdir赋值给global_state中的logdir属性,方便其他模块使用
- GraphState.set_global_value('logdir', os.path.abspath(os.path.expanduser(context.logdir.rstrip('/'))))
- self._current_file_path = None # Store the path of the currently loaded file
- self._current_file_data = None # Store the data of the currently loaded file
- self._current_tag = None # Store the tag of the currently loaded file
- self.batch_id = 0 # 将 batch_id 声明为实例变量
- self.step_id = 0 # 可以同样声明 step_id
- self.dfs_node_ids = [] # batch和step没变的话就将所有的nodename存起来,方便快速读取
- self.check_batch_id = -1 # 来配合node_ids监察用的,他不变node_ids就不用重新读取了
- self.check_step_id = 0 # 同上
- self.check_tag = None
+ GraphState.set_global_value('logdir', self.logdir)
def get_plugin_apps(self):
return {
'/index.js': GraphView.static_file_route,
'/index.html': GraphView.static_file_route,
"/load_meta_dir": GraphView.load_meta_dir,
- "/screen": self.get_all_screen_nodes,
+ "/screen": GraphView.search_node,
'/loadGraphData': GraphView.load_graph_data,
'/loadGraphConfigInfo': GraphView.load_graph_config_info,
'/loadGraphAllNodeList': GraphView.load_graph_all_node_list,
@@ -87,6 +73,7 @@ class GraphsPlugin(base_plugin.TBPlugin):
def is_active(self):
"""The graphs plugin is active if any run has a graph."""
+
def _is_vis(path, file_name):
return os.path.isfile(path) and (file_name.endswith(DB_EXT) or file_name.endswith(JSON_EXT))
@@ -111,127 +98,3 @@ class GraphsPlugin(base_plugin.TBPlugin):
es_module_path='/index.js',
disable_reload=True,
)
-
- # 拿所有precisonNodes的,与controls的精度筛选联动
- @wrappers.Request.application
- def get_all_screen_nodes(self, request):
- grouped_screen_set, inaccuracy_node_ids = [], []
- precision_none = 0
- screen = ''
- # 尝试获取 screen_set 和 screen 的值
- for key, value in constants.SCREEN_MAP.items():
- if key in request.args:
- screen_set = request.args.get(key)
- screen = value
- break # 找到一个匹配的 key 后跳出循环
-
- if screen == 'precision_index':
- precision_set_str = screen_set.split(',')
- if constants.UNMATCHED_NODE_NAME in precision_set_str:
- precision_set_str = [p for p in precision_set_str if p != constants.UNMATCHED_NODE_NAME]
- precision_none = 1
- grouped_screen_set = [
- list(map(float, precision_set_str[i: i + 2]))
- for i in range(0, len(precision_set_str), 2)
- ]
- else:
- grouped_screen_set = screen_set
- tag = request.args.get("tag")
- json_data = self.check_jsondata(request)
-
- def has_conditions_changed(tag, batch):
- return (
- self.check_batch_id != batch
- or self.check_step_id != self.step_id
- or self.check_tag != tag
- or self.check_tag is None
- )
-
- if has_conditions_changed(tag, self.batch_id):
- self.dfs_node_ids = self.dfs_collect_nodes(json_data, request)
- self.check_batch_id = self.batch_id
- self.check_step_id = self.step_id
- self.check_tag = tag
- node_ids = self.dfs_node_ids
- for node in node_ids:
- node_data = self.json_get(json_data, 'NPU', 'node', node, 'data') or self.json_get(
- json_data, 'node', node, 'data'
- )
- matched = self.json_get(json_data, 'NPU', 'node', node, 'matched_node_link') or self.json_get(
- json_data, 'node', node, 'matched_node_link'
- )
- inaccuracy = node_data.get(screen) if node_data is not None else None
- # 如果 inaccuracy 为 None,直接检查是否符合条件
- if inaccuracy is None and precision_none == 0:
- continue # 跳过后续的处理,进入下一个 node
- if inaccuracy is None and precision_none == 1:
- if (node_data is None or node_data.get('overflow_level', False)) and not matched:
- inaccuracy_node_ids.append(node)
- continue # 跳过后续的处理,进入下一个 node
-
- # 对于 inaccuracy 是数字类型,检查是否在某个子范围内,精度误差
- if isinstance(inaccuracy, (int, float)):
- for group in grouped_screen_set:
- if len(group) > 1 and all(g is not None for g in group) and group[0] <= inaccuracy <= group[1]:
- inaccuracy_node_ids.append(node)
- break # 找到符合条件的,跳出当前循环
- # 对于非数字的 inaccuracy,检查是否在 grouped_screen_set 中,溢出检测
- elif inaccuracy in grouped_screen_set:
- inaccuracy_node_ids.append(node)
- else:
- logger.error(f'The inaccuracy in {node} is not a valid value')
-
- return http_util.Respond(request, inaccuracy_node_ids, "application/json")
-
- def dfs_collect_nodes(self, json_data, request):
- root_subnodes_set = []
- all_node_names = []
- try:
- request_micro_step_id = request.args.get("microStep")
- except ValueError:
- logger.error('The param "batch" or "step" does not exist or not a valid value')
- root_name = self.json_get(json_data, 'NPU', 'root') or \
- self.json_get(json_data, 'root')
- root_subnodes = self.json_get(json_data, 'NPU', 'node', root_name, 'subnodes') \
- if 'NPU' in json_data else \
- self.json_get(json_data, 'node', root_name, 'subnodes')
- if root_subnodes:
- for node in root_subnodes:
- json_path = ['NPU', 'node', node, 'micro_step_id'] if 'NPU' in json_data \
- else ['node', node, 'micro_step_id']
- micro_step_id = self.json_get(json_data, *json_path)
- if request_micro_step_id == '-1' or str(micro_step_id) == request_micro_step_id:
- root_subnodes_set.append(node)
-
- def get_leaf_nodes(subnodes_set):
- npu_data = self.json_get(json_data, 'NPU')
- for node in subnodes_set:
- node_data = (
- self.json_get(npu_data, 'node', node) if npu_data else self.json_get(json_data, 'node', node)
- )
- if node_data:
- if node_data.get('subnodes'):
- get_leaf_nodes(node_data.get('subnodes'))
- else:
- all_node_names.append(node)
-
- get_leaf_nodes(root_subnodes_set)
-
- return all_node_names
-
- # 检查到底是读一般还是用之前存的
- def check_jsondata(self, request):
- meta_data = {
- "tag": request.args.get("tag"),
- "run": request.args.get('run')
- }
- graph_data, _ = GraphUtils.get_graph_data(meta_data)
- return graph_data
-
- def json_get(self, data, *args):
- result = data
- for key in args:
- if result is None:
- return None
- result = result.get(key)
- return result
diff --git a/plugins/tensorboard-plugins/tb_graph_ascend/setup.py b/plugins/tensorboard-plugins/tb_graph_ascend/setup.py
index 101b8900e42b39b2061097647aa29fb21ab971c3..2fd9b523601691aa057d7d60f41e3d7a951468ac 100644
--- a/plugins/tensorboard-plugins/tb_graph_ascend/setup.py
+++ b/plugins/tensorboard-plugins/tb_graph_ascend/setup.py
@@ -16,7 +16,7 @@
# --------------------------------------------------------------------------------------------#
import setuptools
-VERSION = '8.1.2'
+VERSION = '8.2.0'
INSTALL_REQUIRED = ["tensorboard >= 2.11.2"]
setuptools.setup(