From 2aa62eddd73891053b57a84d8088da7368d3c70a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9A=E6=99=97?= Date: Tue, 3 Dec 2024 11:50:54 +0000 Subject: [PATCH] =?UTF-8?q?=E7=BD=91=E7=BB=9C=E5=B7=A5=E5=85=B7demo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 姚晗 --- .../analysis/cluster_net_checker/README.md | 37 +++++++++++++++++++ .../analysis/cluster_net_checker/__init__.py | 0 .../cluster_net_checker.py | 0 .../analysis/cluster_net_checker/stats.ipynb | 0 .../cluster_net_checker_export.py | 27 ++++++++++++++ 5 files changed, 64 insertions(+) create mode 100644 profiler/cluster_analyse/analysis/cluster_net_checker/README.md create mode 100644 profiler/cluster_analyse/analysis/cluster_net_checker/__init__.py create mode 100644 profiler/cluster_analyse/analysis/cluster_net_checker/cluster_net_checker.py create mode 100644 profiler/cluster_analyse/analysis/cluster_net_checker/stats.ipynb create mode 100644 profiler/cluster_analyse/cluster_statistics_export/cluster_net_checker_export.py diff --git a/profiler/cluster_analyse/analysis/cluster_net_checker/README.md b/profiler/cluster_analyse/analysis/cluster_net_checker/README.md new file mode 100644 index 00000000000..712bb336c1f --- /dev/null +++ b/profiler/cluster_analyse/analysis/cluster_net_checker/README.md @@ -0,0 +1,37 @@ +# 网络工具demo + + +## 功能 + +1. 网络拓扑展示 + + a. 支持万卡+拓扑展示(选择并展示集群中特定通信域拓扑) + + b. 通信域信息准确(从训练中采集) + + c. 物理拓扑获取(调研中) + + +2. 问题rank分析 + + a. 快慢卡 + + b. 慢链路 + + c. rank指标排序 + + +## 使用流程 + +采集+分析+可视化 + +1. 采集: + - profiling工具动态采集 + - 插入到mindspeed的patch代码(获取并行信息,并使用mstx将并行信息写入profiling db) + +2. 分析: + - cluster_net_checker.py + +3. 
可视化:
+ - 快速穿刺版本 stats.ipynb
+ - 未来使用 insight
\ No newline at end of file
diff --git a/profiler/cluster_analyse/analysis/cluster_net_checker/__init__.py b/profiler/cluster_analyse/analysis/cluster_net_checker/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/profiler/cluster_analyse/analysis/cluster_net_checker/cluster_net_checker.py b/profiler/cluster_analyse/analysis/cluster_net_checker/cluster_net_checker.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/profiler/cluster_analyse/analysis/cluster_net_checker/stats.ipynb b/profiler/cluster_analyse/analysis/cluster_net_checker/stats.ipynb
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/profiler/cluster_analyse/cluster_statistics_export/cluster_net_checker_export.py b/profiler/cluster_analyse/cluster_statistics_export/cluster_net_checker_export.py
new file mode 100644
index 00000000000..43b5e68b970
--- /dev/null
+++ b/profiler/cluster_analyse/cluster_statistics_export/cluster_net_checker_export.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2024, Huawei Technologies Co., Ltd.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cluster_statistics_export.stats_export import StatsExport
+
+
+QUERY = """
+"""  # placeholder: the query text is currently a single newline
+
+
+class ClusterNetCheckerExport(StatsExport):
+    """StatsExport subclass that sets its ``_query`` attribute to the module-level QUERY."""
+    def __init__(self, db_path, recipe_name):
+        super().__init__(db_path, recipe_name)  # both arguments are forwarded unchanged to StatsExport
+        self._query = QUERY  # assigned after the base initializer; presumably read by StatsExport - TODO confirm
\ No newline at end of file
--
Gitee