From cc1eae2fc43b6a2d1d3b21c9bccfd32cf6cc41b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BE=9A=E6=98=8A=E5=AE=87?= <gonghaoyu1@h-partners.com>
Date: Tue, 5 Nov 2024 16:50:21 +0800
Subject: [PATCH 1/2] cluster_analyse: drop unused loop variables and simplify helpers

---
 .../cluster_analyse/analysis/base_analysis.py |  4 ++--
 .../analysis/comm_matrix_analysis.py          | 20 ++++++++++---------
 .../analysis/step_trace_time_analysis.py      |  7 ++-----
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/profiler/cluster_analyse/analysis/base_analysis.py b/profiler/cluster_analyse/analysis/base_analysis.py
index 8f48fd10e..156c8285a 100644
--- a/profiler/cluster_analyse/analysis/base_analysis.py
+++ b/profiler/cluster_analyse/analysis/base_analysis.py
@@ -95,6 +95,6 @@ class BaseAnalysis:
                 setdefault(op_name, {}).setdefault(rank_id, op_info)
 
     def combine_ops_total_info(self):
-        for rank_tup, group_dict in self.comm_ops_struct.items():
-            for step_id, communication_ops in group_dict.items():
+        for _, group_dict in self.comm_ops_struct.items():
+            for _, communication_ops in group_dict.items():
                 self.compute_total_info(communication_ops)
diff --git a/profiler/cluster_analyse/analysis/comm_matrix_analysis.py b/profiler/cluster_analyse/analysis/comm_matrix_analysis.py
index b8cf865d7..9e868ca0c 100644
--- a/profiler/cluster_analyse/analysis/comm_matrix_analysis.py
+++ b/profiler/cluster_analyse/analysis/comm_matrix_analysis.py
@@ -77,29 +77,31 @@ class CommMatrixAnalysis(BaseAnalysis):
             return tmp_link
 
         project_local_global_rank_map = dict()
+        default_value = {
+            Constant.TRANSPORT_TYPE: '',
+            Constant.TRANSIT_TIME_MS: 0,
+            Constant.TRANSIT_SIZE_MB: 0,
+            Constant.OP_NAME: ''
+        }
         for op_name, op_dict in step_dict.items():
-            link_info = defaultdict(lambda: {
-                Constant.TRANSPORT_TYPE: '',
-                Constant.TRANSIT_TIME_MS: 0,
-                Constant.TRANSIT_SIZE_MB: 0,
-                Constant.OP_NAME: ''
-            })
+            link_info = defaultdict(lambda:default_value.copy())
             for rank_id, rank_dict in op_dict.items():
                 process_link_key()
             step_dict[op_name] = convert_local_to_global_rank()
 
     def combine_link_info(self, step_dict: dict):
-        total_op_info = defaultdict(lambda: {
+        default_value = {
             Constant.TRANSPORT_TYPE: '',
             Constant.TRANSIT_TIME_MS: 0,
             Constant.TRANSIT_SIZE_MB: 0,
             Constant.OP_NAME: ''
-        })
+        }
+        total_op_info = defaultdict(lambda: default_value.copy())
         for op_name, op_dict in step_dict.items():
             if self.check_add_op(op_name):
                 for link_key, link_dict in op_dict.items():
                     self.combine_link(total_op_info[link_key], link_dict)
-        for link_key, link_dict in total_op_info.items():
+        for _, link_dict in total_op_info.items():
             link_dict[Constant.BANDWIDTH_GB_S] = \
                 self.compute_ratio(link_dict.get(Constant.TRANSIT_SIZE_MB, 0),
                                    link_dict.get(Constant.TRANSIT_TIME_MS, 0))
diff --git a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py
index 023ec332f..f6ecd81ec 100644
--- a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py
+++ b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py
@@ -43,11 +43,8 @@ class StepTraceTimeAnalysis:
         if not data_group_list:
             return []
         ret = []
-        for idx in range(len(data_group_list[0])):
-            max_val = 0
-            for idy in range(len(data_group_list)):
-                max_val = max(max_val, data_group_list[idy][idx])
-            ret.append(max_val)
+        for item in zip(*data_group_list):
+            ret.append(max(item))
         return ret
 
     def run(self):
-- 
Gitee


From 3b9d9114bb442ef794d5252de2f8f3e387eea6c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BE=9A=E6=98=8A=E5=AE=87?= <gonghaoyu1@h-partners.com>
Date: Wed, 6 Nov 2024 14:41:02 +0800
Subject: [PATCH 2/2] Add advisor ST test for a single ascend_pt profile (no comparison)

---
 profiler/test/st/advisor/__init__.py          |  0
 ...advisor_cmd_single_ascend_pt_no_compare.py | 76 +++++++++++++++++++
 profiler/test/st/advisor/utils.py             | 46 +++++++++++
 3 files changed, 122 insertions(+)
 create mode 100644 profiler/test/st/advisor/__init__.py
 create mode 100644 profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py
 create mode 100644 profiler/test/st/advisor/utils.py

diff --git a/profiler/test/st/advisor/__init__.py b/profiler/test/st/advisor/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py b/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py
new file mode 100644
index 000000000..694438a1d
--- /dev/null
+++ b/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py
@@ -0,0 +1,95 @@
+import logging
+import math
+import os
+import subprocess
+from unittest import TestCase
+
+import pandas as pd
+from bs4 import BeautifulSoup
+
+from profiler.prof_common.path_manager import PathManager
+from .utils import get_files, execute_cmd
+
+
+class TestAdvisorCmdSingleAscendPtNoCompare(TestCase):
+    """System test for ``msprof-analyze advisor all`` on a single ascend_pt profiling directory."""
+
+    ST_DATA_PATH = os.getenv("MSTT_PROFILER_ST_DATA_PATH",
+                             "/home/dcs-50/smoke_project_for_msprof_analyze/mstt_profiler/st_data")
+    BASE_PROFILING_PATH = os.path.join(ST_DATA_PATH, "cluster_data_3", "n122-122-067_12380_20240912033946038_ascend_pt")
+    COMPARISON_PROFILING_PATH = os.path.join(ST_DATA_PATH, "cluster_data_2",
+                                             "n122-120-121_12321_20240911113658382_ascend_pt")
+    OUTPUT_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), "TestAdvisorCmdSingleAscendPtNoCompare")
+    ALL_OUTPUT_PATH = os.path.join(OUTPUT_PATH, "all")
+    COMPUTATION_OUTPUT_PATH = os.path.join(OUTPUT_PATH, "computation")
+    SCHEDULE_OUTPUT_PATH = os.path.join(OUTPUT_PATH, "schedule")
+    RESULT_EXCEL = {}
+    RESULT_HTML = {}
+    COMMAND_SUCCESS = 0
+
+    @classmethod
+    def setup_class(cls):
+        """Run the advisor once for the whole class and collect its report files."""
+        PathManager.make_dir_safety(cls.OUTPUT_PATH)
+        PathManager.make_dir_safety(cls.ALL_OUTPUT_PATH)
+        cmd_all = ["msprof-analyze", "advisor", "all", "-d", cls.BASE_PROFILING_PATH, "-o", cls.ALL_OUTPUT_PATH]
+        if execute_cmd(cmd_all) != cls.COMMAND_SUCCESS or not os.path.exists(cls.ALL_OUTPUT_PATH):
+            # This hook runs on the class, so the TestCase.assert* instance helpers cannot be used.
+            raise AssertionError("advisor [all] task failed.")
+
+        cls.RESULT_HTML, cls.RESULT_EXCEL = get_files(cls.OUTPUT_PATH)
+
+    @classmethod
+    def teardown_class(cls):
+        """Remove the output directory created by setup_class."""
+        PathManager.remove_path_safety(cls.OUTPUT_PATH)
+
+    @staticmethod
+    def _nan_to_none(value, ndigits=None):
+        """Return None for a NaN cell, else the value (rounded to ndigits when given)."""
+        if isinstance(value, float) and math.isnan(value):
+            return None
+        return value if ndigits is None else round(value, ndigits)
+
+    def test_all_problems(self):
+        """Check every row of the 'problems' sheet against the expected values."""
+        category = [
+            "overall summary",
+            "bandwidth contention analysis",
+            "AICPU operator",
+            "Dynamic shape operator",
+            "Affinity apis",
+            "Operator dispatch"
+        ]
+
+        # None marks a cell that is expected to be empty (read back as NaN).
+        # A dedicated marker is needed because True == 1 == 1.0 in Python.
+        description_len = [6, 3, 2, 1, 1, 1]
+        suggestion_len = [None, 1, 2, 3, 1, 1]
+        problem_count = [None, None, 2.0, 1.0, None, None]
+        total_time = [None, None, 57674709.54, None, None, None]
+        time_ratio = [None, None, 0.0, None, None, None]
+        income = [None, None, None, None, None, None]
+        income_ratio = [None, None, None, None, None, None]
+
+        excel_path = self.RESULT_EXCEL.get("all")
+        if not excel_path:
+            self.fail("advisor [all] result excel was not collected.")
+        try:
+            df = pd.read_excel(excel_path, sheet_name='problems', header=0)
+        except FileNotFoundError:
+            # A missing report must fail the test instead of passing silently.
+            self.fail("File %s not found." % excel_path)
+
+        self.assertEqual(len(category), len(df), msg="unexpected row count in the 'problems' sheet")
+        for index, row in df.iterrows():
+            suggestion = row["suggestion"]
+            self.assertEqual(category[index], row["category"])
+            self.assertEqual(description_len[index], len(row["description"].split("\n")))
+            self.assertEqual(suggestion_len[index],
+                             None if isinstance(suggestion, float) else len(suggestion.split("\n")))
+            self.assertEqual(problem_count[index], self._nan_to_none(row["problem count"]))
+            self.assertEqual(total_time[index], self._nan_to_none(row["total_time(us)"], 2))
+            self.assertEqual(time_ratio[index], self._nan_to_none(row["time ratio"], 2))
+            self.assertEqual(income[index], self._nan_to_none(row["income(us)"], 2))
+            self.assertEqual(income_ratio[index], self._nan_to_none(row["income ratio"], 2))
diff --git a/profiler/test/st/advisor/utils.py b/profiler/test/st/advisor/utils.py
new file mode 100644
index 000000000..bb34af6b0
--- /dev/null
+++ b/profiler/test/st/advisor/utils.py
@@ -0,0 +1,71 @@
+import os
+import re
+import logging
+import subprocess
+
+RE_EXCEL_MATCH_EXP = r"^mstt_advisor_\d{1,20}\.xlsx"
+RE_HTML_MATCH_EXP = r"^mstt_advisor_\d{1,20}\.html"
+
+
+def execute_cmd(cmd):
+    """Run ``cmd`` (an argv list, no shell) and return its exit code.
+
+    A non-zero exit code is returned rather than raised, so the caller can
+    compare it with the value it expects.
+    """
+    logging.info('Execute command:%s', " ".join(cmd))
+    completed_process = subprocess.run(cmd, capture_output=True, shell=False, check=False)
+    if completed_process.returncode != 0:
+        logging.error('Command failed with code %s: %s', completed_process.returncode,
+                      completed_process.stderr.decode(errors="replace"))
+    return completed_process.returncode
+
+
+def _timestamp(file_name):
+    """Return the number embedded in ``mstt_advisor_<number>.<ext>``."""
+    return int(file_name.split(".")[0].split("_")[-1])
+
+
+def _newest_file(directory, pattern):
+    """Return the matching file name with the largest timestamp, or None if there is none."""
+    if not os.path.isdir(directory):
+        return None
+    matched = [name for name in os.listdir(directory) if re.match(pattern, name)]
+    # Compare as integers: as strings, "9" would sort after "10".
+    return max(matched, key=_timestamp, default=None)
+
+
+def get_files(out_path):
+    """Find the newest advisor html and xlsx report under each sub-directory of ``out_path``.
+
+    The html is looked up in ``<out_path>/<name>/`` and the xlsx in
+    ``<out_path>/<name>/log/``.
+
+    Returns:
+        (result_html, result_excel): dicts mapping sub-directory name to the
+        report path. A missing report is logged and left out of its dict.
+    """
+    result_html = {}
+    result_excel = {}
+    for pattern in os.listdir(out_path):
+        files_out_path = os.path.join(out_path, pattern)
+        if not os.path.isdir(files_out_path):
+            continue
+        log_dir = os.path.join(files_out_path, "log")
+        newest_html_file = _newest_file(files_out_path, RE_HTML_MATCH_EXP)
+        newest_excel_file = _newest_file(log_dir, RE_EXCEL_MATCH_EXP)
+
+        if newest_html_file:
+            result_html[pattern] = os.path.join(files_out_path, newest_html_file)
+        else:
+            logging.error("advisor [%s] result html was not found.", pattern)
+        if newest_excel_file:
+            result_excel[pattern] = os.path.join(log_dir, newest_excel_file)
+        else:
+            logging.error("advisor [%s] result excel was not found.", pattern)
+
+        # The html and the xlsx of one run are expected to carry the same timestamp.
+        if newest_html_file and newest_excel_file \
+                and _timestamp(newest_html_file) != _timestamp(newest_excel_file):
+            logging.error("advisor [%s] html file and excel file do not match.", pattern)
+    return result_html, result_excel
-- 
Gitee