diff --git a/profiler/cluster_analyse/analysis/base_analysis.py b/profiler/cluster_analyse/analysis/base_analysis.py index 8f48fd10e34fd9b8fd610b65f06f00b5b6cba6eb..156c8285a5ddf951747699a803c1bc79ddef9ba9 100644 --- a/profiler/cluster_analyse/analysis/base_analysis.py +++ b/profiler/cluster_analyse/analysis/base_analysis.py @@ -95,6 +95,6 @@ class BaseAnalysis: setdefault(op_name, {}).setdefault(rank_id, op_info) def combine_ops_total_info(self): - for rank_tup, group_dict in self.comm_ops_struct.items(): - for step_id, communication_ops in group_dict.items(): + for _, group_dict in self.comm_ops_struct.items(): + for _, communication_ops in group_dict.items(): self.compute_total_info(communication_ops) diff --git a/profiler/cluster_analyse/analysis/comm_matrix_analysis.py b/profiler/cluster_analyse/analysis/comm_matrix_analysis.py index b8cf865d7f12603da9cb7ea34556e102acb54046..9e868ca0cc76c8a30d2b49050a2089d6c4344b64 100644 --- a/profiler/cluster_analyse/analysis/comm_matrix_analysis.py +++ b/profiler/cluster_analyse/analysis/comm_matrix_analysis.py @@ -77,29 +77,31 @@ class CommMatrixAnalysis(BaseAnalysis): return tmp_link project_local_global_rank_map = dict() + default_value = { + Constant.TRANSPORT_TYPE: '', + Constant.TRANSIT_TIME_MS: 0, + Constant.TRANSIT_SIZE_MB: 0, + Constant.OP_NAME: '' + } for op_name, op_dict in step_dict.items(): - link_info = defaultdict(lambda: { - Constant.TRANSPORT_TYPE: '', - Constant.TRANSIT_TIME_MS: 0, - Constant.TRANSIT_SIZE_MB: 0, - Constant.OP_NAME: '' - }) + link_info = defaultdict(lambda:default_value.copy()) for rank_id, rank_dict in op_dict.items(): process_link_key() step_dict[op_name] = convert_local_to_global_rank() def combine_link_info(self, step_dict: dict): - total_op_info = defaultdict(lambda: { + default_value = { Constant.TRANSPORT_TYPE: '', Constant.TRANSIT_TIME_MS: 0, Constant.TRANSIT_SIZE_MB: 0, Constant.OP_NAME: '' - }) + } + total_op_info = defaultdict(lambda: default_value.copy()) for op_name, op_dict in 
step_dict.items(): if self.check_add_op(op_name): for link_key, link_dict in op_dict.items(): self.combine_link(total_op_info[link_key], link_dict) - for link_key, link_dict in total_op_info.items(): + for _, link_dict in total_op_info.items(): link_dict[Constant.BANDWIDTH_GB_S] = \ self.compute_ratio(link_dict.get(Constant.TRANSIT_SIZE_MB, 0), link_dict.get(Constant.TRANSIT_TIME_MS, 0)) diff --git a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py index 023ec332fbe538abb724bf16e28a6a7813819424..f6ecd81ec87c3ab434074d2fdf3c3eb387ea3b01 100644 --- a/profiler/cluster_analyse/analysis/step_trace_time_analysis.py +++ b/profiler/cluster_analyse/analysis/step_trace_time_analysis.py @@ -43,11 +43,8 @@ class StepTraceTimeAnalysis: if not data_group_list: return [] ret = [] - for idx in range(len(data_group_list[0])): - max_val = 0 - for idy in range(len(data_group_list)): - max_val = max(max_val, data_group_list[idy][idx]) - ret.append(max_val) + for item in zip(*data_group_list): + ret.append(max(item)) return ret def run(self): diff --git a/profiler/test/st/advisor/__init__.py b/profiler/test/st/advisor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py b/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py new file mode 100644 index 0000000000000000000000000000000000000000..694438a1ddf1238ae1aac4d1e5a8f0dc7fa6fe8f --- /dev/null +++ b/profiler/test/st/advisor/test_advisor_cmd_single_ascend_pt_no_compare.py @@ -0,0 +1,76 @@ +import os +import subprocess +import logging +from unittest import TestCase + +import math +import pandas as pd +from bs4 import BeautifulSoup + +from profiler.prof_common.path_manager import PathManager +from .utils import get_files, execute_cmd + + +class 
TestAdvisorCmdSingleAscendPtNoCompare(TestCase): + ST_DATA_PATH = os.getenv("MSTT_PROFILER_ST_DATA_PATH", + "/home/dcs-50/smoke_project_for_msprof_analyze/mstt_profiler/st_data") + BASE_PROFILING_PATH = os.path.join(ST_DATA_PATH, "cluster_data_3", "n122-122-067_12380_20240912033946038_ascend_pt") + COMPARISON_PROFILING_PATH = os.path.join(ST_DATA_PATH, "cluster_data_2", + "n122-120-121_12321_20240911113658382_ascend_pt") + OUTPUT_PATH = os.path.join(os.path.abspath(os.path.dirname(__file__)), "TestAdvisorCmdSingleAscendPtNoCompare") + ALL_OUTPUT_PATH = os.path.join(OUTPUT_PATH,"all") + COMPUTATION_OUTPUT_PATH = os.path.join(OUTPUT_PATH, "computation") + SCHEDULE_OUTPUT_PATH = os.path.join(OUTPUT_PATH, "schedule") + RESULT_EXCEL = {} + RESULT_HTML = {} + COMMAND_SUCCESS = 0 + + def setup_class(self): + PathManager.make_dir_safety(self.OUTPUT_PATH) + PathManager.make_dir_safety(self.ALL_OUTPUT_PATH) + cmd_all = ["msprof-analyze", "advisor", "all" ,"-d", self.BASE_PROFILING_PATH, "-o",self.ALL_OUTPUT_PATH] + if execute_cmd(cmd_all) != self.COMMAND_SUCCESS or not os.path.exists(self.ALL_OUTPUT_PATH): + self.assertEqual(False, True, msg="advisor [all] task failed.") + + self.RESULT_HTML,self.RESULT_EXCEL = get_files(self.OUTPUT_PATH) + + def teardown_class(self): + PathManager.remove_path_safety(self.OUTPUT_PATH) + + def test_all_problems(self): + + category = [ + "overall summary", + "bandwidth contention analysis", + "AICPU operator", + "Dynamic shape operator", + "Affinity apis", + "Operator dispatch" + ] + + #True presents the attr is nan + description_len = [6,3,2,1,1,1] + suggestion_len = [True,1,2,3,1,1] + problem_count = [True,True,2.0,1.0,True,True] + total_time = [True,True,57674709.54,True,True,True] + time_ratio = [True,True,0.0,True,True,True] + income = [True,True,True,True,True,True] + income_ratio = [True,True,True,True,True,True] + try: + df = pd.read_excel(self.RESULT_EXCEL["all"], sheet_name='problems',header=0) + except FileNotFoundError: + 
logging.error("File %s not found.", str(self.RESULT_EXCEL["all"])) + return + + for index, row in df.iterrows(): + self.assertEqual(category[index], row["category"]) + self.assertEqual(description_len[index], len(row["description"].split("\n"))) + self.assertEqual(suggestion_len[index], isinstance(row["suggestion"],float) or + len(row["suggestion"].split("\n"))) + self.assertEqual(problem_count[index], (math.isnan(row["problem count"]) or row["problem count"])) + self.assertEqual(total_time[index], (math.isnan(row["total_time(us)"]) or + round(row["total_time(us)"],2))) + self.assertEqual(time_ratio[index], (math.isnan(row["time ratio"]) or round(row["time ratio"],2))) + self.assertEqual(income[index], (math.isnan(row["income(us)"]) or round(row["income(us)"],2))) + self.assertEqual(income_ratio[index], (math.isnan(row["income ratio"]) or + round(row["income ratio"],2))) \ No newline at end of file diff --git a/profiler/test/st/advisor/utils.py b/profiler/test/st/advisor/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..bb34af6b08449dca04f825effd6a7611ab09f7f7 --- /dev/null +++ b/profiler/test/st/advisor/utils.py @@ -0,0 +1,46 @@ +import os +import re +import logging +import subprocess + +RE_EXCEL_MATCH_EXP = r"^mstt_advisor_\d{1,20}\.xlsx" +RE_HTML_MATCH_EXP = r"^mstt_advisor_\d{1,20}\.html" + +def execute_cmd(cmd): + logging.info('Execute command:%s' % " ".join(cmd)) + completed_process = subprocess.run(cmd, capture_output=True, shell=False, check=True) + return completed_process.returncode + +def get_files(out_path): + dirs = os.listdir(out_path) + result_html = {} + result_excel = {} + for pattern in dirs: + files_out_path = os.path.join(out_path, pattern) + files = os.listdir(files_out_path) + newest_html_file = None + newest_excel_file = None + for file_name in files: + if re.match(RE_HTML_MATCH_EXP, file_name): + file_time = file_name.split(".")[0].split("_")[-1] + if not newest_html_file or file_time > 
newest_html_file.split(".")[0].split("_")[-1]: + newest_html_file = file_name + if not newest_html_file: + logging.error("advisor [%s] result html was not found.", str(pattern)) + log_dir = os.path.join(files_out_path, "log") + log_files = os.listdir(log_dir) + for file_name in log_files: + if re.match(RE_EXCEL_MATCH_EXP, file_name): + file_time = file_name.split(".")[0].split("_")[-1] + if not newest_excel_file or file_time > newest_excel_file.split(".")[0].split("_")[-1]: + newest_excel_file = file_name + if not newest_excel_file: + logging.error("advisor [%s] result excel was not found.", str(pattern)) + + # the html timestamp must match the excel timestamp + if newest_html_file.split(".")[0].split("_")[-1] != newest_excel_file.split(".")[0].split("_")[-1]: + logging.error("advisor [%s] html file and excel file does not match.", str(pattern)) + + result_html[pattern] = os.path.join(files_out_path, newest_html_file) + result_excel[pattern] = os.path.join(log_dir, newest_excel_file) + return result_html, result_excel