From f777e60edf533c439fc3b0e079b2f2aba9b53f17 Mon Sep 17 00:00:00 2001 From: hehongzhe <935062458@qq.com> Date: Thu, 4 Sep 2025 10:28:31 +0800 Subject: [PATCH] safe fix --- .../ai_core_performance_checker.py | 8 +++++-- .../advisor/common/graph/graph_match.py | 2 ++ .../msprof_analyze/advisor/config/config.py | 13 +++++----- .../advisor/dataset/timeline_event_dataset.py | 24 ++++++++++++------- .../display/prompt/cn/dynamic_shape_prompt.py | 2 +- .../display/prompt/en/operator_prompt.py | 4 ++-- profiler/msprof_analyze/advisor/utils/file.py | 5 +++- .../msprof_analyze/advisor/utils/utils.py | 1 - .../analysis/communication_analysis.py | 8 ++++--- .../msprof_data_preprocessor.py | 9 ++++--- .../origin_data_bean/operator_memory_bean.py | 2 +- .../npu_profiling_db_parser.py | 6 +++++ .../prof_common/path_manager.py | 2 +- 13 files changed, 57 insertions(+), 29 deletions(-) diff --git a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py index b301e343f1..5a7f9e3f95 100644 --- a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py +++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py @@ -538,7 +538,9 @@ class AICorePerformanceChecker: suggestion = "" if "varlen" in op.lower(): # 处理变长算子 如果不亲和则affinity_flag为False - inner_axis = convert_to_int_with_exception(shape.split("-")[0].split(";")[0].split(",")[2]) + inner_axis = 0 + if len(shape.split("-")[0].split(";")[0].split(",")) >= 3: + inner_axis = convert_to_int_with_exception(shape.split("-")[0].split(";")[0].split(",")[2]) if inner_axis % self.INNER_AXIS_128 != 0: affinity_flag = True suggestion = self._fa_affinity_desc_head_dim_128 @@ -550,7 +552,9 @@ class AICorePerformanceChecker: else: # 处理定长算子 如果不亲和则affinity_flag为False head_dim = 0 - seq_len = 
convert_to_int_with_exception(shape.split("-")[1].split(";")[0].split(",")[2]) + seq_len = 0 + if len(shape.split("-")[1].split(";")[0].split(",")) >= 3: + seq_len = convert_to_int_with_exception(shape.split("-")[1].split(";")[0].split(",")[2]) input_first_tensor = shape.split("-")[0].split(";")[0].split(",") if len(input_first_tensor) == 3: head_dim = safe_division(convert_to_int_with_exception(input_first_tensor[2]), diff --git a/profiler/msprof_analyze/advisor/common/graph/graph_match.py b/profiler/msprof_analyze/advisor/common/graph/graph_match.py index 1cf2fe170d..86b130e568 100644 --- a/profiler/msprof_analyze/advisor/common/graph/graph_match.py +++ b/profiler/msprof_analyze/advisor/common/graph/graph_match.py @@ -297,6 +297,8 @@ def get_next_candidates(config: CandidateArgsConfig) -> List[Dict[Hashable, Hash # Find a longer backbone node nodes_with_maximum_backbone.append(query_node_id) + if not nodes_with_maximum_backbone: + return [] # next_node is connected to the current backbone. 
next_node = max(nodes_with_maximum_backbone, key=lambda x: node_priority.get(x, 0)) diff --git a/profiler/msprof_analyze/advisor/config/config.py b/profiler/msprof_analyze/advisor/config/config.py index 80057b2a5d..f01a502fd1 100644 --- a/profiler/msprof_analyze/advisor/config/config.py +++ b/profiler/msprof_analyze/advisor/config/config.py @@ -16,6 +16,7 @@ import logging import os +import html from msprof_analyze.advisor.utils.utils import Timer from msprof_analyze.prof_common.singleton import singleton @@ -107,42 +108,42 @@ class Config: @property def timeline_api_doc_url(self) -> str: try: - return self.config.get("URL", "timeline_api_doc_url") + return html.escape(self.config.get("URL", "timeline_api_doc_url")) except Exception: return "" @property def timeline_with_stack_doc_url(self) -> str: try: - return self.config.get("URL", "timeline_with_stack_doc_url") + return html.escape(self.config.get("URL", "timeline_with_stack_doc_url")) except Exception: return "" @property def pytorch_aoe_operator_tune_url(self) -> str: try: - return self.config.get("URL", "pytorch_aoe_operator_tune_url") + return html.escape(self.config.get("URL", "pytorch_aoe_operator_tune_url")) except Exception: return "" @property def mslite_infer_aoe_operator_tune_url(self) -> str: try: - return self.config.get("URL", "mslite_infer_aoe_operator_tune_url") + return html.escape(self.config.get("URL", "mslite_infer_aoe_operator_tune_url")) except Exception: return "" @property def enable_compiled_tune_url(self) -> str: try: - return self.config.get("URL", "enable_compiled_tune_url") + return html.escape(self.config.get("URL", "enable_compiled_tune_url")) except Exception: return "" @property def ascend_profiler_url(self) -> str: try: - return self.config.get("URL", "ascend_profiler_url") + return html.escape(self.config.get("URL", "ascend_profiler_url")) except Exception: return "" diff --git a/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py 
b/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py index 512a7ae163..f676d82caf 100644 --- a/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py @@ -148,15 +148,23 @@ class BaseTimelineEventDataset(Dataset): return True def parse_from_db(self): - db_helper = TimelineDBHelper(self.timeline_file) - if not db_helper.init_timeline_db_helper(): + db_helper = None + try: + db_helper = TimelineDBHelper(self.timeline_file) + if not db_helper.init_timeline_db_helper(): + return False + for _, collector in tqdm(self.collector_map.items(), leave=False, + desc="Building dataset for timeline analysis"): + for event_type in collector.get_event_type(): + df = db_helper.query_timeline_event(event_type) + collector.add_op_from_db(df) + except Exception: + logger.warning("Error %s while parsing from db, file %s", traceback.format_exc(), + self.timeline_file) return False - for _, collector in tqdm(self.collector_map.items(), leave=False, - desc="Building dataset for timeline analysis"): - for event_type in collector.get_event_type(): - df = db_helper.query_timeline_event(event_type) - collector.add_op_from_db(df) - db_helper.destroy_db_connection() + finally: + if db_helper: + db_helper.destroy_db_connection() return True def parse_data_with_generator(self, func): diff --git a/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py b/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py index c525422b9a..37355d9ceb 100644 --- a/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py +++ b/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py @@ -20,4 +20,4 @@ class DynamicShapePrompt(object): ENABLE_COMPILED_SUGGESTION = "在python脚本入口加入以下代码关闭在线编译:\n" \ "'torch_npu.npu.set_compile_mode(jit_compile=False) \n " \ "torch_npu.npu.config.allow_internal_format = False' \n" - RELEASE_SUGGESTION = "详细信息请参考:链接" \ No 
newline at end of file + RELEASE_SUGGESTION = "详细信息请参考:链接" diff --git a/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py b/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py index b1fb76f9b7..e056ed1b79 100644 --- a/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py +++ b/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py @@ -22,10 +22,10 @@ class OperatorPrompt(object): "converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \ "--modelFile=$user_model.onnx --outputFile=user_model " \ "--configFile=./config.txt\n" - PYTORCH_RELEASE_SUGGESTION = "for details please refer to link : LINK" + PYTORCH_RELEASE_SUGGESTION = "for details please refer to link : LINK" MSLITE_RELEASE_SUGGESTION = "\nThe config file for MSLite AOE usage is as follows:\n" \ "[ascend_context]\n" \ "aoe_mode=\"operator tuning\"\n" \ "--tune_ops_file={}\n" \ "\nFor details please refer to link : LINK" + "\"{}\" target='_blank'>LINK" diff --git a/profiler/msprof_analyze/advisor/utils/file.py b/profiler/msprof_analyze/advisor/utils/file.py index 516077ee72..8eb4240b0c 100644 --- a/profiler/msprof_analyze/advisor/utils/file.py +++ b/profiler/msprof_analyze/advisor/utils/file.py @@ -73,7 +73,10 @@ class FdOpen: def __exit__(self, exc_type, exc_val, exc_tb): if self.file_open: - self.file_open.close() + try: + self.file_open.close() + except Exception: + os.close(self.fd) elif self.fd: os.close(self.fd) diff --git a/profiler/msprof_analyze/advisor/utils/utils.py b/profiler/msprof_analyze/advisor/utils/utils.py index 001949d6e2..9f84263ab9 100644 --- a/profiler/msprof_analyze/advisor/utils/utils.py +++ b/profiler/msprof_analyze/advisor/utils/utils.py @@ -346,7 +346,6 @@ class SafeOpen: def __exit__(self, exc_type, exc_val, exc_tb): if self.file: self.file.close() - return True def get_file_path_by_walk(root, filename): diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py 
b/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py index e8ca793f52..62e24bebe0 100644 --- a/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py +++ b/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py @@ -68,9 +68,11 @@ class CommunicationAnalysis(BaseAnalysis): result_db = os.path.join(output_path, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER) DBManager.create_tables(result_db, self.COMMUNICATION_TIME_TABLE, self.COMMUNICATION_BANDWIDTH_TABLE) conn, cursor = DBManager.create_connect_db(result_db) - self.execute(conn, res_comm_time, self.COMMUNICATION_TIME_TABLE) - self.execute(conn, res_comm_bandwidth, self.COMMUNICATION_BANDWIDTH_TABLE) - DBManager.destroy_db_connect(conn, cursor) + try: + self.execute(conn, res_comm_time, self.COMMUNICATION_TIME_TABLE) + self.execute(conn, res_comm_bandwidth, self.COMMUNICATION_BANDWIDTH_TABLE) + finally: + DBManager.destroy_db_connect(conn, cursor) def compute_total_info(self, comm_ops: dict): if not comm_ops: diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py index f751de56fe..4d23292bf4 100644 --- a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py +++ b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py @@ -14,6 +14,7 @@ # limitations under the License. 
import os import re +import shlex from collections import defaultdict from msprof_analyze.cluster_analyse.cluster_data_preprocess.data_preprocessor import DataPreprocessor @@ -56,6 +57,8 @@ class MsprofDataPreprocessor(DataPreprocessor): prof_data_uid = defaultdict(list) prof_data_rank = defaultdict(list) for dir_name in self.path_list: + # 对dir_name进行转义处理,防止命令注入 + escaped_dir = shlex.quote(dir_name) info_json_file = self._find_info_json_file(dir_name) if not info_json_file: logger.error(f"Profiling data in not completed, please check the info.json file in the path {dir_name}") @@ -68,12 +71,12 @@ class MsprofDataPreprocessor(DataPreprocessor): self.data_type.add(Constant.TEXT) else: logger.error(f"The profiling data has not been fully parsed. You can parse it by executing " - f"the following command: msprof --analyze=on --output={dir_name}") + f"the following command: msprof --analyze=on --output={escaped_dir}") continue else: logger.error(f"The profiling data has not been fully parsed. 
You can parse it by executing " - f"the following command: msprof --export=on --output={dir_name}; " - f"msprof --analyze=on --output={dir_name}") + f"the following command: msprof --export=on --output={escaped_dir}; " + f"msprof --analyze=on --output={escaped_dir}") continue info_json = FileManager.read_json_file(info_json_file) rank_id = info_json.get("rank_id") diff --git a/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py b/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py index 9678b91abd..2f07bab67d 100644 --- a/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py +++ b/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py @@ -22,7 +22,7 @@ class OperatorMemoryBean: NA = "N/A" def __init__(self, data: dict): - self._data = data + self._data = data.copy() self._name = "" self._size = 0.0 self._allocation_time = Decimal(0) diff --git a/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py b/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py index b68029a57e..592f2a11a2 100644 --- a/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py +++ b/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py @@ -75,6 +75,12 @@ class NPUProfilingDbParser: self.comm_task_data = [] self.compute_op_data = [] + def __del__(self): + try: + DBManager.destroy_db_connect(self.conn, self.cursor) + except Exception: + logger.warning(f"Failed to release database connection in NPUProfilingDbParser.") + def load_data(self) -> ProfilingResult: self._prepare_data() if self._enable_communication_compare: diff --git a/profiler/msprof_analyze/prof_common/path_manager.py 
b/profiler/msprof_analyze/prof_common/path_manager.py index 05970362ba..c6ac5a1dcd 100644 --- a/profiler/msprof_analyze/prof_common/path_manager.py +++ b/profiler/msprof_analyze/prof_common/path_manager.py @@ -179,7 +179,7 @@ class PathManager: if os.path.exists(path): return try: - os.makedirs(path, mode=cls.DATA_DIR_AUTHORITY) + os.makedirs(path, mode=cls.DATA_DIR_AUTHORITY, exist_ok=True) except Exception as err: raise RuntimeError(msg) from err -- Gitee