diff --git a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
index b301e343f131faa6262faed5a99976c78065ffa9..5a7f9e3f95c2e283e69a3a4b3e60b5c8ac179d5f 100644
--- a/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
+++ b/profiler/msprof_analyze/advisor/analyzer/computation/ai_core_performance/ai_core_performance_checker.py
@@ -538,7 +538,9 @@ class AICorePerformanceChecker:
suggestion = ""
if "varlen" in op.lower():
# 处理变长算子 如果不亲和则affinity_flag为False
- inner_axis = convert_to_int_with_exception(shape.split("-")[0].split(";")[0].split(",")[2])
+ inner_axis = 0
+ if len(shape.split("-")[0].split(";")[0].split(",")) >= 3:
+ inner_axis = convert_to_int_with_exception(shape.split("-")[0].split(";")[0].split(",")[2])
if inner_axis % self.INNER_AXIS_128 != 0:
affinity_flag = True
suggestion = self._fa_affinity_desc_head_dim_128
@@ -550,7 +552,9 @@ class AICorePerformanceChecker:
else:
# 处理定长算子 如果不亲和则affinity_flag为False
head_dim = 0
- seq_len = convert_to_int_with_exception(shape.split("-")[1].split(";")[0].split(",")[2])
+ seq_len = 0
+ if len(shape.split("-")[1].split(";")[0].split(",")) >= 3:
+ seq_len = convert_to_int_with_exception(shape.split("-")[1].split(";")[0].split(",")[2])
input_first_tensor = shape.split("-")[0].split(";")[0].split(",")
if len(input_first_tensor) == 3:
head_dim = safe_division(convert_to_int_with_exception(input_first_tensor[2]),
diff --git a/profiler/msprof_analyze/advisor/common/graph/graph_match.py b/profiler/msprof_analyze/advisor/common/graph/graph_match.py
index 1cf2fe170d2ab8d3e29429785c7b6398cc0dd964..86b130e56818b439958f9f56552fc908867dfd13 100644
--- a/profiler/msprof_analyze/advisor/common/graph/graph_match.py
+++ b/profiler/msprof_analyze/advisor/common/graph/graph_match.py
@@ -297,6 +297,8 @@ def get_next_candidates(config: CandidateArgsConfig) -> List[Dict[Hashable, Hash
# Find a longer backbone node
nodes_with_maximum_backbone.append(query_node_id)
+ if not nodes_with_maximum_backbone:
+ return []
# next_node is connected to the current backbone.
next_node = max(nodes_with_maximum_backbone, key=lambda x: node_priority.get(x, 0))
diff --git a/profiler/msprof_analyze/advisor/config/config.py b/profiler/msprof_analyze/advisor/config/config.py
index 80057b2a5d664c38e5bc428e5b70065df074f4c7..f01a502fd176dab0d0b38847921c832d7639cfba 100644
--- a/profiler/msprof_analyze/advisor/config/config.py
+++ b/profiler/msprof_analyze/advisor/config/config.py
@@ -16,6 +16,7 @@
import logging
import os
+import html
from msprof_analyze.advisor.utils.utils import Timer
from msprof_analyze.prof_common.singleton import singleton
@@ -107,42 +108,42 @@ class Config:
@property
def timeline_api_doc_url(self) -> str:
try:
- return self.config.get("URL", "timeline_api_doc_url")
+ return html.escape(self.config.get("URL", "timeline_api_doc_url"))
except Exception:
return ""
@property
def timeline_with_stack_doc_url(self) -> str:
try:
- return self.config.get("URL", "timeline_with_stack_doc_url")
+ return html.escape(self.config.get("URL", "timeline_with_stack_doc_url"))
except Exception:
return ""
@property
def pytorch_aoe_operator_tune_url(self) -> str:
try:
- return self.config.get("URL", "pytorch_aoe_operator_tune_url")
+ return html.escape(self.config.get("URL", "pytorch_aoe_operator_tune_url"))
except Exception:
return ""
@property
def mslite_infer_aoe_operator_tune_url(self) -> str:
try:
- return self.config.get("URL", "mslite_infer_aoe_operator_tune_url")
+ return html.escape(self.config.get("URL", "mslite_infer_aoe_operator_tune_url"))
except Exception:
return ""
@property
def enable_compiled_tune_url(self) -> str:
try:
- return self.config.get("URL", "enable_compiled_tune_url")
+ return html.escape(self.config.get("URL", "enable_compiled_tune_url"))
except Exception:
return ""
@property
def ascend_profiler_url(self) -> str:
try:
- return self.config.get("URL", "ascend_profiler_url")
+ return html.escape(self.config.get("URL", "ascend_profiler_url"))
except Exception:
return ""
diff --git a/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py b/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py
index 512a7ae16354be0dea91f824e14241c4592438d2..f676d82caf005ab8f637b8eebf1b9247a287e00b 100644
--- a/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py
+++ b/profiler/msprof_analyze/advisor/dataset/timeline_event_dataset.py
@@ -148,15 +148,23 @@ class BaseTimelineEventDataset(Dataset):
return True
def parse_from_db(self):
- db_helper = TimelineDBHelper(self.timeline_file)
- if not db_helper.init_timeline_db_helper():
+ db_helper = None
+ try:
+ db_helper = TimelineDBHelper(self.timeline_file)
+ if not db_helper.init_timeline_db_helper():
+ return False
+ for _, collector in tqdm(self.collector_map.items(), leave=False,
+ desc="Building dataset for timeline analysis"):
+ for event_type in collector.get_event_type():
+ df = db_helper.query_timeline_event(event_type)
+ collector.add_op_from_db(df)
+ except Exception:
+ logger.warning("Error %s while parsing from db, file %s", traceback.format_exc(),
+ self.timeline_file)
return False
- for _, collector in tqdm(self.collector_map.items(), leave=False,
- desc="Building dataset for timeline analysis"):
- for event_type in collector.get_event_type():
- df = db_helper.query_timeline_event(event_type)
- collector.add_op_from_db(df)
- db_helper.destroy_db_connection()
+ finally:
+ if db_helper:
+ db_helper.destroy_db_connection()
return True
def parse_data_with_generator(self, func):
diff --git a/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py b/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py
index c525422b9a4749d4987b245c83211efe8f5df83f..37355d9ceb503608f26c40f579a74d4ac3217a13 100644
--- a/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py
+++ b/profiler/msprof_analyze/advisor/display/prompt/cn/dynamic_shape_prompt.py
@@ -20,4 +20,4 @@ class DynamicShapePrompt(object):
ENABLE_COMPILED_SUGGESTION = "在python脚本入口加入以下代码关闭在线编译:\n" \
"'torch_npu.npu.set_compile_mode(jit_compile=False) \n " \
"torch_npu.npu.config.allow_internal_format = False' \n"
- RELEASE_SUGGESTION = "详细信息请参考:链接"
\ No newline at end of file
+ RELEASE_SUGGESTION = "详细信息请参考:链接"
diff --git a/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py b/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py
index b1fb76f9b79e359fe20762128287df00f8839f84..e056ed1b7989b7b53658f6b621b65a91545d6ee1 100644
--- a/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py
+++ b/profiler/msprof_analyze/advisor/display/prompt/en/operator_prompt.py
@@ -22,10 +22,10 @@ class OperatorPrompt(object):
"converter_lite --fmk=ONNX --optimize=ascend_oriented --saveType=MINDIR " \
"--modelFile=$user_model.onnx --outputFile=user_model " \
"--configFile=./config.txt\n"
- PYTORCH_RELEASE_SUGGESTION = "for details please refer to link : LINK"
+ PYTORCH_RELEASE_SUGGESTION = "for details please refer to link : LINK"
MSLITE_RELEASE_SUGGESTION = "\nThe config file for MSLite AOE usage is as follows:\n" \
"[ascend_context]\n" \
"aoe_mode=\"operator tuning\"\n" \
"--tune_ops_file={}\n" \
"\nFor details please refer to link : LINK"
+ "\"{}\" target='_blank'>LINK"
diff --git a/profiler/msprof_analyze/advisor/utils/file.py b/profiler/msprof_analyze/advisor/utils/file.py
index 516077ee72ea431201d0bcc3ae7aa217b3982602..8eb4240b0c2f42d7dc847ee39c911ffaa02abbfc 100644
--- a/profiler/msprof_analyze/advisor/utils/file.py
+++ b/profiler/msprof_analyze/advisor/utils/file.py
@@ -73,7 +73,10 @@ class FdOpen:
def __exit__(self, exc_type, exc_val, exc_tb):
if self.file_open:
- self.file_open.close()
+ try:
+ self.file_open.close()
+ except Exception:
+ os.close(self.fd)
elif self.fd:
os.close(self.fd)
diff --git a/profiler/msprof_analyze/advisor/utils/utils.py b/profiler/msprof_analyze/advisor/utils/utils.py
index 001949d6e29c04837f7bb527a3bce2547f58f116..9f84263ab9c6a3361f70331ed67b3775edf874db 100644
--- a/profiler/msprof_analyze/advisor/utils/utils.py
+++ b/profiler/msprof_analyze/advisor/utils/utils.py
@@ -346,7 +346,6 @@ class SafeOpen:
def __exit__(self, exc_type, exc_val, exc_tb):
if self.file:
self.file.close()
- return True
def get_file_path_by_walk(root, filename):
diff --git a/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py b/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py
index e8ca793f525b0279053bc9848f99f21016ea6295..62e24bebe03d65c0afb6abad8ffe278826eb6810 100644
--- a/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py
+++ b/profiler/msprof_analyze/cluster_analyse/analysis/communication_analysis.py
@@ -68,9 +68,11 @@ class CommunicationAnalysis(BaseAnalysis):
result_db = os.path.join(output_path, Constant.DB_CLUSTER_COMMUNICATION_ANALYZER)
DBManager.create_tables(result_db, self.COMMUNICATION_TIME_TABLE, self.COMMUNICATION_BANDWIDTH_TABLE)
conn, cursor = DBManager.create_connect_db(result_db)
- self.execute(conn, res_comm_time, self.COMMUNICATION_TIME_TABLE)
- self.execute(conn, res_comm_bandwidth, self.COMMUNICATION_BANDWIDTH_TABLE)
- DBManager.destroy_db_connect(conn, cursor)
+ try:
+ self.execute(conn, res_comm_time, self.COMMUNICATION_TIME_TABLE)
+ self.execute(conn, res_comm_bandwidth, self.COMMUNICATION_BANDWIDTH_TABLE)
+ finally:
+ DBManager.destroy_db_connect(conn, cursor)
def compute_total_info(self, comm_ops: dict):
if not comm_ops:
diff --git a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py
index f751de56fe3d622e705c481220cf4a6760b163d0..4d23292bf4629b027acef0f164154efa517f893f 100644
--- a/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py
+++ b/profiler/msprof_analyze/cluster_analyse/cluster_data_preprocess/msprof_data_preprocessor.py
@@ -14,6 +14,7 @@
# limitations under the License.
import os
import re
+import shlex
from collections import defaultdict
from msprof_analyze.cluster_analyse.cluster_data_preprocess.data_preprocessor import DataPreprocessor
@@ -56,6 +57,8 @@ class MsprofDataPreprocessor(DataPreprocessor):
prof_data_uid = defaultdict(list)
prof_data_rank = defaultdict(list)
for dir_name in self.path_list:
+ # 对dir_name进行转义处理,防止命令注入
+ escaped_dir = shlex.quote(dir_name)
info_json_file = self._find_info_json_file(dir_name)
if not info_json_file:
logger.error(f"Profiling data in not completed, please check the info.json file in the path {dir_name}")
@@ -68,12 +71,12 @@ class MsprofDataPreprocessor(DataPreprocessor):
self.data_type.add(Constant.TEXT)
else:
logger.error(f"The profiling data has not been fully parsed. You can parse it by executing "
- f"the following command: msprof --analyze=on --output={dir_name}")
+ f"the following command: msprof --analyze=on --output={escaped_dir}")
continue
else:
logger.error(f"The profiling data has not been fully parsed. You can parse it by executing "
- f"the following command: msprof --export=on --output={dir_name}; "
- f"msprof --analyze=on --output={dir_name}")
+ f"the following command: msprof --export=on --output={escaped_dir}; "
+ f"msprof --analyze=on --output={escaped_dir}")
continue
info_json = FileManager.read_json_file(info_json_file)
rank_id = info_json.get("rank_id")
diff --git a/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py b/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py
index 9678b91abd1a973f4720511b9f640740bd7236cf..2f07bab67d3d1ab34bd4514d393e24ef663f7ea6 100644
--- a/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py
+++ b/profiler/msprof_analyze/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py
@@ -22,7 +22,7 @@ class OperatorMemoryBean:
NA = "N/A"
def __init__(self, data: dict):
- self._data = data
+ self._data = data.copy()
self._name = ""
self._size = 0.0
self._allocation_time = Decimal(0)
diff --git a/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py b/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py
index b68029a57ec8398cd1e934d556034608afb0ba9b..592f2a11a29a2e3241b8875642336df26069f93a 100644
--- a/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py
+++ b/profiler/msprof_analyze/compare_tools/compare_backend/profiling_parser/npu_profiling_db_parser.py
@@ -75,6 +75,12 @@ class NPUProfilingDbParser:
self.comm_task_data = []
self.compute_op_data = []
+ def __del__(self):
+ try:
+ DBManager.destroy_db_connect(self.conn, self.cursor)
+ except Exception:
+ logger.warning("Failed to release database connection in NPUProfilingDbParser.")
+
def load_data(self) -> ProfilingResult:
self._prepare_data()
if self._enable_communication_compare:
diff --git a/profiler/msprof_analyze/prof_common/path_manager.py b/profiler/msprof_analyze/prof_common/path_manager.py
index 05970362ba2410da3ea281a4fb9a8812c9d1575a..c6ac5a1dcd4eb75029e40d7e8cab5262ea6869b9 100644
--- a/profiler/msprof_analyze/prof_common/path_manager.py
+++ b/profiler/msprof_analyze/prof_common/path_manager.py
@@ -179,7 +179,7 @@ class PathManager:
if os.path.exists(path):
return
try:
- os.makedirs(path, mode=cls.DATA_DIR_AUTHORITY)
+ os.makedirs(path, mode=cls.DATA_DIR_AUTHORITY, exist_ok=True)
except Exception as err:
raise RuntimeError(msg) from err