diff --git a/profiler/advisor/analyzer/analyzer_controller.py b/profiler/advisor/analyzer/analyzer_controller.py
index 833289816d9e84fec494f998bb61d14eb36597ce..74b1ef7f300607d73195dacd9f9848dd02f80732 100644
--- a/profiler/advisor/analyzer/analyzer_controller.py
+++ b/profiler/advisor/analyzer/analyzer_controller.py
@@ -18,6 +18,7 @@ from profiler.advisor.analyzer.cluster.slow_link_analyzer import SlowLinkAnalyze
from profiler.advisor.analyzer.computation.pp_stage_computation_analyzer import PPStageComputationAnalyzer
from profiler.advisor.analyzer.overall.overall_summary_analyzer import OverallSummaryAnalyzer
from profiler.advisor.config.config import Config
+from profiler.advisor.common import constant as const
from profiler.advisor.common.analyzer_scopes import SupportedScopes
from profiler.advisor.common.async_analysis_status import AsyncAnalysisStatus
from profiler.advisor.utils.utils import Timer, safe_index_value, safe_division, safe_index
@@ -141,7 +142,7 @@ class AnalyzerController:
self._do_analysis(dimensions, pid=pid, async_resp=resp, **kwargs)
except Exception as e:
- self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.FAILED_STATUS_CODE,
+ self._update_analysis_process_resp(pid, resp, status_code=AsyncAnalysisStatus.INNER_ERROR_STATUS_CODE,
status=AsyncAnalysisStatus.FAILED, error_msg=str(e))
logger.error(e)
raise RuntimeError(e)
@@ -157,7 +158,24 @@ class AnalyzerController:
return async_analysis_process
def get_response_by_pid(self, pid):
- return self.analysis_process_resp.get(pid)
+ def _is_pid_exists(pid):
+ try:
+ psutil.Process(pid)
+ return True
+ except psutil.NoSuchProcess:
+ return False
+
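+        # Report "not found" (404) both for task ids that were never registered and for tasks whose
+        # worker process exited before reaching a terminal (success/failed) status.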
+ pid_not_exist_response = dict(id=pid, status_code=AsyncAnalysisStatus.NOT_FOUND_STATUS_CODE,
+ status=AsyncAnalysisStatus.FAILED,
+ error_msg="The advisor task id does not exist")
+ if pid not in self.analysis_process_resp:
+ return pid_not_exist_response
+
+ response = self.analysis_process_resp.get(pid)
+ if response.get("status") not in [AsyncAnalysisStatus.FAILED,
+ AsyncAnalysisStatus.SUCCESS] and not _is_pid_exists(pid):
+ return pid_not_exist_response
+ return response
def single_rank_analysis(self, profiling_path, benchmark_profiling_path=None):
job_list = []
@@ -223,7 +241,7 @@ class AnalyzerController:
overall_analyzer.optimize()
def schedule_analysis(self, profiling_path, benchmark_profiling_path=None, step=None, benchmark_step=None,
- **kwargs):
+ rank=None, **kwargs):
        # Schedule (task dispatch) analysis for any single rank
kwargs = copy.deepcopy(self.kwargs)
@@ -233,6 +251,7 @@ class AnalyzerController:
kwargs["benchmark_profiling_path"] = benchmark_profiling_path
kwargs["step"] = step
kwargs["benchmark_step"] = benchmark_step
+ kwargs["rank"] = rank
for dimension in [Interface.SCHEDULE]:
for scope in Interface.get_scope(dimension):
@@ -241,7 +260,7 @@ class AnalyzerController:
return job_list
def computation_analysis(self, profiling_path, benchmark_profiling_path=None, step=None,
- benchmark_step=None, stage=None, **kwargs):
+ benchmark_step=None, stage=None, rank=None, **kwargs):
        # Computation analysis for any single rank
kwargs = copy.deepcopy(self.kwargs)
@@ -250,6 +269,7 @@ class AnalyzerController:
kwargs["step"] = step
kwargs["benchmark_step"] = benchmark_step
kwargs["stage"] = stage
+ kwargs["rank"] = rank
job_list = []
for dimension in [Interface.COMPUTATION]:
@@ -260,7 +280,7 @@ class AnalyzerController:
job_list.append((dimension, scope, interface, kwargs))
return job_list
- def memory_analysis(self, profiling_path, benchmark_profiling_path=None, step=None, benchmark_step=None):
+ def memory_analysis(self, profiling_path, benchmark_profiling_path=None, step=None, benchmark_step=None, rank=None):
        # Memory analysis for any single rank
kwargs = copy.deepcopy(self.kwargs)
@@ -270,6 +290,7 @@ class AnalyzerController:
kwargs["benchmark_profiling_path"] = benchmark_profiling_path
kwargs["step"] = step
kwargs["benchmark_step"] = benchmark_step
+ kwargs["rank"] = rank
for dimension in [Interface.MEMORY]:
for scope in Interface.get_scope(dimension):
@@ -301,12 +322,18 @@ class AnalyzerController:
job_list = []
global_step_rank = self.slow_rank_analyzer.get_global_step_rank(SlowRankAnalyzer.FREE)
- slow_rank_id = global_step_rank.get("maximum", {}).get("rank_id") or self.default_rank_id
+
+        slow_rank_id = global_step_rank.get("maximum", {}).get("rank_id")
+        if slow_rank_id is not None:
+            info_msg = f"Maximum free time for rank {slow_rank_id}"
+        else:
+            slow_rank_id = self.default_rank_id
+            info_msg = f"No slow rank identified by free time, analyzing default rank {slow_rank_id}"
+
fast_rank_id = global_step_rank.get("minimum", {}).get("rank_id") or self.default_rank_id
slow_step = global_step_rank.get("maximum", {}).get("step")
fast_step = global_step_rank.get("minimum", {}).get("step")
- info_msg = f"Maximum free for rank {slow_rank_id}"
if slow_step:
info_msg += f" and step {slow_step}"
logger.info(info_msg)
@@ -398,14 +425,14 @@ class AnalyzerController:
if not self._check_profiling_path_valid(profiling_path):
error_msg = f"Got invalid argument '-d/--profiling_path' {profiling_path}, skip analysis"
self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg,
- status_code=AsyncAnalysisStatus.FAILED_STATUS_CODE,
+ status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE,
status=AsyncAnalysisStatus.FAILED)
logger.error(error_msg)
return
if benchmark_profiling_path and not self._check_profiling_path_valid(benchmark_profiling_path):
error_msg = f"Got invalid argument '-bp/--benchmark_profiling_path' {benchmark_profiling_path}, skip analysis"
self._update_analysis_process_resp(pid, async_resp, error_msg=error_msg,
- status_code=AsyncAnalysisStatus.FAILED_STATUS_CODE,
+ status_code=AsyncAnalysisStatus.BAD_REQUEST_STATUS_CODE,
status=AsyncAnalysisStatus.FAILED)
logger.error(error_msg)
return
@@ -472,6 +499,11 @@ class AnalyzerController:
def _profiling_comparison(self, compare_profiling_list):
job_list = []
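+        # Allow users to skip the time-consuming profiling comparison by setting the
+        # DISABLE_PROFILING_COMPARISON environment variable to true.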
+        disable_profiling_comparison = os.getenv(const.DISABLE_PROFILING_COMPARISON)
+        if disable_profiling_comparison is not None and disable_profiling_comparison.lower() == "true":
+            logger.info(
+                "Skip profiling comparison because env 'DISABLE_PROFILING_COMPARISON' is set to true; this step can take a long time")
+            return job_list
for index, _kwargs in enumerate(compare_profiling_list):
kwargs = copy.deepcopy(self.kwargs)
@@ -650,13 +682,17 @@ class AnalyzerController:
        # Do not distinguish pipeline parallel stages; analyze the min/max across all ranks
logger.info("Without pipeline parallel stage, steps and ranks to be analyzed are %s",
json.dumps(global_step_rank))
-        slow_rank_id = global_step_rank.get("maximum", {}).get("rank_id") or self.default_rank_id
+        slow_rank_id = global_step_rank.get("maximum", {}).get("rank_id")
+        if slow_rank_id is not None:
+            info_msg = f"Maximum computation time for rank {slow_rank_id}"
+        else:
+            slow_rank_id = self.default_rank_id
+            info_msg = f"No slow rank identified by computation time, analyzing default rank {slow_rank_id}"
slow_step = global_step_rank.get("maximum", {}).get("step")
        # If there is no rank id to serve as benchmark profiling data, there is no slow/fast rank issue;
        # just analyze the default rank id, so the value here is None
fast_rank_id = global_step_rank.get("minimum", {}).get("rank_id")
fast_step = global_step_rank.get("minimum", {}).get("step")
- info_msg = f"Maximum computation time for rank {slow_rank_id}"
if slow_step is not None:
info_msg += f" and step {slow_step}, "
if fast_rank_id is not None:
diff --git a/profiler/advisor/analyzer/cluster/slow_link_analyzer.py b/profiler/advisor/analyzer/cluster/slow_link_analyzer.py
index 9653a25c197e5c76f21863d9c9c905b6b2e0b3d5..259e5eb0c4255afc97aad83210b72a14b7285888 100644
--- a/profiler/advisor/analyzer/cluster/slow_link_analyzer.py
+++ b/profiler/advisor/analyzer/cluster/slow_link_analyzer.py
@@ -22,7 +22,7 @@ from profiler.advisor.common import constant
from profiler.advisor.result.result import OptimizeResult
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
from profiler.advisor.dataset.cluster.cluster_dataset import ClusterCommunicationDataset
-from profiler.advisor.utils.utils import safe_index_value
+from profiler.advisor.utils.utils import safe_index_value, convert_to_int
logger = logging.getLogger()
@@ -51,6 +51,7 @@ class SlowLinkAnalyzer(BaseAnalyzer):
self.result = OptimizeResult()
self.bottelneck = ''
self.suggestion = ''
+ self.format_datas = {}
if self.rank_bw_dict is not None:
self.format_datas = self.format_details()
@@ -104,7 +105,7 @@ class SlowLinkAnalyzer(BaseAnalyzer):
data_list = []
for step_rank, rank_bw in self.rank_bw_dict.items():
- step_rank_list = list(map(int, step_rank.split(constant.STEP_RANK_SEP)))
+ step_rank_list = list(map(convert_to_int, step_rank.split(constant.STEP_RANK_SEP)))
value_list = [rank_bw.get(i, 0) for i in headers]
data_list.append(step_rank_list + value_list)
data_list.sort(key=lambda x: (x[0], x[1])) # 按rank_id排序
@@ -147,6 +148,9 @@ class SlowLinkAnalyzer(BaseAnalyzer):
def get_global_step_rank(self, bindwidth_type):
global_step_rank = {}
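+        # format_datas is empty when no communication bandwidth data was parsed, so no slow/fast
+        # rank can be selected.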
+ if not self.format_datas:
+ return global_step_rank
+
bindwidth_key_map = {self.RDMA: self.RDMA_BANDWIDTH, self.SDMA: self.SDMA_BANDWIDTH}
if bindwidth_type not in bindwidth_key_map:
@@ -188,4 +192,4 @@ class SlowLinkAnalyzer(BaseAnalyzer):
return global_step_rank
def get_priority(self):
- pass
\ No newline at end of file
+ pass
diff --git a/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py b/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py
index efe32184b1ae1ebf025ade9ab65d474b9dc90672..bb3a8fdbd597a40b54a7274592d9efd7f82461b9 100644
--- a/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py
+++ b/profiler/advisor/analyzer/cluster/slow_rank_analyzer.py
@@ -20,8 +20,7 @@ from profiler.advisor.common import constant
from profiler.advisor.result.result import OptimizeResult
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
from profiler.advisor.dataset.cluster.cluster_dataset import ClusterStepTraceTimeDataset
-from profiler.advisor.utils.utils import safe_index_value
-from profiler.advisor.utils.utils import safe_division
+from profiler.advisor.utils.utils import safe_index_value, safe_division, convert_to_int
logger = logging.getLogger()
@@ -114,7 +113,7 @@ class SlowRankAnalyzer(BaseAnalyzer):
data_list = []
for key, value in self.step_trace_dict.items():
step, rank_id = key.split(constant.STEP_RANK_SEP)
- data_list.append([int(step), int(rank_id)] + value)
+ data_list.append([convert_to_int(step), convert_to_int(rank_id)] + value)
if step and step not in self._steps:
self._steps.add(step)
diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py
index 1c6aadb156a0439d2ae7572eb48fad7f82659f37..049952931a7007a2642e910f326ac59a3a648edd 100644
--- a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py
+++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_analyzer.py
@@ -33,9 +33,10 @@ class AICoreFreqAnalyzer(BaseAnalyzer):
add_render_list = kwargs.get("add_render_list", True)
ai_core_freq_checker = AICoreFreqChecker()
- ai_core_freq_checker.check_ai_core_freq(self.dataset, rank_id=kwargs.get("rank"), stage=kwargs.get("stage"))
+ ai_core_freq_checker.check_ai_core_freq(self.dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage"))
ai_core_freq_checker.make_record(self.result)
- self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority())
+ self.html = ai_core_freq_checker.make_render(self.html_render, add_render_list, priority=self.get_priority(),
+ rank=kwargs.get("rank"))
return self.result
def get_priority(self):
diff --git a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py
index 05c5cd25ee09637487dac4c9c464abda5936624a..2fd49a22a35c41f3ff19c715d36a7103fde2e540 100644
--- a/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py
+++ b/profiler/advisor/analyzer/computation/ai_core_freq/ai_core_freq_checker.py
@@ -23,10 +23,10 @@ class AICoreFreqChecker:
self.decrease_freq_ops = []
self.headers = []
self.op_freq = None
- self.rank_id = None
+ self.rank = None
self.stage = None
- def check_ai_core_freq(self, event_dataset: ComputationAnalysisDataset, rank_id=None, stage=None):
+ def check_ai_core_freq(self, event_dataset: ComputationAnalysisDataset, rank=None, stage=None):
"""
:Param event_dataset: dataset of timeline event
"""
@@ -35,7 +35,7 @@ class AICoreFreqChecker:
"because no ai core frequency were recorded in trace_view.json")
return
- self.rank_id = rank_id
+ self.rank = rank
self.stage = stage
self.op_freq = event_dataset.op_freq
for op_name, op_info in self.op_freq.items():
@@ -67,8 +67,8 @@ class AICoreFreqChecker:
self.desc = (f"{len(self.decrease_freq_ops)} operators are found during frequency reduction, and the reduction "
f"ratio is larger than {self.DECREASE_FREQ_RATIO}.")
- if self.rank_id:
- self.desc = f"For rank {self.rank_id}, " + self.desc.lower()
+ if self.rank:
+ self.desc = f"For rank {self.rank}, " + self.desc.lower()
self.suggestions = "Please check the temperature or max power of your machine."
def make_record(self, result: OptimizeResult):
@@ -79,8 +79,8 @@ class AICoreFreqChecker:
return self.ai_core_freq_issues
sheet_name = "AI Core Frequency"
- if self.rank_id is not None:
- sheet_name = f"rank {self.rank_id} AI Core Frequency".capitalize()
+ if self.rank is not None:
+ sheet_name = f"rank {self.rank} AI Core Frequency".capitalize()
optimization_item = OptimizeItem(sheet_name, self.desc, [self.suggestions])
result.add(OptimizeRecord(optimization_item))
@@ -108,4 +108,5 @@ class AICoreFreqChecker:
headers=self.headers,
data=self.decrease_freq_ops[:self.SHOW_TOPK_OPS],
add_render_list=add_render_list,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
diff --git a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py
index 394ad74fd7dcb739caa1f69929646f98207b2aa8..0c1b454cc8fc62a1522843d90335da5fb6be5709 100644
--- a/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py
+++ b/profiler/advisor/analyzer/computation/aicpu/aicpu_checker.py
@@ -157,7 +157,8 @@ class AicpuChecker(OperatorChecker):
format_result=self.format_operator_result(record,
constant.OPERATOR_LIST_UNLIMIT),
add_render_list=add_render_list,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
def format_operator_result(self, record, limit):
"""
diff --git a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py
index 8b8e3fa9f6c98f8cabc98de60197894d6a34c541..cb6a824cb7b768bf8b9a2387ec90ee309d521445 100644
--- a/profiler/advisor/analyzer/computation/bound/block_dim_checker.py
+++ b/profiler/advisor/analyzer/computation/bound/block_dim_checker.py
@@ -62,7 +62,8 @@ class BlockDimChecker(OperatorChecker):
format_result=self.format_operator_result(record,
constant.OPERATOR_OUT_TOPK),
add_render_list=add_render_list,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
def _check_operator(self, op_info) -> bool:
if op_info.task_type not in ["AI_CORE", "AI_VECTOR_CORE", "MIX_AIC"]:
diff --git a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py
index 2096e9ffaf2e5e041716dea381e2d99824fefe0f..cc4e6f135c85339faef69e1dad5782d12fd597bd 100644
--- a/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py
+++ b/profiler/advisor/analyzer/computation/bound/operator_bound_checker.py
@@ -54,4 +54,5 @@ class OperatorBoundChecker(OperatorChecker):
format_result=self.format_operator_result(record,
constant.OPERATOR_OUT_TOPK),
add_render_list=add_render_list,
- priority_background_color=priority)
\ No newline at end of file
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
\ No newline at end of file
diff --git a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py
index 9cd75a6e9323c63686cdad1f7c5efc2a408f64d5..639bc994ea5cbf3d0b62aff29e7aa8a6a768f498 100644
--- a/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py
+++ b/profiler/advisor/analyzer/computation/op_compile/dynamic_shape_checker.py
@@ -12,8 +12,9 @@ logger = logging.getLogger()
class DynamicShapeChecker(OperatorChecker):
- ENABLE_COMPILED_SUGGESTION = "Optimize by enabling compiled operator, such as:\n" \
- "`torch_npu.npu.set_compile_mode(jit_compile=False)`\n"
+    ENABLE_COMPILED_SUGGESTION = "Please place the following code at the beginning of the Python script to disable JIT compilation. " \
+                                 "Code: `torch_npu.npu.set_compile_mode(jit_compile=False); " \
+                                 "torch_npu.npu.config.allow_internal_format = False`"
_SUGGESTION: List[str] = [ENABLE_COMPILED_SUGGESTION]
_CHECKER = "dynamic shape operator"
_PROBLEM = "Dynamic shape operator"
@@ -28,13 +29,13 @@ class DynamicShapeChecker(OperatorChecker):
def check(self, profiling_database) -> bool:
return self.is_dynamic_shape(profiling_database)
- def make_record(self, profiling_database, rank_id=None) -> OptimizeRecord:
+ def make_record(self, profiling_database, rank=None) -> OptimizeRecord:
"""
make record for what and how to optimize
"""
- if rank_id is not None:
- self._PROBLEM = f"rank {rank_id} ".capitalize() + self._PROBLEM.lower()
+ if rank is not None:
+ self._PROBLEM = f"rank {rank} ".capitalize() + self._PROBLEM.lower()
optimization_item = OptimizeItem(
self._PROBLEM,
self._description,
@@ -56,7 +57,7 @@ class DynamicShapeChecker(OperatorChecker):
release_suggestion = copy.deepcopy(suggestion)
if release_suggestion == DynamicShapeChecker.ENABLE_COMPILED_SUGGESTION:
release_suggestion += \
- f"for details please refer to link : LINK"
+ f"for details please refer to link : LINK"
release_suggestion_list.append(release_suggestion.replace('\n', '
'))
format_result = {"record": record.__dict__, "suggestion": '
'.join(release_suggestion_list)}
return format_result
@@ -68,4 +69,5 @@ class DynamicShapeChecker(OperatorChecker):
template_name="operator_dynamic_shape.html",
format_result=self.format_operator_result(record),
add_render_list=add_render_list,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
diff --git a/profiler/advisor/analyzer/computation/operator_checker.py b/profiler/advisor/analyzer/computation/operator_checker.py
index 397b9d507ef3baf50efc4bae698a94c767bb8148..02b3e17f5517c43c9a7c28e6538fe88a633c2798 100644
--- a/profiler/advisor/analyzer/computation/operator_checker.py
+++ b/profiler/advisor/analyzer/computation/operator_checker.py
@@ -95,15 +95,15 @@ class OperatorChecker(VersionControl):
return True
return False
- def make_record(self, profiling_data: ProfilingDataset, rank_id=None):
+ def make_record(self, profiling_data: ProfilingDataset, rank=None):
"""
Make record for what and how to optimize
:param profiling_data: profiling data
:return: optimize record
"""
- if rank_id is not None:
- self._PROBLEM = f"rank {rank_id} ".capitalize() + self._PROBLEM.lower()
+ if rank is not None:
+ self._PROBLEM = f"rank {rank} ".capitalize() + self._PROBLEM.lower()
task_duration_list = [float(op_info.get_attr("task_duration")) for op_info in self._op_list if
hasattr(op_info, "get_attr")]
@@ -181,14 +181,14 @@ class OperatorChecker(VersionControl):
release_suggestion = copy.deepcopy(suggestion)
if release_suggestion == OperatorChecker.PyTorch_OPERATOR_TUNE_SUGGESTION:
release_suggestion += \
- (f"for details please refer to link : LINK")
+ (f"for details please refer to link : LINK")
elif release_suggestion == OperatorChecker.MSLite_OPERATOR_TUNE_SUGGESTION:
release_suggestion += \
(f"\nThe config file for MSLite AOE usage is as follows:\n" \
f"[ascend_context]\n" \
f"aoe_mode=\"operator tuning\"\n" \
f"--tune_ops_file={Config().tune_ops_file}\n"
- f"\nFor details please refer to link : LINK")
+ f"\nFor details please refer to link : LINK")
release_suggestion_list.append(release_suggestion.replace('\n', '
'))
format_result = {"record": record.__dict__,
"suggestion": fill('
'.join(release_suggestion_list), width=200),
diff --git a/profiler/advisor/analyzer/computation/pp_stage_computation_analyzer.py b/profiler/advisor/analyzer/computation/pp_stage_computation_analyzer.py
index 64a6c2ceba594ab3d9c34b17527c2119e13bca9b..b84b983c3f8da790bb2eb314f979ce79e47c3e5b 100644
--- a/profiler/advisor/analyzer/computation/pp_stage_computation_analyzer.py
+++ b/profiler/advisor/analyzer/computation/pp_stage_computation_analyzer.py
@@ -59,7 +59,7 @@ class PPStageComputationAnalyzer(BaseAnalyzer):
pass
def _optimize(self, profiling_path, **kwargs):
- stage_html_record = dict(stage=kwargs.get("stage"), rank_id=kwargs.get("rank"), step=kwargs.get("step"))
+ stage_html_record = dict(stage=kwargs.get("stage"), rank=kwargs.get("rank"), step=kwargs.get("step"))
kwargs["add_render_list"] = False
        # During per-stage parallel analysis, avoid invoking this analyzer itself, i.e. SupportedScopes.STAGE_COMPUTE
diff --git a/profiler/advisor/analyzer/computation/profiling_analyzer.py b/profiler/advisor/analyzer/computation/profiling_analyzer.py
index 20ebf1da7332ed8639f694a8cd9df16fec4ab0f4..a3e1b36fafd3ec8bee0e608f267703c0971995af 100644
--- a/profiler/advisor/analyzer/computation/profiling_analyzer.py
+++ b/profiler/advisor/analyzer/computation/profiling_analyzer.py
@@ -34,7 +34,7 @@ class ProfilingAnalyzer(BaseAnalyzer, ABC):
"""
profiling_data = self.get_first_data_by_key(self.dataset_list, ProfilingDataset.get_key())
checker = self.checker
- rank_id = kwargs.get("rank")
+ rank = kwargs.get("rank")
add_render_list = kwargs.get("add_render_list", True)
@@ -42,16 +42,16 @@ class ProfilingAnalyzer(BaseAnalyzer, ABC):
return self.result
if checker.check(profiling_data):
# add record
- record = checker.make_record(profiling_data, rank_id)
+ record = checker.make_record(profiling_data, rank)
self.html = checker.make_render(self.html_render, record, add_render_list,
- priority=self.get_priority(checker))
+ priority=self.get_priority(checker), rank=kwargs.get("rank"))
self.result.add(record)
# add details
details = checker.get_details()
if details:
for i, detail in enumerate(details):
- sheet_name = checker.get_name() if rank_id is None else \
- f"rank {rank_id} ".capitalize() + checker.get_name()
+ sheet_name = checker.get_name() if rank is None else \
+ f"rank {rank} ".capitalize() + checker.get_name()
if i == 0:
# the first row is header
self.result.add_detail(sheet_name, headers=detail)
diff --git a/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py
index 3d1a537c211a3ba26133f31e23284844d681d6e4..debbaa9eef493780f7e0d4ac2143f4e6dcc22f2e 100644
--- a/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py
+++ b/profiler/advisor/analyzer/dataloader/dataloader_analyzer.py
@@ -27,7 +27,7 @@ class DataloaderAnalyzer(BaseAnalyzer):
dataloader_checker = DataloaderChecker()
dataloader_checker.check_slow_dataloader(self.dataset)
dataloader_checker.make_record(self.result)
- dataloader_checker.make_render(self.html_render, priority=self.get_priority())
+ dataloader_checker.make_render(self.html_render, priority=self.get_priority(), rank=kwargs.get("rank"))
return self.result
def get_priority(self):
diff --git a/profiler/advisor/analyzer/dataloader/dataloader_checker.py b/profiler/advisor/analyzer/dataloader/dataloader_checker.py
index f392a0838ac03fd180c6f5201c7fc489f19a2ab7..376729a1b61cf838189ed86735bec56b7806a6b1 100644
--- a/profiler/advisor/analyzer/dataloader/dataloader_checker.py
+++ b/profiler/advisor/analyzer/dataloader/dataloader_checker.py
@@ -62,7 +62,8 @@ class DataloaderChecker:
template_name="slow_dataloader.html",
desc=self.desc,
suggestions=self.suggestions,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
def _init_rule(self):
dataloader_rule_path = os.path.join(
diff --git a/profiler/advisor/analyzer/memory/memory_analyzer.py b/profiler/advisor/analyzer/memory/memory_analyzer.py
index 097b6e17949b1afa14e3b5804795f1df593f6084..5f34b03f2cf74001c868c9caf9d8c84b6ff53630 100644
--- a/profiler/advisor/analyzer/memory/memory_analyzer.py
+++ b/profiler/advisor/analyzer/memory/memory_analyzer.py
@@ -25,7 +25,7 @@ class MemoryAnalyzer(BaseAnalyzer):
memory_checker = MemoryOpsChecker()
memory_checker.check_memory_ops(self.dataset)
memory_checker.make_record(self.result)
- memory_checker.make_render(self.html_render, priority=self.get_priority(memory_checker.max_mem_op_dur))
+        memory_checker.make_render(self.html_render, priority=self.get_priority(memory_checker.max_mem_op_dur),
+                                   rank=kwargs.get("rank"))
return self.result
def get_priority(self, max_mem_op_dur):
diff --git a/profiler/advisor/analyzer/memory/memory_checker.py b/profiler/advisor/analyzer/memory/memory_checker.py
index b906ffbc89531a64d8f72fa6f478824bc8bf9d2c..b66761d7a4876e5c85965937c753241e96a7d55b 100644
--- a/profiler/advisor/analyzer/memory/memory_checker.py
+++ b/profiler/advisor/analyzer/memory/memory_checker.py
@@ -73,4 +73,5 @@ class MemoryOpsChecker:
template_name="memory.html",
desc=self.desc,
suggestions=self.suggestions,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=kwargs.get("rank"))
diff --git a/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py b/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py
index 58b2c301b590e74c054ef997b1973e7a595bbc73..126fe30176cf6ca0f1d7d3557c360f95af7b20be 100644
--- a/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/dispatch/timeline_op_dispatch_analyzer.py
@@ -51,7 +51,7 @@ class OpDispatchAnalyzer(BaseAnalyzer):
"""
self.get_op_compile_info(self.dataset)
self.make_record(self.result)
- self.make_render(self.html_render)
+ self.make_render(self.html_render, rank=kwargs.get('rank'))
return self.result
def get_op_compile_info(self, event_dataset: ScheduleAnalysisDataset):
@@ -106,7 +106,8 @@ class OpDispatchAnalyzer(BaseAnalyzer):
template_name="operator_dispatch.html",
issues=issues,
optimizers=optimizations,
- priority_background_color=self.get_priority())
+ priority_background_color=self.get_priority(),
+ rank=kwargs.get("rank"))
def get_priority(self):
step_duration = getattr(self.dataset, "step_duration", None)
diff --git a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py
index 97f1d052a1e1d9223165e2465b6cbf0897f89069..b40e258818319d0c7428ce71a6d45bc9d1cc2026 100644
--- a/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/fusion_ops/fusion_ops_analyzer.py
@@ -45,7 +45,7 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer):
logger.info("Finish timeline analysis")
self.make_record()
- self.make_render()
+ self.make_render(rank=kwargs.get("rank"))
return self.result
def find_fusion_ops(self, event_dataset, ops: str, npu_api: str, mode: str):
@@ -180,7 +180,8 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer):
detail = [api_name, *stack]
self.result.add_detail(sheet_name, detail=detail)
- def make_render(self):
+ def make_render(self, **kwargs):
+ rank = kwargs.get("rank")
format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True)
self.html_render.render_template(key="schedule",
@@ -192,7 +193,8 @@ class TimelineFusionOpsAnalyzer(BaseAnalyzer):
with_stack_doc_url=Config().timeline_with_stack_doc_url,
api_doc_url=Config().timeline_api_doc_url,
result=format_result_for_html,
- priority_background_color=self.get_priority())
+ priority_background_color=self.get_priority(),
+ rank=rank)
def query_stack(self, event_dataset):
if all([len(matched_index) == 0 for matched_index in self._matched_op_index.values()]):
diff --git a/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py b/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py
index 3d142819db8222106a6bd58eb4341d43a8ca3b59..a504a21c70a839b9fec29b99f52275839ef9bf10 100644
--- a/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/gc/gc_analyzer.py
@@ -36,9 +36,9 @@ class GcAnalyzer(BaseAnalyzer):
@BaseAnalyzer.check_data((ScheduleAnalysisDataset.get_key(),))
def optimize(self, **kwargs):
gc_checker = GcChecker()
- gc_checker.check_gc(self.timeline_event_dataset, rank_id=kwargs.get("rank_id"), stage=kwargs.get("stage"))
+ gc_checker.check_gc(self.timeline_event_dataset, rank=kwargs.get("rank"), stage=kwargs.get("stage"))
gc_checker.make_record(self.result)
- gc_checker.make_render(self.html_render, priority=self.get_priority())
+ gc_checker.make_render(self.html_render, priority=self.get_priority(), rank=kwargs.get("rank"))
return self.result
def get_priority(self):
diff --git a/profiler/advisor/analyzer/schedule/gc/gc_checker.py b/profiler/advisor/analyzer/schedule/gc/gc_checker.py
index 1fbddf655758d4a4d31fe6925f6240b085c6c975..37a225ef8e597dece5f72d9f12d70846bd707188 100644
--- a/profiler/advisor/analyzer/schedule/gc/gc_checker.py
+++ b/profiler/advisor/analyzer/schedule/gc/gc_checker.py
@@ -13,13 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
+import math
import os
from profiler.advisor.dataset.timeline_event_dataset import ScheduleAnalysisDataset
from profiler.advisor.result.result import OptimizeResult
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
+from profiler.advisor.utils.utils import convert_to_float, convert_to_int, safe_division
+from profiler.advisor.common import constant as const
from profiler.cluster_analyse.common_func.file_manager import FileManager
-from profiler.advisor.utils.utils import convert_to_float, convert_to_int
logger = logging.getLogger()
@@ -28,9 +30,11 @@ class GcChecker:
def __init__(self):
self.stage = None
- self.rank_id = None
+ self.rank = None
self.optimization_item = []
self.gc_issues = False
+ self.gc_problem_with_count = ""
+ self.gc_problem_with_free = ""
self.desc = ""
self.suggestions = []
self.solutions = None
@@ -42,15 +46,29 @@ class GcChecker:
self.headers = ["timestamp", "duration(us)"]
self._init_rule()
- def check_gc(self, event_dataset: ScheduleAnalysisDataset, rank_id=None, stage=None):
+ def check_gc(self, event_dataset: ScheduleAnalysisDataset, rank=None, stage=None):
"""
:Param event_dataset: dataset of timeline event
"""
if not hasattr(event_dataset, "gc_events"):
logger.debug("Skip gc checker, because no gc event found")
return
- self.rank_id = rank_id
+ self.rank = rank
self.stage = stage
+
+        # When the user's CANN and PTA versions do not support collecting GC info, use the free events
+        # and the CANN-layer acl events in the timeline to jointly judge whether GC is likely occurring
+ if not event_dataset.gc_events:
+ acl_events = getattr(event_dataset, "acl_events", [])
+ large_free_events = getattr(event_dataset, "large_free_events", [])
+            # If acl_events is empty, CANN info was not collected, so do not run GC analysis based on free + acl events
+ if acl_events and large_free_events:
+ free_event = self.get_free_events_include_gc(large_free_events, acl_events)
+ if not free_event:
+ return
+ self.desc = self.gc_problem_with_free.format(free_duration_time=free_event.dur)
+
+ return
+
for gc_event in event_dataset.gc_events:
if convert_to_float(gc_event.dur) >= self.gc_threshold:
self.gc_issues = True
@@ -59,7 +77,8 @@ class GcChecker:
self.abnormal_gc_list.append([gc_event.ts, gc_event.dur])
self.abnormal_gc_duration = round(self.abnormal_gc_duration / 1000, 4)
self.abnormal_gc_list.sort(key=lambda x: x[1], reverse=True)
- self.desc = self.desc.format(gc_count=self.abnormal_gc_count, gc_total_time=self.abnormal_gc_duration)
+ self.desc = self.gc_problem_with_count.format(gc_count=self.abnormal_gc_count,
+ gc_total_time=self.abnormal_gc_duration)
def make_record(self, result: OptimizeResult):
"""
@@ -68,23 +87,24 @@ class GcChecker:
if not self.gc_issues:
return
- self.optimization_item.append(OptimizeItem("gc", self.desc, self.suggestions))
+ self.optimization_item.append(OptimizeItem("GC", self.desc, self.suggestions))
for optimization in self.optimization_item:
result.add(OptimizeRecord(optimization))
- if self.rank_id is not None:
+ if self.rank is not None:
self.headers = ["Rank id"] + self.headers
sub_table_name = "GcAnalysis" if not self.stage else f"Stage-{self.stage}: GcAnalysis"
result.add_detail(sub_table_name, headers=self.headers)
for row in self.abnormal_gc_list:
- if self.rank_id is not None:
- row = [self.rank_id] + row
+ if self.rank is not None:
+ row = [self.rank] + row
result.add_detail(sub_table_name, detail=row)
def make_render(self, html_render, **kwargs):
if not self.gc_issues:
return
priority = kwargs.get("priority")
+ rank = kwargs.get("rank")
show_num = min(self.gc_topk_num, self.abnormal_gc_count)
html_render.render_template(key="schedule",
template_dir="templates",
@@ -94,7 +114,8 @@ class GcChecker:
headers=self.headers,
datas=self.abnormal_gc_list[:show_num],
num=show_num,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=rank)
def _init_rule(self):
gc_rule_path = os.path.join(
@@ -107,9 +128,62 @@ class GcChecker:
self.gc_threshold = convert_to_float(gc_rule.get("gc_threshold", 0))
self.gc_topk_num = convert_to_int(gc_rule.get("top_num", 0))
- self.desc = gc_rule.get("problem", "")
+ self.gc_problem_with_count = gc_rule.get("gc_problem_with_count", "")
+ self.gc_problem_with_free = gc_rule.get("gc_problem_with_free", "")
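+        # Thresholds for the free-interval fallback: the acl-event density (count and total duration
+        # relative to the free interval length) below which a long free interval is attributed to GC.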
+ self.max_acl_event_num_ratio = convert_to_float(gc_rule.get("max_acl_event_num_ratio"))
+ self.max_acl_event_time_ratio = convert_to_float(gc_rule.get("max_acl_event_time_ratio"))
self.solutions = gc_rule.get("solutions", [])
for solution in self.solutions:
for key, val in solution.items():
self.suggestions.append(f"{key}, {val.get('desc')}")
+
+ def get_free_events_include_gc(self, large_free_events, acl_events):
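+        # Single pass over both timestamp-sorted lists: every acl event that falls entirely inside a
+        # large free interval is attributed to that interval, accumulating its count and total duration.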
+ free_event_index, acl_event_index = 0, 0
+ free_include_acl_events = {}
+
+ while free_event_index < len(large_free_events) and acl_event_index < len(acl_events):
+ free_event = large_free_events[free_event_index]
+ free_event_name = f"{const.FREE}-{free_event_index}"
+ free_event_start_time = convert_to_float(free_event.ts)
+ free_event_end_time = free_event_start_time + convert_to_float(free_event.dur)
+
+ while acl_event_index < len(acl_events):
+ acl_event = acl_events[acl_event_index]
+ acl_event_index += 1
+ acl_event_start_time = convert_to_float(acl_event.ts)
+ acl_event_end_time = acl_event_start_time + convert_to_float(acl_event.dur)
+
+ if acl_event_start_time < free_event_start_time:
+ continue
+ if acl_event_end_time > free_event_end_time:
+ break
+
+ if free_event_name not in free_include_acl_events:
+ free_include_acl_events[free_event_name] = {}
+
+ if "acl_event_count" not in free_include_acl_events[free_event_name]:
+ free_include_acl_events[free_event_name]["acl_event_count"] = 0.0
+ free_include_acl_events[free_event_name]["acl_event_count"] += 1
+
+ if "acl_event_dur" not in free_include_acl_events[free_event_name]:
+ free_include_acl_events[free_event_name]["acl_event_dur"] = 0.0
+ free_include_acl_events[free_event_name]["acl_event_dur"] += convert_to_float(acl_event.dur)
+
+ free_event_index += 1
+
+        # Sort free events by duration in descending order so the longest free interval is checked first
+        event_indexes = range(len(large_free_events))
+        for index, free_event in sorted(zip(event_indexes, large_free_events),
+                                        key=lambda x: convert_to_float(x[1].dur), reverse=True):
+ free_event_name = f"{const.FREE}-{index}"
+ free_duration = convert_to_float(free_event.dur)
+ acl_event_dur = free_include_acl_events.get(free_event_name, {}).get("acl_event_dur", 0.0)
+ acl_event_count = free_include_acl_events.get(free_event_name, {}).get("acl_event_count", 0.0)
+
+ if safe_division(acl_event_dur, free_duration) < self.max_acl_event_time_ratio and safe_division(
+ acl_event_count, free_duration) < self.max_acl_event_num_ratio:
+ self.gc_issues = True
+ return free_event
+ return None
diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py
index df8c22fa5161d8f4315748cb629a3dd19b79e39a..b123bc3cca848de964802c0920c1bb0ee2c187d9 100644
--- a/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_analyzer.py
@@ -25,7 +25,7 @@ class SyncBNAnalyzer(BaseAnalyzer):
syncbn_checker = SyncBNChecker()
syncbn_checker.check_syncbn(self.timeline_event_dataset)
syncbn_checker.make_record(self.result)
- syncbn_checker.make_render(self.html_render, priority=self.get_priority())
+ syncbn_checker.make_render(self.html_render, priority=self.get_priority(), rank=kwargs.get("rank"))
return self.result
def get_priority(self):
diff --git a/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py
index e83a1549184b2a48f5ddc25ae15f6cece34825c2..04556ee743a5b9812aaf3b5dcda8aafcef81e4dd 100644
--- a/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py
+++ b/profiler/advisor/analyzer/schedule/syncbn/syncbn_checker.py
@@ -48,12 +48,14 @@ class SyncBNChecker:
return
priority = kwargs.get("priority")
+ rank = kwargs.get("rank")
html_render.render_template(key="schedule",
template_dir="templates",
template_name="sync_batchnorm.html",
desc=self.desc,
solutions=self.solutions,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=rank)
def _init_rule(self):
syncbn_rule_path = os.path.join(
diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py
index 61ec7d1fa602f8359ce2bf9d1ae0297151588ef3..965c2bcf3a1e8710e0aaed1c66f684fad800961c 100644
--- a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py
+++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_analyzer.py
@@ -1,11 +1,8 @@
import logging
-from typing import List, Dict, Any
-
from profiler.advisor.analyzer.base_analyzer import BaseAnalyzer
from profiler.advisor.result.result import OptimizeResult
from profiler.advisor.analyzer.schedule.synchronize_stream.synchronize_stream_checker import SynchronizeStreamChecker
-from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor
from profiler.advisor.display.html.render import HTMLRender
from profiler.advisor.dataset.timeline_event_dataset import ScheduleAnalysisDataset
@@ -25,13 +22,12 @@ class SynchronizeStreamAnalyzer(BaseAnalyzer):
@BaseAnalyzer.check_data((ScheduleAnalysisDataset.get_key(),))
def optimize(self, **kwargs):
-
synchronize_stream_checker = SynchronizeStreamChecker()
synchronize_stream_checker.check_synchronize(self.timeline_event_dataset, kwargs.get("profiling_with_stack"))
synchronize_stream_checker.make_record(self.result)
- synchronize_stream_checker.make_render(self.html_render, priority=self.get_priority())
+ synchronize_stream_checker.make_render(self.html_render, priority=self.get_priority(synchronize_stream_checker),
+ rank=kwargs.get("rank"))
return self.result
-
- def get_priority(self):
- return PriorityBackgroundColor.low
\ No newline at end of file
+ def get_priority(self, synchronize_stream_checker):
+ return synchronize_stream_checker.priority
diff --git a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py
index 9136d611db74f07ef7d8811507e5923fdee18dbd..9f25c0c1a7006adac4c77fae7ef796876cb6d0e2 100644
--- a/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py
+++ b/profiler/advisor/analyzer/schedule/synchronize_stream/synchronize_stream_checker.py
@@ -1,12 +1,13 @@
import logging
+from profiler.advisor.analyzer.schedule.timeline_base_checker import TimelineBaseChecker
from profiler.advisor.common import constant as const
from profiler.advisor.config.config import Config
from profiler.advisor.dataset.timeline_event_dataset import ScheduleAnalysisDataset
+from profiler.advisor.display.html.priority_background_color import PriorityBackgroundColor
from profiler.advisor.result.result import OptimizeResult
from profiler.advisor.result.item import OptimizeItem, OptimizeRecord
-from profiler.advisor.analyzer.schedule.timeline_base_checker import TimelineBaseChecker
-from profiler.advisor.utils.utils import format_timeline_result
+from profiler.advisor.utils.utils import format_timeline_result, safe_division
logger = logging.getLogger()
@@ -20,23 +21,37 @@ class SynchronizeStreamChecker(TimelineBaseChecker):
self.desc = ""
self.suggestions = []
self.solutions = []
- self.max_synchronize_num = None
+ self.max_synchronize_num = 0
+ self.max_synchronize_num_ratio = 0
+ self.step_synchronize_num = 0
+ self.step_synchronize_num_ratio = 0
+ self.priority = None
def check_synchronize(self, event_dataset: ScheduleAnalysisDataset, profiling_with_stack=None):
"""
:Param event_dataset: dataset of timeline event
"""
if not hasattr(event_dataset, "synchronize_stream") or not getattr(event_dataset, "synchronize_stream"):
- logger.debug("Skip synchronize stream checker, because no synchronize stream found")
+            logger.info("Skip synchronize stream checker, because no synchronize stream was found")
return
- synchronize_num = event_dataset.synchronize_stream.total_count
+ self.step_synchronize_num = event_dataset.synchronize_stream.total_count
+ self._cal_synchronize_stream_num_ratio(event_dataset)
+
slow_synchronize_stream = event_dataset.synchronize_stream.slow_synchronize_stream
total_slow_synchronize_time = sum((float(sync_stream.dur) for sync_stream in slow_synchronize_stream))
synchronize_stream_rule = event_dataset.synchronize_stream.rule
self.max_synchronize_num = synchronize_stream_rule.get("max_synchronize_num")
- self.synchronize_issues = synchronize_num >= self.max_synchronize_num and len(slow_synchronize_stream) > 0
+ self.max_synchronize_num_ratio = synchronize_stream_rule.get("max_synchronize_num_ratio")
+
+ is_reach_max_ratio_limit = self.step_synchronize_num_ratio >= self.max_synchronize_num_ratio
+ is_reach_max_num_limit = self.step_synchronize_num >= self.max_synchronize_num
+ is_reach_max_slow_num_limit = len(slow_synchronize_stream) > 0
+
+ self.priority = self.get_priority(is_reach_max_ratio_limit, is_reach_max_num_limit, is_reach_max_slow_num_limit)
+ self.synchronize_issues = is_reach_max_ratio_limit or is_reach_max_num_limit or is_reach_max_slow_num_limit
+
if not self.synchronize_issues:
return
@@ -47,7 +62,8 @@ class SynchronizeStreamChecker(TimelineBaseChecker):
self.query_stack(event_dataset, profiling_with_stack)
self.desc = synchronize_stream_rule.get("problem")
- self.desc = self.desc.format(synchronize_num=synchronize_num,
+ self.desc = self.desc.format(synchronize_num=self.step_synchronize_num,
+ synchronize_aten_ratio=self.step_synchronize_num_ratio,
slow_synchronize_num=len(slow_synchronize_stream),
total_synchronize_stream_time=total_slow_synchronize_time)
@@ -78,6 +94,7 @@ class SynchronizeStreamChecker(TimelineBaseChecker):
if not self.synchronize_issues:
return
priority = kwargs.get("priority")
+ rank = kwargs.get("rank")
format_result_for_html = format_timeline_result(dict(self.matched_op_stacks), dump_html=True)
html_render.render_template(key="schedule",
template_dir="templates",
@@ -88,4 +105,17 @@ class SynchronizeStreamChecker(TimelineBaseChecker):
with_stack_doc_url=Config().timeline_with_stack_doc_url,
empty_stacks=self.empty_stacks,
framework_black_list=self.framework_black_list,
- priority_background_color=priority)
+ priority_background_color=priority,
+ rank=rank)
+
+ def get_priority(self, is_reach_max_ratio_limit=None, is_reach_max_num_limit=None,
+ is_reach_max_slow_num_limit=None):
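+        # The count and ratio thresholds map to high priority; only having individually slow
+        # synchronizations maps to low priority. Returns None when no limit is hit, in which
+        # case no issue is reported.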
+ if is_reach_max_ratio_limit or is_reach_max_num_limit:
+ return PriorityBackgroundColor.high
+ if is_reach_max_slow_num_limit:
+ return PriorityBackgroundColor.low
+
+ def _cal_synchronize_stream_num_ratio(self, event_dataset):
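+        # Ratio of SynchronizeStream calls to aten ops within the step; a high ratio indicates
+        # frequent host-device synchronization relative to the amount of framework work.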
+ if event_dataset.aten:
+ self.step_synchronize_num_ratio = round(safe_division(self.step_synchronize_num, len(event_dataset.aten)),
+ 4)
diff --git a/profiler/advisor/common/async_analysis_status.py b/profiler/advisor/common/async_analysis_status.py
index f67ca235a97c54cd107308a030a3b82d0eaf3352..36b41e0d55b0e1d4ae35f7ad68d23d3a9e7afe0d 100644
--- a/profiler/advisor/common/async_analysis_status.py
+++ b/profiler/advisor/common/async_analysis_status.py
@@ -3,5 +3,7 @@ class AsyncAnalysisStatus:
SUCCESS = "success"
ANALYZING = "analyzing"
- FAILED_STATUS_CODE = 400
+ BAD_REQUEST_STATUS_CODE = 400
+ NOT_FOUND_STATUS_CODE = 404
+ INNER_ERROR_STATUS_CODE = 500
NON_FAILED_STATUS_CODE = 200
diff --git a/profiler/advisor/common/constant.py b/profiler/advisor/common/constant.py
index 647856d3cd7c0f2378707344b5ba3bebd9d813e8..3030bf96c335c9f1654ac3d3a2731dd3380e64f9 100644
--- a/profiler/advisor/common/constant.py
+++ b/profiler/advisor/common/constant.py
@@ -27,6 +27,7 @@ OPTIMIZER_SEP = "#"
OPTIMIZER_STEP = "step"
ENQUEUE = "enqueue"
TORCH_TO_NPU = "torch_to_npu"
+FREE = "free"
OP_COMPILE_NAME = "AscendCL@aclopCompileAndExecute"
OP_COMPILE_ID = "aclopCompileAndExecute"
SYNC_STREAM = "AscendCL@aclrtSynchronizeStream"
@@ -146,3 +147,6 @@ MAX_READ_DB_FILE_BYTES = 8 * 1024 * 1024 * 1024
WRITE_MODES = stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
+
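+# Environment variable switches: DISABLE_PROFILING_COMPARISON=true skips the profiling comparison step;
+# FREE_DURATION_FOR_GC_ANALYSIS overrides the minimum duration of free events considered for GC analysis.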
+DISABLE_PROFILING_COMPARISON = "DISABLE_PROFILING_COMPARISON"
+FREE_DURATION_FOR_GC_ANALYSIS = "FREE_DURATION_FOR_GC_ANALYSIS"
\ No newline at end of file
diff --git a/profiler/advisor/dataset/timeline_event_dataset.py b/profiler/advisor/dataset/timeline_event_dataset.py
index e0a12fab6fa9e3dd381839e94572d401088461e0..659601587e1926c7e98d3a6542febde414b6b9cb 100644
--- a/profiler/advisor/dataset/timeline_event_dataset.py
+++ b/profiler/advisor/dataset/timeline_event_dataset.py
@@ -23,7 +23,9 @@ from profiler.advisor.dataset.timeline_op_collector.timeline_op_collector import
AclToNpuCollector,
OpStackCollector,
StepCollector,
- GcCollector
+ GcCollector,
+ FreeEventsCollector,
+ AclEventsCollector
)
logger = logging.getLogger()
@@ -162,7 +164,9 @@ class ScheduleAnalysisDataset(BaseTimelineEventDataset):
SyncBNCollector=SyncBNCollector(),
AtenCollector=AtenCollector(),
OptimizerCollector=OptimizerCollector(),
- GcCollector=GcCollector()
+ GcCollector=GcCollector(),
+ FreeEventsCollector=FreeEventsCollector(),
+ AclEventsCollector=AclEventsCollector()
)
def __init__(self, collection_path, data: dict, build_dataset=True, **kwargs) -> None:
diff --git a/profiler/advisor/dataset/timeline_op_collector/timeline_op_collector.py b/profiler/advisor/dataset/timeline_op_collector/timeline_op_collector.py
index 56e6165dd24aa4d7a8aafaab455793c6c8df8e13..5ea349ad9cea249158d061e8f4fee7361486fdfa 100644
--- a/profiler/advisor/dataset/timeline_op_collector/timeline_op_collector.py
+++ b/profiler/advisor/dataset/timeline_op_collector/timeline_op_collector.py
@@ -374,3 +374,53 @@ class GcCollector(BaseOpCollector):
def post_process(self, target_op_list, **kwargs):
self.attribute_to_dataset["gc_events"] = self.op_list
+
+
+class FreeEventsCollector(BaseOpCollector):
+ def __init__(self):
+ super().__init__()
+
+ @staticmethod
+ def _load_rule():
+        gc_rule_path = os.path.join(
+ os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))),
+ "rules",
+ "gc.yaml")
+
+        gc_rule = FileManager.read_yaml_file(gc_rule_path)
+ return gc_rule
+
+ def add_op(self, event):
+ if event.name.lower() == const.FREE:
+ self.op_list.append(event)
+
+ def post_process(self, target_op_list, **kwargs):
+ gc_rule = self._load_rule()
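+        # The threshold for "large" free events can be overridden with the FREE_DURATION_FOR_GC_ANALYSIS
+        # environment variable; otherwise the value from rules/gc.yaml is used.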
+ if os.getenv(const.FREE_DURATION_FOR_GC_ANALYSIS):
+ max_free_threshold = convert_to_float(os.getenv(const.FREE_DURATION_FOR_GC_ANALYSIS))
+ else:
+ max_free_threshold = gc_rule.get("max_free_threshold")
+
+ large_free_events = []
+
+ for op in target_op_list:
+ if convert_to_float(op.dur) > max_free_threshold:
+ large_free_events.append(op)
+
+ large_free_events.sort(key=lambda x: convert_to_float(x.ts))
+ self.attribute_to_dataset["large_free_events"] = large_free_events
+
+
+class AclEventsCollector(BaseOpCollector):
+ ACL_EVENT_PREFIX = "AscendCL@"
+
+ def __init__(self):
+ super().__init__()
+
+ def add_op(self, event):
+ if event.name.startswith(self.ACL_EVENT_PREFIX):
+ self.op_list.append(event)
+
+ def post_process(self, target_op_list, **kwargs):
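+        # Sort the CANN-layer (AscendCL@*) events by start time so GC analysis can sweep them together
+        # with the large free events.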
+ target_op_list.sort(key=lambda x: convert_to_float(x.ts))
+ self.attribute_to_dataset["acl_events"] = target_op_list
diff --git a/profiler/advisor/display/html/templates/affinity_api.html b/profiler/advisor/display/html/templates/affinity_api.html
index e9f3dd29c433c6d2481fe755ab5426d42f94a50f..7cd3d7ad33d0220c7aba055721eddf049161a0d8 100644
--- a/profiler/advisor/display/html/templates/affinity_api.html
+++ b/profiler/advisor/display/html/templates/affinity_api.html
@@ -2,6 +2,9 @@