From 2d5e77cf61b80c76b56840be1191f5d673a37fd7 Mon Sep 17 00:00:00 2001 From: fanglanyue Date: Fri, 13 Jun 2025 11:25:23 +0800 Subject: [PATCH] bugfix: cluster dataset init rank_bw_dict with bandwidth keys --- .../msprof_analyze/advisor/analyzer/analyzer_controller.py | 2 +- .../msprof_analyze/advisor/dataset/cluster/cluster_dataset.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/profiler/msprof_analyze/advisor/analyzer/analyzer_controller.py b/profiler/msprof_analyze/advisor/analyzer/analyzer_controller.py index bde9e5cd34..869c0519cf 100644 --- a/profiler/msprof_analyze/advisor/analyzer/analyzer_controller.py +++ b/profiler/msprof_analyze/advisor/analyzer/analyzer_controller.py @@ -188,7 +188,7 @@ class AnalyzerController: def _get_step_rank_for_cluster_statistic_diff(target_cluster_statistic_data, benchmark_cluster_statistic_data, headers, dimension, get_max=False): if dimension not in headers: - logger.error("Error dimension %s for cluster statistics data, optionals are %s.", dimension, headers) + logger.warning("Invalid dimension %s for cluster statistics data, optionals are %s.", dimension, headers) return None, None, None dimension_index = safe_index_value(headers, dimension) diff --git a/profiler/msprof_analyze/advisor/dataset/cluster/cluster_dataset.py b/profiler/msprof_analyze/advisor/dataset/cluster/cluster_dataset.py index c976ac0f9d..888c65f750 100644 --- a/profiler/msprof_analyze/advisor/dataset/cluster/cluster_dataset.py +++ b/profiler/msprof_analyze/advisor/dataset/cluster/cluster_dataset.py @@ -228,8 +228,10 @@ class ClusterCommunicationDataset(ClusterDataset): return { self.RDMA_TIME_MS: 0, self.RDMA_SIZE_MB: 0, + self.RDMA_BANDWIDTH: 0, self.SDMA_TIME_MS: 0, self.SDMA_SIZE_MB: 0, + self.SDMA_BANDWIDTH: 0 } def process(self, communication_json: dict): @@ -328,7 +330,6 @@ class ClusterCommunicationDataset(ClusterDataset): if row.band_type == self.SDMA: self.rank_bw_dict[row.step_rank][self.SDMA_SIZE_MB] = row.transit_size self.rank_bw_dict[row.step_rank][self.SDMA_TIME_MS] = row.transit_time - self.rank_bw_dict[row.step_rank][self.SDMA_TIME_MS] = row.transit_time self.rank_bw_dict[row.step_rank][self.SDMA_BANDWIDTH] = row.bandwidth elif row.band_type == self.RDMA: self.rank_bw_dict[row.step_rank][self.RDMA_SIZE_MB] = row.transit_size -- Gitee