From 1254b2c9cae889d5cd600b09b792b4770cd79424 Mon Sep 17 00:00:00 2001
From: wuyulong11 <wuyulong11@huawei.com>
Date: Thu, 30 Nov 2023 19:15:04 +0800
Subject: [PATCH] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E4=BF=A1=E6=81=AF?=
 =?UTF-8?q?=E3=80=91=E3=80=90tbplugin=E3=80=91=E3=80=90=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?=E5=8D=95=E3=80=91Memory=E7=95=8C=E9=9D=A2=E6=97=B6=E9=97=B4?=
 =?UTF-8?q?=E6=98=BE=E7=A4=BA=E7=B2=BE=E5=BA=A6=E6=94=B9=E4=B8=BAus?=
 =?UTF-8?q?=E7=BA=A7=E5=88=AB=20=E3=80=90=E4=BF=AE=E6=94=B9=E4=BA=BA?=
 =?UTF-8?q?=E3=80=91=20wuyulong=2030031080?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../fe/src/components/charts/NewLineChart.tsx |  3 +-
 .../torch_tb_profiler/profiler/data.py        | 23 ++++----
 .../profiler/run_generator.py                 | 52 +++++++++----------
 .../tb_plugin/torch_tb_profiler/run.py        | 10 ++--
 .../tb_plugin/torch_tb_profiler/utils.py      |  2 +-
 5 files changed, 47 insertions(+), 43 deletions(-)
diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx
index 5bcbd445c..af350e93d 100644
--- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx
+++ b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx
@@ -364,8 +364,7 @@ export const LineChart: React.FC<IProps> = (props) => {
   }, [graph, height, resizeEventDependency])
 
   React.useEffect(() => {
-    const compare_fn = (key: number, mid: Array<number>) =>
-      key - parseFloat(mid[0].toFixed(2))
+    const compare_fn = (key: number, mid: Array<number>) => key - mid[0]
     if (chartObj && tag === 'Operator') {
       if (record) {
         let startId = -1
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py
index 909cbc555..ba423019a 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py
@@ -139,7 +139,7 @@ class RunProfileData(object):
 
     @staticmethod
     def parse_gpu(worker, span, path, cache_dir):
-        trace_path, trace_json, _ = RunProfileData._preprocess_file(path, cache_dir, 'GPU')
+        trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU')
 
         profile = RunProfileData.from_json(worker, span, trace_json)
         profile.trace_file_path = trace_path
@@ -149,7 +149,6 @@ class RunProfileData(object):
     def parse_npu(worker, span, path, cache_dir):
         trace_json = {}
         trace_path = path
-        start_ts = 0
         has_trace = False
         has_kernel = False
         has_memory_record = False
@@ -160,13 +159,14 @@ class RunProfileData(object):
             if utils.is_npu_trace_path(file):
                 has_trace = True
                 trace_file = io.join(path, file)
-                trace_path, trace_json, start_ts = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend')
+                trace_path, trace_json = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend')
                 break
 
-        profile = RunProfileData.from_json(worker, span, trace_json)
+        profile = RunProfileData(worker, span, trace_json)
         profile.trace_file_path = trace_path
         profile.has_trace = has_trace
-        profile.profiler_start_ts = 0 if math.isinf(start_ts) else start_ts
+        if math.isinf(profile.profiler_start_ts):
+            profile.profiler_start_ts = 0
 
         for file in io.listdir(path):
             if str(file) == 'kernel_details.csv':
@@ -193,6 +193,14 @@ class RunProfileData(object):
         profile.has_kernel = has_kernel
         profile.has_memory = has_memory_operator and has_memory_record
         profile.has_communication = has_communication_wait_ops and has_communication_overlap
+        if profile.has_communication:
+            with utils.timing('EventParser.parse'):
+                parser = EventParser()
+                with utils.timing('EventParser: parse steps times'):
+                    # Process steps
+                    parser.parse_steps(profile.events, parser.communication_data)
+
+            profile.steps_names = parser.steps_names
         return profile
 
     @staticmethod
@@ -235,10 +243,7 @@ class RunProfileData(object):
         event_list = trace_json['traceEvents']
         end_index = None
         start_index = None
-        start_ts = float('inf')
         for i in reversed(range(len(event_list))):
-            if event_list[i].get('ts') is not None:
-                start_ts = min(start_ts, float(event_list[i]['ts']))
             if device_target != 'Ascend':
                 if event_list[i]['name'] == 'Record Window End':
                     end_index = i
@@ -260,7 +265,7 @@ class RunProfileData(object):
                 fzip.write(json.dumps(trace_json))
             trace_path = fp.name
 
-        return trace_path, trace_json, start_ts
+        return trace_path, trace_json
 
     def process(self):
         with utils.timing('EventParser.parse'):
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py
index f13948aa3..4184e3830 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py
@@ -57,6 +57,7 @@ class RunGenerator(object):
         profile_run.has_communication = self.profile_data.has_communication
         profile_run.has_memcpy_or_memset = self.profile_data.has_memcpy_or_memset
         profile_run.profiler_start_ts = self.profile_data.profiler_start_ts
+        profile_run.device_target = self.device_target
 
         if self.device_target != 'Ascend':
             profile_run.views.append(consts.OVERALL_VIEW)
@@ -81,6 +82,26 @@ class RunGenerator(object):
             if self.profile_data.memory_snapshot:
                 profile_run.views.append(consts.MEMORY_VIEW)
                 profile_run.memory_snapshot = self.profile_data.memory_snapshot
+
+            profile_run.gpu_metrics = self.profile_data.gpu_metrics_parser.get_gpu_metrics()
+
+            gpu_infos = {gpu_id: RunGenerator._get_gpu_info(self.profile_data.device_props, gpu_id)
+                         for gpu_id in self.profile_data.gpu_metrics_parser.gpu_ids}
+            gpu_infos = {gpu_id: gpu_info for gpu_id, gpu_info in gpu_infos.items() if gpu_info is not None}
+
+            profile_run.gpu_summary, profile_run.gpu_tooltip = \
+                self.profile_data.gpu_metrics_parser.get_gpu_metrics_data_tooltip(
+                    gpu_infos, self.profile_data.tc_ratio)
+
+            profile_run.tid2tree = self.profile_data.tid2tree
+            profile_run.pl_tid2tree = self.profile_data.pl_tid2tree
+
+            profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events)
+            profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events)
+            if profile_run.is_pytorch_lightning and profile_run.pl_module_stats:
+                profile_run.views.append(consts.LIGHTNING_VIEW)
+            elif profile_run.module_stats:
+                profile_run.views.append(consts.MODULE_VIEW)
         else:
             if self.profile_data.has_operator_view:
                 profile_run.views.append(consts.OP_VIEW)
@@ -114,27 +135,6 @@ class RunGenerator(object):
             profile_run.views.append(consts.TRACE_VIEW)
             profile_run.trace_file_path = self.profile_data.trace_file_path
 
-        profile_run.gpu_metrics = self.profile_data.gpu_metrics_parser.get_gpu_metrics()
-
-        gpu_infos = {gpu_id: RunGenerator._get_gpu_info(self.profile_data.device_props, gpu_id)
-                     for gpu_id in self.profile_data.gpu_metrics_parser.gpu_ids}
-        gpu_infos = {gpu_id: gpu_info for gpu_id, gpu_info in gpu_infos.items() if gpu_info is not None}
-
-        profile_run.gpu_summary, profile_run.gpu_tooltip = \
-            self.profile_data.gpu_metrics_parser.get_gpu_metrics_data_tooltip(
-                gpu_infos, self.profile_data.tc_ratio)
-
-        profile_run.tid2tree = self.profile_data.tid2tree
-        profile_run.pl_tid2tree = self.profile_data.pl_tid2tree
-        profile_run.device_target = self.device_target
-
-        profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events)
-        profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events)
-        if profile_run.is_pytorch_lightning and profile_run.pl_module_stats:
-            profile_run.views.append(consts.LIGHTNING_VIEW)
-        elif profile_run.module_stats:
-            profile_run.views.append(consts.MODULE_VIEW)
-
         return profile_run
 
     def _npu_get_overlap(self):
@@ -436,9 +436,9 @@ class RunGenerator(object):
             # convert time metric 'us' to 'ms'
             # some operators may not have the following columns
             nums = [ls[0] if ls[0] else '<unknown>', abs(float(ls[1])),
-                    round((float(ls[2]) - self.profile_data.profiler_start_ts) / 1000, 2) if ls[2] else None,
-                    round((float(ls[3]) - self.profile_data.profiler_start_ts) / 1000, 2) if ls[3] else None,
-                    round(float(ls[4]) / 1000, 2) if ls[4] else None]
+                    round((float(ls[2]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[2] else None,
+                    round((float(ls[3]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[3] else None,
+                    round(float(ls[4]) / 1000, 3) if ls[4] else None]
             display_datas[device_type].append(nums)
         table['rows'] = display_datas
         for name in display_datas:
@@ -580,7 +580,7 @@ class RunGenerator(object):
             logger.error('Required column is missing in file "memory_record.csv"')
         else:
             for ls in datas[1:]:
-                time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 2)
+                time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3)
                 device_type = ls[device_type_idx]
                 if ls[tag_type_idx] == 'PTA+GE':
                     process_data.setdefault(device_type, {}).setdefault('Allocated', []).append(
@@ -639,7 +639,7 @@ class RunGenerator(object):
         reserved_idx = memory_curve_id_dict.get('reserved_idx')
         tag_type_idx = memory_curve_id_dict.get('tag_type_idx')
         time_idx = memory_curve_id_dict.get('time_idx')
-        time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 2)
+        time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3)
         for item in peak_memory_rows[ls[device_type_idx]]:
             if item[0] == ls[tag_type_idx]:
                 if item[1] < ls[reserved_idx]:
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py
index edbe5d230..2f719fb0c 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py
@@ -192,7 +192,7 @@ class RunProfile(object):
 
     def get_memory_stats(self, start_ts=None, end_ts=None, memory_metric='K'):
         cano = Canonicalizer(memory_metric=memory_metric)
-        rounder = DisplayRounder(ndigits=2)
+        rounder = DisplayRounder(ndigits=3)
 
         stats = self.memory_snapshot.get_memory_statistics(self.tid2tree, start_ts=start_ts, end_ts=end_ts)
 
@@ -281,9 +281,9 @@ class RunProfile(object):
                     continue
 
                 curves[dev].append([
-                    cano.convert_time(ts - self.profiler_start_ts),
-                    cano.convert_memory(ta),
-                    cano.convert_memory(tr),
+                    round(cano.convert_time(ts - self.profiler_start_ts), 3),
+                    round(cano.convert_memory(ta), 3),
+                    round(cano.convert_memory(tr), 3),
                 ])
                 peaks[dev] = max(peaks[dev], ta)
 
@@ -366,7 +366,7 @@ class RunProfile(object):
             return name
 
         cano = Canonicalizer(time_metric=time_metric, memory_metric=memory_metric)
-        rounder = DisplayRounder(ndigits=2)
+        rounder = DisplayRounder(ndigits=3)
 
         profiler_start_ts = self.profiler_start_ts
         memory_records = RunProfile._filtered_by_ts(self.memory_snapshot.memory_records, start_ts, end_ts)
diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py
index be87e02eb..8f4189d76 100644
--- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py
+++ b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py
@@ -130,7 +130,7 @@ class DisplayRounder:
     def __call__(self, v: float):
         _v = abs(v)
         if _v >= self.precision or v == 0:
-            return round(v, 2)
+            return round(v, 3)
         else:
             ndigit = abs(math.floor(math.log10(_v)))
             return round(v, ndigit)
-- 
Gitee