From ae5ff82d0f854286c6461ce380d75ea35b227a2e Mon Sep 17 00:00:00 2001
From: jiangmianjiao
Date: Wed, 17 May 2023 09:54:49 +0800
Subject: [PATCH] [Change description] memory view feature development
 [Modified by] jiangmianjiao 30036454
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tb_plugin/fe/src/components/Kernel.tsx    |   2 +-
 .../tb_plugin/torch_tb_profiler/plugin.py     |  24 ++--
 .../torch_tb_profiler/profiler/data.py        |  14 ++
 .../profiler/run_generator.py                 | 129 ++++++++++++++++--
 .../tb_plugin/torch_tb_profiler/run.py        |   5 +
 5 files changed, 155 insertions(+), 19 deletions(-)

diff --git a/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx b/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx
index aecb86b3ea3..89c2f5b2c52 100644
--- a/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx
+++ b/tb_plugins/profiling/tb_plugin/fe/src/components/Kernel.tsx
@@ -275,7 +275,7 @@ export const Kernel: React.FC<IProps> = (props) => {
               value={searchOpName}
               onChange={onSearchOpChanged}
               type="search"
-              label="Search by Step ID"
+              label="Search by Step Id"
             />
           ) :
diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py
index 798fc5fb84f..25d7ede912a 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/plugin.py
@@ -291,17 +291,22 @@ class TorchProfilerPlugin(base_plugin.TBPlugin):
             start_ts = int(start_ts)
         if end_ts is not None:
             end_ts = int(end_ts)
-
-        return self.respond_as_json(
-            profile.get_memory_stats(start_ts=start_ts, end_ts=end_ts, memory_metric=memory_metric), True)
+        if profile.device_target == 'Ascend':
+            return None
+        else:
+            return self.respond_as_json(
+                profile.get_memory_stats(start_ts=start_ts, end_ts=end_ts, memory_metric=memory_metric), True)

     @wrappers.Request.application
     def memory_curve_route(self, request: werkzeug.Request):
         profile = self._get_profile_for_request(request)
         time_metric = request.args.get('time_metric', 'ms')
         memory_metric = request.args.get('memory_metric', 'MB')
-        return self.respond_as_json(
-            profile.get_memory_curve(time_metric=time_metric, memory_metric=memory_metric), True)
+        if profile.device_target == 'Ascend':
+            return self.respond_as_json(profile.memory_all_curve, True)
+        else:
+            return self.respond_as_json(
+                profile.get_memory_curve(time_metric=time_metric, memory_metric=memory_metric), True)

     @wrappers.Request.application
     def memory_events_route(self, request: werkzeug.Request):
@@ -315,9 +320,12 @@ class TorchProfilerPlugin(base_plugin.TBPlugin):
         if end_ts is not None:
             end_ts = int(end_ts)

-        return self.respond_as_json(
-            profile.get_memory_events(start_ts, end_ts, time_metric=time_metric,
-                                      memory_metric=memory_metric), True)
+        if profile.device_target == 'Ascend':
+            return self.respond_as_json(profile.memory_events, True)
+        else:
+            return self.respond_as_json(
+                profile.get_memory_events(start_ts, end_ts, time_metric=time_metric,
+                                          memory_metric=memory_metric), True)

     @wrappers.Request.application
     def module_route(self, request: werkzeug.Request):
diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py
index 4637527cda3..224c626e8ac 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py
@@ -75,6 +75,7 @@ class RunProfileData(object):
         self.steps_costs = None
         self.steps_names = None
         self.avg_costs = None
+        self.has_memory: bool = False

         # GPU parser
         self.gpu_metrics_parser: GPUMetricsParser = None
@@ -100,6 +101,10 @@ class RunProfileData(object):
         # recommendation based on analysis result.
         self.recommendations = []

+        # npu memory data
+        self.memory_form_path: str = None
+        self.memory_line_path: str = None
+
     @staticmethod
     def parse_gpu(worker, span, path, cache_dir):
         trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU')
@@ -114,6 +119,8 @@ class RunProfileData(object):
         trace_path = path
         has_trace = False
         has_kernel = False
+        has_memory_line = False
+        has_memory_form = False
         for file in io.listdir(path):
             if utils.is_npu_trace_path(file):
                 has_trace = True
@@ -129,7 +136,14 @@ class RunProfileData(object):
             if str(file) == 'kernel_details.csv':
                 has_kernel = True
                 profile.kernel_file_path = io.join(path, file)
+            if str(file) == 'memory_view_line_chart.csv':
+                has_memory_line = True
+                profile.memory_line_path = io.join(path, file)
+            if str(file) == 'memory_view_form.csv':
+                has_memory_form = True
+                profile.memory_form_path = io.join(path, file)
         profile.has_kernel = has_kernel
+        profile.has_memory = has_memory_form and has_memory_line
         return profile

     @staticmethod
diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py
index 9107947a5ea..0045a9cfe20 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py
@@ -1,7 +1,7 @@
 # -------------------------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # --------------------------------------------------------------------------
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from typing import Dict, Iterable, List

 import csv
@@ -11,6 +11,7 @@ from .data import DistributedRunProfileData, RunProfileData
 from .module_op import aggegate_module_view, aggegate_pl_module_view
 from .op_agg import KernelAggByNameOp, OperatorAgg
 from .overall_parser import ProfileRole
+from ..utils import Canonicalizer

 logger = utils.get_logger()

@@ -78,6 +79,14 @@ class RunGenerator(object):
             profile_run.views.append(consts.MEMORY_VIEW)
             profile_run.memory_snapshot = self.profile_data.memory_snapshot

+        profile_run.device_target = self.device_target
+        if self.device_target == 'Ascend':
+            if self.profile_data.has_memory:
+                profile_run.views.append(consts.MEMORY_VIEW)
+                profile_run.memory_div_curve = None
+                profile_run.memory_all_curve = self._get_memory_all_curve()
+                profile_run.memory_events = self._get_memory_event()
+
         profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events)
         profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events)
         if profile_run.is_pytorch_lightning and profile_run.pl_module_stats:
@@ -87,6 +96,105 @@ class RunGenerator(object):

         return profile_run

+    def _get_memory_event(self):
+        display_columns = ('Operator', 'Size(KB)', 'Allocation Time(us)', 'Release Time(us)', 'Duration(us)')
+        path = self.profile_data.memory_form_path
+        display_datas = defaultdict(list)
+        devices_type = []
+        table = {
+            'metadata': {
+                'title': 'Memory Events',
+                'default_device': 'all',
+            },
+            'columns': [],
+            'rows': {}
+        }
+        datas = self._get_csv_data(path)
+        for idx, column in enumerate(datas[0]):
+            if column == 'Device Type':
+                self.device_type_form_idx = idx
+            if column in display_columns:
+                if column == 'Operator':
+                    table['columns'].append({'name': column, 'type': 'string'})
+                else:
+                    table['columns'].append({'name': column, 'type': 'number'})
+        for ls in datas[1:]:
+            device_type = ls[self.device_type_form_idx]
+            nums = [ls[1], float(ls[2]), float(ls[3])]
+            if ls[4]:
+                nums.append(float(ls[4]))
+            if ls[5]:
+                nums.append(round(float(ls[5]), 2))
+            display_datas[device_type].append(nums)
+        table['rows'] = display_datas
+        for name in display_datas:
+            devices_type.append(name)
+        table['metadata'].update({'default_device': devices_type[0]})
+        return table
+
+    def _get_memory_all_curve(self):
+        time_metric: str = 'us'
+        memory_metric: str = 'KB'
+        cano = Canonicalizer(time_metric, memory_metric)
+        pta_and_ge_data, pta_or_ge_data = self._handle_memory_data()
+        devices_type, peaks = self._get_peaks_and_devices_type()
+        result = {
+            'metadata': {
+                'default_device': devices_type[0],
+                'devices': devices_type,
+                'peaks': peaks,
+                'totals': {},
+                'first_ts': 0,
+                'time_metric': cano.time_metric,
+                'memory_metric': cano.memory_metric,
+                'time_factor': cano.time_factor,
+                'memory_factor': cano.memory_factor,
+            },
+            'columns': [
+                {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'},
+                {'name': f'Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'Total memory in use.'},
+                {'name': f'Reserved ({cano.memory_metric})', 'type': 'number',
+                 'tooltip': 'Total reserved memory by allocator, both used and unused.'},
+            ],
+            'rows': pta_and_ge_data,
+        }
+        return result
+
+    def _get_peaks_and_devices_type(self):
+        devices_type = []
+        peaks = {}
+        pta_and_ge_data, pta_or_ge_data = self._handle_memory_data()
+        for name in pta_and_ge_data:
+            devices_type.append(name)
+            max_reserved = 0
+            for array_value in pta_and_ge_data.get(name):
+                max_reserved = max(array_value[2], max_reserved)
+            peaks[name] = 'Peak Memory Usage: {:.1f}'.format(max_reserved)
+        return devices_type, peaks
+
+    def _handle_memory_data(self):
+        pta_and_ge_data = defaultdict(list)
+        pta_or_ge_data = {}
+        path = self.profile_data.memory_line_path
+        datas = self._get_csv_data(path)
+        for idx, column in enumerate(datas[0]):
+            if column == 'Tag':
+                self.tag_type_idx = idx
+            if column == 'Device Type':
+                self.device_type_idx = idx
+            if column == 'Timestamp(us)':
+                self.time_idx = idx
+            if column == 'Total Reserved(KB)':
+                self.reserved_idx = idx
+            if column == 'Total Allocated(KB)':
+                self.allocated_idx = idx
+        for ls in datas[1:]:
+            temp: list = [float(ls[self.time_idx]), float(ls[self.reserved_idx]), float(ls[self.allocated_idx])]
+            device_type = ls[self.device_type_idx]
+            pta_and_ge_data[device_type].append(temp)
+            pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[self.tag_type_idx], []).append(temp)
+        return pta_and_ge_data, pta_or_ge_data
+
     def _generate_overview(self):
         def build_part_time_str(part_cost: float, part_name: str):
             format_str = ('<div class="visualization-tooltip" style="white-space: nowrap;">'
@@ -447,7 +555,7 @@ class RunGenerator(object):
         return datas

     def _generate_kernel_table_npu(self):
-        display_columns = ('Step ID', 'Name', 'Type', 'Accelerator Core', 'Start Time', 'Duration(us)', 'Wait Time(us)',
+        display_columns = ('Step Id', 'Name', 'Type', 'Accelerator Core', 'Start Time', 'Duration(us)', 'Wait Time(us)',
                            'Block Dim', 'Input Shapes', 'Input Data Types', 'Input Formats', 'Output Shapes',
                            'Output Data Types', 'Output Formats')
         display_idxs = []
@@ -478,7 +586,8 @@ class RunGenerator(object):
                   enumerate(datas) if idx != 0]
         return result

-    def _get_csv_data(self, path: str):
+    @staticmethod
+    def _get_csv_data(path: str):
         if path is None:
             return
         datas = []
@@ -587,7 +696,7 @@ class DistributedRunGenerator(object):
             for used_device in data.used_devices:
                 gpu_info = RunGenerator._get_gpu_info(data.device_props, used_device)
                 if gpu_info is not None:
-                    result[node][process_id]['GPU'+str(used_device)] = gpu_info
+                    result[node][process_id]['GPU' + str(used_device)] = gpu_info

         if result:
             for k, v in result.items():
@@ -622,7 +731,7 @@ class DistributedRunGenerator(object):
                 ]
                 steps_to_overlap['all'][data.worker] = [
                     sum(x) for x in zip(steps_to_overlap['all'][data.worker], steps_to_overlap[step_name][data.worker])]
-            steps_to_overlap['all'][data.worker] = [x/step_number for x in steps_to_overlap['all'][data.worker]]
+            steps_to_overlap['all'][data.worker] = [x / step_number for x in steps_to_overlap['all'][data.worker]]
         for k, v in steps_to_overlap.items():
             steps_to_overlap[k] = OrderedDict(sorted(v.items()))
         result['data'] = steps_to_overlap
@@ -644,11 +753,11 @@ class DistributedRunGenerator(object):
             for step, comm_stats in data.step_comm_stats.items():
                 steps_to_wait.setdefault(step, OrderedDict())[data.worker] = [
                     comm_stats[1],
-                    comm_stats[0]-comm_stats[1]
+                    comm_stats[0] - comm_stats[1]
                 ]
                 steps_to_wait['all'][data.worker] = [
                     sum(x) for x in zip(steps_to_wait['all'][data.worker], steps_to_wait[step][data.worker])]
-            steps_to_wait['all'][data.worker] = [x/step_number for x in steps_to_wait['all'][data.worker]]
+            steps_to_wait['all'][data.worker] = [x / step_number for x in steps_to_wait['all'][data.worker]]
         for k, v in steps_to_wait.items():
             steps_to_wait[k] = OrderedDict(sorted(v.items()))

@@ -680,11 +789,11 @@ class DistributedRunGenerator(object):
                     op,
                     stats[0],
                     stats[1],
-                    round(stats[1]/stats[0]),
+                    round(stats[1] / stats[0]),
                     stats[2],
-                    round(stats[2]/stats[0]),
+                    round(stats[2] / stats[0]),
                     stats[3],
-                    round(stats[3]/stats[0])
+                    round(stats[3] / stats[0])
                 ]
                 table['rows'].append(row)
             workers_to_comm_ops[data.worker] = table
diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/run.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/run.py
index f148e4ccbf1..7d4e23503f0 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/run.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/run.py
@@ -139,6 +139,11 @@ class RunProfile(object):

         self.module_stats: Optional[List(Stats)] = None
         self.pl_module_stats: Optional[List(Stats)] = None
+        self.device_target = None
+
+        self.memory_all_curve = None
+        self.memory_div_curve = None
+        self.memory_events = None

     def append_gpu_metrics(self, raw_data: bytes):
         counter_json_str = ', {}'.format(', '.join(self.gpu_metrics))
-- 
Gitee