From e7f87077f65933ceada9cfb607cb8e4436270981 Mon Sep 17 00:00:00 2001
From: jiangmianjiao
Date: Fri, 26 May 2023 15:26:26 +0800
Subject: [PATCH] [Change description] Operator view feature development
 [Modified by] jiangmianjiao 30036454
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../tb_plugin/torch_tb_profiler/consts.py     |   6 +
 .../torch_tb_profiler/profiler/data.py        |   7 +
 .../profiler/run_generator.py                 | 212 ++++++++++++++++--
 3 files changed, 208 insertions(+), 17 deletions(-)

diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/consts.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/consts.py
index 486e5f05bf5..4fced0bfe64 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/consts.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/consts.py
@@ -75,3 +75,9 @@ TOOLTIP_KERNEL_USES_TC = \
     'Whether this kernel uses Tensor Cores.'
 TOOLTIP_KERNEL_OP_TC_ELIGIBLE = \
     'Whether the operator launched this kernel is eligible to use Tensor Cores.'
+TOOLTIP_OP_TC_ELIGIBLE_AICORE = \
+    'Whether this operator is eligible to use AICore.'
+TOOLTIP_OP_TC_SELF_AICORE = \
+    'Time of Device Self Duration With AICore / Device Self Duration.'
+TOOLTIP_OP_TC_TOTAL_AICORE = \
+    'Time of Device Total Duration With AICore / Device Total Duration.'
diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py
index 224c626e8ac..1c38081fbb1 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/data.py
@@ -76,6 +76,7 @@ class RunProfileData(object):
         self.steps_names = None
         self.avg_costs = None
         self.has_memory: bool = False
+        self.has_operator_view: bool = False
 
         # GPU parser
         self.gpu_metrics_parser: GPUMetricsParser = None
@@ -105,6 +106,9 @@ class RunProfileData(object):
         self.memory_form_path: str = None
         self.memory_line_path: str = None
 
+        # npu operator data
+        self.operator_path: str = None
+
     @staticmethod
     def parse_gpu(worker, span, path, cache_dir):
         trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU')
@@ -142,6 +146,9 @@ class RunProfileData(object):
             if str(file) == 'memory_view_form.csv':
                 has_memory_form = True
                 profile.memory_form_path = io.join(path, file)
+            if str(file) == 'operator_details.csv':
+                profile.has_operator_view = True
+                profile.operator_path = io.join(path, file)
         profile.has_kernel = has_kernel
         profile.has_memory = has_memory_form and has_memory_line
         return profile
diff --git a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py
index 0045a9cfe20..cd28b52e7b3 100644
--- a/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py
+++ b/tb_plugins/profiling/tb_plugin/torch_tb_profiler/profiler/run_generator.py
@@ -33,17 +33,28 @@ class RunGenerator(object):
         profile_run.has_communication = self.profile_data.has_communication
         profile_run.has_memcpy_or_memset = self.profile_data.has_memcpy_or_memset
         profile_run.profiler_start_ts = self.profile_data.profiler_start_ts
-        # profile_run.views.append(consts.OVERALL_VIEW)
-        profile_run.overview = self._generate_overview()
-
-        # profile_run.views.append(consts.OP_VIEW)
-        profile_run.operation_pie_by_name = self._generate_op_pie()
-        profile_run.operation_table_by_name = self._generate_op_table(self.profile_data.op_list_groupby_name)
-        profile_run.operation_stack_by_name = self._generate_op_table_for_stack(False)
-        profile_run.operation_pie_by_name_input = self._generate_op_pie(True)
-        profile_run.operation_table_by_name_input = self._generate_op_table(
-            self.profile_data.op_list_groupby_name_input, True)
-        profile_run.operation_stack_by_name_input = self._generate_op_table_for_stack(True)
+
+        if self.device_target == 'GPU':
+            profile_run.views.append(consts.OVERALL_VIEW)
+            profile_run.overview = self._generate_overview()
+
+            profile_run.views.append(consts.OP_VIEW)
+            profile_run.operation_pie_by_name = self._generate_op_pie()
+            profile_run.operation_table_by_name = self._generate_op_table(self.profile_data.op_list_groupby_name)
+            profile_run.operation_stack_by_name = self._generate_op_table_for_stack(False)
+            profile_run.operation_pie_by_name_input = self._generate_op_pie(True)
+            profile_run.operation_table_by_name_input = self._generate_op_table(
+                self.profile_data.op_list_groupby_name_input, True)
+            profile_run.operation_stack_by_name_input = self._generate_op_table_for_stack(True)
+        else:
+            if self.profile_data.has_operator_view:
+                profile_run.views.append(consts.OP_VIEW)
+                profile_run.operation_pie_by_name = self._get_operator_pie()
+                profile_run.operation_table_by_name = self._get_operator_table_by_name()
+                profile_run.operation_stack_by_name = self._get_call_stack_by_name()
+                profile_run.operation_pie_by_name_input = self._get_operator_pie(True)
+                profile_run.operation_table_by_name_input = self._get_operator_table_by_name(True)
+                profile_run.operation_stack_by_name_input = self._get_call_stack_by_name_shapes(True)
 
         if self.profile_data.has_kernel:
             profile_run.views.append(consts.KERNEL_VIEW)
@@ -96,6 +107,173 @@ class RunGenerator(object):
 
         return profile_run
 
+    def _get_operator_details_by_name(self):
+        operator_by_name = defaultdict(list)
+        operator_by_name_and_input_shapes = defaultdict(list)
+        path = self.profile_data.operator_path
+        datas = RunGenerator._get_csv_data(path)
+        if len(datas) <= 1:
+            return operator_by_name, operator_by_name_and_input_shapes
+        for ls in datas[1:]:
+            temp: list = [ls[0], RunGenerator._trans_shape(str(ls[1])), ls[2], float(ls[3]), float(ls[4]),
+                          float(ls[5]), float(ls[6]), float(ls[7]), float(ls[8])]
+            operator_by_name[ls[0]].append(temp)
+            key = "{}###{}".format(str(ls[0]), RunGenerator._trans_shape(str(ls[1])))
+            operator_by_name_and_input_shapes[key].append(temp)
+        return operator_by_name, operator_by_name_and_input_shapes
+
+    def _get_operator_table_by_name(self, group_by_input_shape=False):
+        result = {
+            'metadata': {
+                'sort': 'device_self_duration',
+                'tooltips': {
+                    'tc_eligible': consts.TOOLTIP_OP_TC_ELIGIBLE_AICORE,
+                    'tc_self_ratio': consts.TOOLTIP_OP_TC_SELF_AICORE,
+                    'tc_total_ratio': consts.TOOLTIP_OP_TC_TOTAL_AICORE
+                }
+            },
+            'data': self._set_operator_data(True) if group_by_input_shape else self._set_operator_data()
+        }
+        return result
+
+    def _get_operator_pie(self, group_by_input_shape=False):
+        data = {}
+        tag = {'device_self_time': 'Device Self Time (us)', 'device_total_time': 'Device Total Time (us)',
+               'host_self_time': 'Host Self Time (us)', 'host_total_time': 'Host Total Time (us)'}
+        for key, value in tag.items():
+            data[key] = {
+                'title': value,
+                'columns': [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}],
+                'rows': []
+            }
+        for value in iter(self._set_operator_data(group_by_input_shape)
+                          if group_by_input_shape else self._set_operator_data()):
+            data['device_self_time'].get('rows').append([value.get('name'), value.get('device_self_duration')])
+            data['device_total_time'].get('rows').append([value.get('name'), value.get('device_total_duration')])
+            data['host_self_time'].get('rows').append([value.get('name'), value.get('host_self_duration')])
+            data['host_total_time'].get('rows').append([value.get('name'), value.get('host_total_duration')])
+        return data
+
+    def _set_operator_data(self, group_by_input_shape=False):
+        result = []
+        if group_by_input_shape:
+            _, operator_by_name = self._get_operator_details_by_name()
+        else:
+            operator_by_name, _ = self._get_operator_details_by_name()
+        for name_key, values in operator_by_name.items():
+            if group_by_input_shape:
+                name = name_key.split("###")[0]
+                shape = name_key.split("###")[1]
+                result.append(RunGenerator._get_table_head(name, shape, None, values))
+            else:
+                result.append(RunGenerator._get_table_head(name_key, None, None, values))
+        return result
+
+    def _set_name_callstack_data(self, group_by_input_shape=False):
+        if group_by_input_shape:
+            _, operator_by_name = self._get_operator_details_by_name()
+        else:
+            operator_by_name, _ = self._get_operator_details_by_name()
+
+        result = dict()
+        for key, values in operator_by_name.items():
+            name_callstack = defaultdict(list)
+            for value in iter(values):
+                name_callstack[str(value[2])].append(value)
+            result[key] = name_callstack
+        return result
+
+    def _get_call_stack_by_name_shapes(self, group_by_input_shape: bool = False):
+        result = dict()
+        name_input_shapes_callstack_data = self._set_name_callstack_data(group_by_input_shape)
+        for name_key, values in name_input_shapes_callstack_data.items():
+            name = name_key.split("###")[0]
+            shape = name_key.split("###")[1]
+            table = {
+                'metadata': {
+                    'sort': 'device_self_duration',
+                    'tooltips': {
+                        'tc_eligible': consts.TOOLTIP_OP_TC_ELIGIBLE_AICORE,
+                        'tc_self_ratio': consts.TOOLTIP_OP_TC_SELF_AICORE,
+                        'tc_total_ratio': consts.TOOLTIP_OP_TC_TOTAL_AICORE
+                    }
+                },
+                'data': []
+            }
+            for callstack_key, value in values.items():
+                table['data'].append(RunGenerator._get_table_head(name, shape, callstack_key, value))
+            result[name_key] = table
+        return result
+
+    @staticmethod
+    def _trans_shape(shape: str):
+        result = list()
+        if ';' not in shape:
+            result.append('[' + shape.strip() + ']')
+            return '[' + ', '.join(result) + ']'
+        if len(shape.strip()) <= 1:
+            result.append('[]')
+            return '[' + ', '.join(result) + ']'
+        shape_spl = shape.split("\n")
+        for shape_div in iter(shape_spl):
+            result.append('[' + str(shape_div.replace(';', '')) + ']')
+        return '[' + ', '.join(result) + ']'
+
+    def _get_call_stack_by_name(self):
+        result = dict()
+        name_callstack_data = self._set_name_callstack_data()
+        for name_key, values in name_callstack_data.items():
+            table = {
+                'metadata': {
+                    'sort': 'device_self_duration',
+                    'tooltips': {
+                        'tc_eligible': consts.TOOLTIP_OP_TC_ELIGIBLE_AICORE,
+                        'tc_self_ratio': consts.TOOLTIP_OP_TC_SELF_AICORE,
+                        'tc_total_ratio': consts.TOOLTIP_OP_TC_TOTAL_AICORE
+                    }
+                },
+                'data': []
+            }
+            for callstack_key, value in values.items():
+                table['data'].append(RunGenerator._get_table_head(name_key, None, callstack_key, value))
+            result[name_key] = table
+        return result
+
+    @staticmethod
+    def _get_table_head(name: str, input_shape: str, call_stack: str, value: list):
+        if name is None:
+            return {}
+        temp = {'name': name, 'calls': 0, 'host_self_duration': 0,
+                'host_total_duration': 0, 'device_self_duration': 0, 'device_total_duration': 0,
+                'tc_self_ratio': 0, 'tc_total_ratio': 0, 'tc_eligible': 'Yes'}
+        if input_shape is not None:
+            temp['input_shape'] = input_shape
+            if call_stack is not None:
+                temp['call_stack'] = call_stack
+            else:
+                temp['has_call_stack'] = False
+        else:
+            if call_stack is not None:
+                temp['call_stack'] = call_stack
+            else:
+                temp['has_call_stack'] = False
+        for vl in iter(value):
+            if 'has_call_stack' in temp and vl[2]:
+                temp['has_call_stack'] = True
+            temp['calls'] += 1
+            temp['host_self_duration'] = round(temp['host_self_duration'] + vl[3], 2)
+            temp['host_total_duration'] = round(temp['host_total_duration'] + vl[4], 2)
+            temp['device_self_duration'] = round(temp['device_self_duration'] + vl[5], 2)
+            temp['device_total_duration'] = round(temp['device_total_duration'] + vl[6], 2)
+            temp['tc_self_ratio'] = round(temp['tc_self_ratio'] + vl[7], 2)
+            temp['tc_total_ratio'] = round(temp['tc_total_ratio'] + vl[8], 2)
+        temp['tc_eligible'] = 'Yes' if temp['tc_self_ratio'] > 0 or temp['tc_total_ratio'] > 0 else 'No'
+        temp['tc_self_ratio'] = 0 if temp['device_self_duration'] == 0 \
+            else round(temp['tc_self_ratio'] / temp['device_self_duration'] * 100, 2)
+        temp['tc_total_ratio'] = 0 if temp['device_total_duration'] == 0 \
+            else round(temp['tc_total_ratio'] / temp['device_total_duration'] * 100, 2)
+        return temp
+
     def _get_memory_event(self):
         display_columns = ('Operator', 'Size(KB)', 'Allocation Time(us)', 'Release Time(us)', 'Duration(us)')
         path = self.profile_data.memory_form_path
@@ -109,7 +287,7 @@ class RunGenerator(object):
             'columns': [],
             'rows': {}
         }
-        datas = self._get_csv_data(path)
+        datas = RunGenerator._get_csv_data(path)
         for idx, column in enumerate(datas[0]):
             if column == 'Device Type':
                 self.device_type_form_idx = idx
@@ -176,7 +354,7 @@ class RunGenerator(object):
         pta_and_ge_data = defaultdict(list)
         pta_or_ge_data = {}
         path = self.profile_data.memory_line_path
-        datas = self._get_csv_data(path)
+        datas = RunGenerator._get_csv_data(path)
         for idx, column in enumerate(datas[0]):
             if column == 'Tag':
                 self.tag_type_idx = idx
@@ -555,9 +733,9 @@ class RunGenerator(object):
         return datas
 
     def _generate_kernel_table_npu(self):
-        display_columns = ('Step Id', 'Name', 'Type', 'Accelerator Core', 'Start Time', 'Duration(us)', 'Wait Time(us)',
-                           'Block Dim', 'Input Shapes', 'Input Data Types', 'Input Formats', 'Output Shapes',
-                           'Output Data Types', 'Output Formats')
+        display_columns = ('Step Id', 'Name', 'Type', 'Accelerator Core', 'Start Time(us)', 'Duration(us)',
+                           'Wait Time(us)', 'Block Dim', 'Input Shapes', 'Input Data Types', 'Input Formats',
+                           'Output Shapes', 'Output Data Types', 'Output Formats')
         display_idxs = []
         table = {'columns': [], 'rows': []}
         result = {
@@ -567,7 +745,7 @@ class RunGenerator(object):
             'data': table
         }
         path = self.profile_data.kernel_file_path
-        datas = self._get_csv_data(path)
+        datas = RunGenerator._get_csv_data(path)
         for idx, column in enumerate(datas[0]):
             if column == 'Name':
                 self.name_idx = idx
-- 
Gitee
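
Note for reviewers: the snippet below is a minimal, standalone sketch of how the new RunGenerator._trans_shape helper normalizes the 'Input Shapes' column read from operator_details.csv before it becomes part of the "name###shape" grouping key. The sample input strings are assumptions for illustration only; they are not taken from a real Ascend profile.

    # Standalone copy of the _trans_shape logic added in this patch (illustrative only).
    def trans_shape(shape: str) -> str:
        result = list()
        if ';' not in shape:
            # Single input, e.g. '32,128' -> '[[32,128]]'
            result.append('[' + shape.strip() + ']')
            return '[' + ', '.join(result) + ']'
        if len(shape.strip()) <= 1:
            # Effectively empty field, e.g. ';' -> '[[]]'
            result.append('[]')
            return '[' + ', '.join(result) + ']'
        # Several inputs separated by ';' and newlines -> one bracketed list per input.
        for shape_div in shape.split('\n'):
            result.append('[' + shape_div.replace(';', '') + ']')
        return '[' + ', '.join(result) + ']'

    # Assumed raw CSV field values (hypothetical):
    print(trans_shape('32,128'))            # -> [[32,128]]
    print(trans_shape(';'))                 # -> [[]]
    print(trans_shape('32,128;\n64,256;'))  # -> [[32,128], [64,256]]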
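The second sketch mirrors how _get_table_head aggregates the per-call records produced by _get_operator_details_by_name into one Operator-view row. The column order assumed for indices 3-8 (host self/total, device self/total, AICore self/total durations in us) is inferred from how the patch indexes ls[3]..ls[8] and from the new tooltip texts; the operator name, call stacks, and timings are made-up sample values.

    # Each record: [name, input_shapes, call_stack,
    #               host_self, host_total, device_self, device_total,
    #               aicore_self, aicore_total]   (sample values only)
    records = [
        ['MatMul', '[[32,128]]', 'module.forward', 1.0, 2.0, 10.0, 12.0, 8.0, 9.0],
        ['MatMul', '[[32,128]]', '',               0.5, 1.0,  5.0,  6.0, 0.0, 0.0],
    ]

    row = {'name': 'MatMul', 'calls': 0, 'host_self_duration': 0, 'host_total_duration': 0,
           'device_self_duration': 0, 'device_total_duration': 0,
           'tc_self_ratio': 0, 'tc_total_ratio': 0}
    for rec in records:
        row['calls'] += 1
        row['host_self_duration'] = round(row['host_self_duration'] + rec[3], 2)
        row['host_total_duration'] = round(row['host_total_duration'] + rec[4], 2)
        row['device_self_duration'] = round(row['device_self_duration'] + rec[5], 2)
        row['device_total_duration'] = round(row['device_total_duration'] + rec[6], 2)
        # AICore time is accumulated into the ratio fields first ...
        row['tc_self_ratio'] = round(row['tc_self_ratio'] + rec[7], 2)
        row['tc_total_ratio'] = round(row['tc_total_ratio'] + rec[8], 2)

    # ... and only turned into percentages of device time at the end.
    row['tc_eligible'] = 'Yes' if row['tc_self_ratio'] > 0 or row['tc_total_ratio'] > 0 else 'No'
    row['tc_self_ratio'] = 0 if row['device_self_duration'] == 0 \
        else round(row['tc_self_ratio'] / row['device_self_duration'] * 100, 2)
    row['tc_total_ratio'] = 0 if row['device_total_duration'] == 0 \
        else round(row['tc_total_ratio'] / row['device_total_duration'] * 100, 2)

    print(row)
    # {'name': 'MatMul', 'calls': 2, 'host_self_duration': 1.5, 'host_total_duration': 3.0,
    #  'device_self_duration': 15.0, 'device_total_duration': 18.0,
    #  'tc_self_ratio': 53.33, 'tc_total_ratio': 50.0, 'tc_eligible': 'Yes'}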