diff --git a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py b/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py
index a194ce5e40ce95fadf83be882dd4c76b61ee0981..7bcbed5a75807b57a55787c743cfaaff55a68589 100644
--- a/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py
+++ b/profiler/advisor/advisor_backend/compute_advice/npu_fused/op_perf.py
@@ -23,9 +23,9 @@ from common_func_advisor.constant import PerfColor
class OpPerfFactory:
@classmethod
def build(cls, op_row: Dict):
- if op_row.get(Constant.TITLE.TASK_TYPE) in (CoreType.AIV, CoreType.MIX_AIV):
+ if op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIV:
return VecOpPerf(op_row)
- elif op_row.get(Constant.TITLE.TASK_TYPE) in (CoreType.AIC, CoreType.MIX_AIC):
+ elif op_row.get(Constant.TITLE.TASK_TYPE) == CoreType.AIC:
return CubeOpPerf(op_row)
else:
return OpPerf(op_row)
diff --git a/profiler/advisor/compute_perf_analysis.ipynb b/profiler/advisor/compute_perf_analysis.ipynb
index 86a1fb5b7e899b835d61068872e451441ef6a04e..e7a663130c8da335129513a5ca1a99cf28fe48b7 100644
--- a/profiler/advisor/compute_perf_analysis.ipynb
+++ b/profiler/advisor/compute_perf_analysis.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2024-02-21T09:19:13.937531900Z",
@@ -36,7 +36,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2024-02-22T08:41:17.455567500Z",
@@ -136,7 +136,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 5,
"metadata": {
"collapsed": false
},
@@ -274,14 +274,14 @@
"
0.1 | \n",
" 0.1 | \n",
" 0.7 | \n",
- " 1.77 | \n",
- " 29508.0 | \n",
+ " 0.00 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.0062 | \n",
+ " 0.0 | \n",
+ " 0.0000 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 5856.0 | \n",
" 0.046921 | \n",
" 14.592698 | \n",
" RED | \n",
@@ -293,7 +293,7 @@
"text/plain": [
" Step Id Model ID Task ID Stream ID Name Type Accelerator Core Start Time(us) Duration(us) Wait Time(us) Block Dim Mix Block Dim Input Shapes Input Data Types Input Formats Output Shapes Output Data Types Output Formats Context ID aicore_time(us) aic_total_cycles aic_mac_ratio aic_mac_int8_ratio aic_cube_fops aic_vector_fops aiv_time(us) aiv_total_cycles aiv_vec_fp32_ratio aiv_vec_fp16_ratio aiv_vec_int32_ratio aiv_vec_misc_ratio aiv_cube_fops aiv_vector_fops size(MB) throughput(GB/s) color\n",
"0 1 4294967295 1265 16 Slice1 Slice AI_VECTOR_CORE 1699529623106750 21.20 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 0.0 0.0 0.0 0.0 0.0 0.0 1.77 29508.0 0.0 0.0 0.0062 0.0 0.0 5856.0 0.046921 2.161371 RED\n",
- "4 1 4294967295 1265 16 Add1 Add AI_CORE 1699529623106754 3.14 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 2.3 28888.0 0.2 0.1 0.1 0.7 1.77 29508.0 0.0 0.0 0.0062 0.0 0.0 5856.0 0.046921 14.592698 RED"
+ "4 1 4294967295 1265 16 Add1 Add AI_CORE 1699529623106754 3.14 261.56 9 0 4,1025 INT64 FORMAT_ND 4,1025 INT32 FORMAT_ND NaN 2.3 28888.0 0.2 0.1 0.1 0.7 0.00 0.0 0.0 0.0 0.0000 0.0 0.0 0.0 0.046921 14.592698 RED"
]
},
"metadata": {},
@@ -312,7 +312,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -321,7 +321,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
diff --git a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py
index 41da4b3c27eb01fecb6d7db8232b2b1d5a00a089..8830d495992cfcd2c26024863f8b644d5b4c6902 100644
--- a/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py
+++ b/profiler/test/ut/advisor/advisor_backend/compute_advice/test_npu_slow_advice.py
@@ -7,7 +7,6 @@ import unittest
from advisor_backend.interface import Interface
from advisor_backend.compute_advice.npu_slow_advice import NpuSlowAdvice
-from advisor_backend.common_func_advisor.trace_view_json import TraceViewJson
class TestNpuSlowAdvice(unittest.TestCase):
@@ -109,17 +108,17 @@ class TestNpuSlowAdvice(unittest.TestCase):
'aiv_time(us)', 'aiv_total_cycles', 'aiv_vec_fp32_ratio', 'aiv_vec_fp16_ratio',
'aiv_vec_int32_ratio',
'aiv_vec_misc_ratio', 'aiv_cube_fops', 'aiv_vector_fops']
- # red: size=0.0492 MB, throughput=2.32 GB/s, task_duration=21.2us
+ # RED: size=0.0492 MB, throughput=2.32 GB/s, task_duration=21.2us
csv_row1 = [1, 4294967295, 1265, 16, 'Slice1', 'Slice', 'AI_VECTOR_CORE', "1699529623106750\t", 21.2, 261.56, 9,
0,
'4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
0, 0, 0, 0, 0, 0,
1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
# YELLOW: size=0.0492 MB, throughput=984 GB/s, task_duration=0.05us
- csv_row2 = [1, 4294967295, 1265, 16, 'Slice2', 'Slice', 'MIX_AIV', "1699529623106751\t", 0.05, 261.56, 9,
+ csv_row2 = [1, 4294967295, 1265, 16, 'Slice2', 'Slice', 'AI_VECTOR_CORE', "1699529623106751\t", 0.05, 261.56, 9,
0,
'4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
- 2.3, 28888, 0.4, 0.1, 0.1, 0.7,
+ 0, 0, 0, 0, 0, 0,
1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
# WHITE: AI_CPU
csv_row3 = [1, 4294967295, 1265, 16, 'Swish1', 'Swish', 'AI_CPU', "1699529623106752\t", 3.14, 261.56, 9,
@@ -136,16 +135,28 @@ class TestNpuSlowAdvice(unittest.TestCase):
csv_row5 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "1699529623106754\t", 3.14, 261.56, 9, 0,
'4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
2.3, 28888, 0.2, 0.1, 0.1, 0.7,
- 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
+ 0, 0, 0, 0, 0, 0, 0, 0]
# GREEN: aic_mac_ratio=0.85
- csv_row6 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'MIX_AIC', "1699529623106754\t", 3.14, 261.56, 9, 0,
+ csv_row6 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "1699529623106754\t", 3.14, 261.56, 9, 0,
'4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
2.3, 38888, 0.85, 0.1, 0.1, 0.7,
- 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
+ 0, 0, 0, 0, 0, 0, 0, 0]
# YELLOW: aic_mac_ratio=0.64
- csv_row7 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'MIX_AIC', "1699529623106754\t", 3.14, 261.56, 9, 0,
+ csv_row7 = [1, 4294967295, 1265, 16, 'Add1', 'Add', 'AI_CORE', "1699529623106754\t", 3.14, 261.56, 9, 0,
'4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
2.3, 48888, 0.64, 0.1, 0.1, 0.7,
+ 0, 0, 0, 0, 0, 0, 0, 0]
+ # WHITE: MIX_AIC (expected to take the generic OpPerf fallback in OpPerfFactory.build rather than CubeOpPerf)
+ csv_row8 = [1, 4294967295, 1265, 16, 'Slice2', 'Slice', 'MIX_AIC', "1699529623106751\t", 0.05, 261.56, 9,
+ 0,
+ '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
+ 2.3, 28888, 0.4, 0.1, 0.1, 0.7,
+ 1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
+ # WHITE: MIX_AIV (expected to take the generic OpPerf fallback in OpPerfFactory.build rather than VecOpPerf)
+ csv_row9 = [1, 4294967295, 1265, 16, 'Slice2', 'Slice', 'MIX_AIV', "1699529623106751\t", 0.05, 261.56, 9,
+ 0,
+ '4,1025', 'INT64', 'FORMAT_ND', '4,1025', 'INT32', 'FORMAT_ND', 'N/A',
+ 2.3, 28888, 0.4, 0.1, 0.1, 0.7,
1.77, 29508, 0, 0, 0.0062, 0, 0, 5856]
with os.fdopen(os.open(f"{TestNpuSlowAdvice.OUTPUT_DIR}/kernel_details.csv",
os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'w') as fp:
@@ -158,6 +169,8 @@ class TestNpuSlowAdvice(unittest.TestCase):
csv_writer.writerow(csv_row5)
csv_writer.writerow(csv_row6)
csv_writer.writerow(csv_row7)
+ csv_writer.writerow(csv_row8)
+ csv_writer.writerow(csv_row9)
def test_run_should_return_empty_when_ascend_pt_path_not_exist(self):
interface = Interface("")
@@ -174,7 +187,7 @@ class TestNpuSlowAdvice(unittest.TestCase):
interface = Interface(self.ASCEND_PT_DIR)
data = interface.get_data('compute', 'npu_slow')
call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)")
- self.assertEqual(7, len(data))
+ self.assertEqual(9, len(data))
self.assertEqual("", call_stack)
def test_run_should_return_7_data_with_call_stack_when_new_trace_view_exists(self):
@@ -186,7 +199,7 @@ class TestNpuSlowAdvice(unittest.TestCase):
slow_op_data = data[data["color"] == "RED"]
NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx"))
call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)")
- self.assertEqual(7, len(data))
+ self.assertEqual(9, len(data))
self.assertEqual(2, len(slow_op_data))
print(call_stack)
call_stack_res = "/root/torch/module.py\n" \
@@ -202,7 +215,7 @@ class TestNpuSlowAdvice(unittest.TestCase):
slow_op_data = data[data["color"] == "RED"]
NpuSlowAdvice.save_to_excel(data, file_path=os.path.join(self.ASCEND_PT_DIR, "slow_op.xlsx"))
call_stack = NpuSlowAdvice(self.ASCEND_PT_DIR).get_call_stack(data, index_id=0, ts_col="Start Time(us)")
- self.assertEqual(7, len(data))
+ self.assertEqual(9, len(data))
self.assertEqual(2, len(slow_op_data))
print(call_stack)
call_stack_res = "/root/test/slice.py(116)\n\r\n" \