\n", - " | rank id | \n", - "Name | \n", - "Input Shapes | \n", - "Input Data Types | \n", - "Output Shapes | \n", - "Duration(us)_mean | \n", - "Duration(us)_var | \n", - "Duration(us)_max | \n", - "Duration(us)_min | \n", - "Duration(us)_count | \n", - "Duration(us)_sum | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "0 | \n", - "Add | \n", - "\"1024,2,5120;1024,2,5120\" | \n", - "DT_BF16;DT_BF16 | \n", - "\"1024,2,5120\" | \n", - "45.012050 | \n", - "82.952748 | \n", - "55.9255 | \n", - "35.3108 | \n", - "16 | \n", - "720.1928 | \n", - "
1 | \n", - "0 | \n", - "Add | \n", - "\"2,8192,5120;2,8192,5120\" | \n", - "DT_BF16;DT_BF16 | \n", - "\"2,8192,5120\" | \n", - "447.183700 | \n", - "NaN | \n", - "447.1837 | \n", - "447.1837 | \n", - "1 | \n", - "447.1837 | \n", - "
2 | \n", - "0 | \n", - "Add | \n", - "\"8192,2,1920;1920\" | \n", - "DT_BF16;DT_BF16 | \n", - "\"8192,2,1920\" | \n", - "54.330850 | \n", - "1.342846 | \n", - "55.2456 | \n", - "52.6463 | \n", - "4 | \n", - "217.3234 | \n", - "
3 | \n", - "0 | \n", - "Add | \n", - "\"8192,2,2560;2560\" | \n", - "DT_BF16;DT_BF16 | \n", - "\"8192,2,2560\" | \n", - "75.485375 | \n", - "0.761315 | \n", - "76.2802 | \n", - "74.2407 | \n", - "4 | \n", - "301.9415 | \n", - "
4 | \n", - "0 | \n", - "Add | \n", - "\";\" | \n", - "FLOAT;FLOAT | \n", - "\"\" | \n", - "1.200884 | \n", - "0.017257 | \n", - "1.4996 | \n", - "0.9597 | \n", - "50 | \n", - "60.0442 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
1441 | \n", - "15 | \n", - "atomic_memset-1_67_1998432_1_0 | \n", - "\"\" | \n", - "UNDEFINED | \n", - "\"\" | \n", - "3.160000 | \n", - "NaN | \n", - "3.1600 | \n", - "3.1600 | \n", - "1 | \n", - "3.1600 | \n", - "
1442 | \n", - "15 | \n", - "trans_Cast_14 | \n", - "\"1\" | \n", - "FLOAT | \n", - "\"1\" | \n", - "1.390000 | \n", - "0.023067 | \n", - "1.6000 | \n", - "1.2600 | \n", - "4 | \n", - "5.5600 | \n", - "
1443 | \n", - "15 | \n", - "trans_Cast_15 | \n", - "\"\" | \n", - "INT32 | \n", - "\"\" | \n", - "64.445000 | \n", - "36.276100 | \n", - "70.3000 | \n", - "59.2000 | \n", - "4 | \n", - "257.7800 | \n", - "
1444 | \n", - "15 | \n", - "trans_Cast_4 | \n", - "\"1\" | \n", - "FLOAT | \n", - "\"1\" | \n", - "1.555000 | \n", - "0.035857 | \n", - "1.9400 | \n", - "1.3200 | \n", - "8 | \n", - "12.4400 | \n", - "
1445 | \n", - "15 | \n", - "trans_Cast_5 | \n", - "\"\" | \n", - "INT32 | \n", - "\"\" | \n", - "62.895000 | \n", - "15.584200 | \n", - "69.8600 | \n", - "56.7600 | \n", - "8 | \n", - "503.1600 | \n", - "
1446 rows × 11 columns
\n", - "\n", - " | pattern_name | \n", - "pattern | \n", - "len | \n", - "count | \n", - "duration sum(us) | \n", - "op durations(us) | \n", - "index | \n", - "
---|---|---|---|---|---|---|---|
18 | \n", - "torch_npu.npu_swiglu | \n", - "(Slice, Slice, Swish, Mul) | \n", - "4 | \n", - "1 | \n", - "27.53 | \n", - "[21.2, 0.05, 3.14, 3.14] | \n", - "[0] | \n", - "
\n", - " | Step Id | \n", - "Model ID | \n", - "Task ID | \n", - "Stream ID | \n", - "Name | \n", - "Type | \n", - "Accelerator Core | \n", - "Start Time(us) | \n", - "Duration(us) | \n", - "Wait Time(us) | \n", - "Block Dim | \n", - "Mix Block Dim | \n", - "Input Shapes | \n", - "Input Data Types | \n", - "Input Formats | \n", - "Output Shapes | \n", - "Output Data Types | \n", - "Output Formats | \n", - "Context ID | \n", - "aicore_time(us) | \n", - "aic_total_cycles | \n", - "aic_mac_ratio | \n", - "aic_mac_int8_ratio | \n", - "aic_cube_fops | \n", - "aic_vector_fops | \n", - "aiv_time(us) | \n", - "aiv_total_cycles | \n", - "aiv_vec_fp32_ratio | \n", - "aiv_vec_fp16_ratio | \n", - "aiv_vec_int32_ratio | \n", - "aiv_vec_misc_ratio | \n", - "aiv_cube_fops | \n", - "aiv_vector_fops | \n", - "size(MB) | \n", - "throughput(GB/s) | \n", - "color | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "1 | \n", - "4294967295 | \n", - "1265 | \n", - "16 | \n", - "Slice1 | \n", - "Slice | \n", - "AI_VECTOR_CORE | \n", - "1699529623106750 | \n", - "21.20 | \n", - "261.56 | \n", - "9 | \n", - "0 | \n", - "4,1025 | \n", - "INT64 | \n", - "FORMAT_ND | \n", - "4,1025 | \n", - "INT32 | \n", - "FORMAT_ND | \n", - "NaN | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "1.77 | \n", - "29508.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0062 | \n", - "0.0 | \n", - "0.0 | \n", - "5856.0 | \n", - "0.046921 | \n", - "2.161371 | \n", - "RED | \n", - "
4 | \n", - "1 | \n", - "4294967295 | \n", - "1265 | \n", - "16 | \n", - "Add1 | \n", - "Add | \n", - "AI_CORE | \n", - "1699529623106754 | \n", - "3.14 | \n", - "261.56 | \n", - "9 | \n", - "0 | \n", - "4,1025 | \n", - "INT64 | \n", - "FORMAT_ND | \n", - "4,1025 | \n", - "INT32 | \n", - "FORMAT_ND | \n", - "NaN | \n", - "2.3 | \n", - "28888.0 | \n", - "0.2 | \n", - "0.1 | \n", - "0.1 | \n", - "0.7 | \n", - "0.00 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0000 | \n", - "0.0 | \n", - "0.0 | \n", - "0.0 | \n", - "0.046921 | \n", - "14.592698 | \n", - "RED | \n", - "
{{ header }} | + {% endfor %} +|
{{ element|round(2) }} | + {% else %} +{{ element }} | + {% endif %} + {% endfor %} +
{{ header }} | + {% endfor %} +|
{{ element|round(2) }} | + {% else %} +{{ element }} | + {% endif %} + {% endfor %} +
Structure | +Counts | +Elapsed Time(us) | +
---|---|---|
{{ node.fusion_pattern|safe }} | +{{ node.counts|safe }} | +{{ node.total_duration|safe }} | +
OP Name | +OP Type | +Elapsed Time(us) | +
---|---|---|
{{ node.op_name|safe }} | +{{ node.dtype|safe }} | +{{ node.duration|safe }} | +
Description | +Suggestion | +Elapsed Time(us) | +Time Ratio | +
---|---|---|---|
{{ format_result.record.optimization_item.description|safe }} | +{{ format_result.suggestion|safe }} | +{{ format_result.task_duration|safe }} | +{{ format_result.record.statistics_item.task_duration_ratio|safe }} | +
Operator Type | +Counts | +Elapsed Time(us) | +
---|---|---|
{{ op_info.summary.op_type|safe }} | +{{ op_info.summary.counts|safe }} | +{{ op_info.summary.total_duration|safe }} | +
+ Suggestion {{ loop.index|safe }}: {{suggestion|safe}} +
+ {% endfor %} +Suggestion 1: Modify code to avoid AICPU operator
+ {% endif %} + + {{ info.op_info_list[0].stack_info|safe }} +Description | +Suggestion | +Elapsed Time(us) | +Time Ratio | +
---|---|---|---|
{{ format_result.record.optimization_item.description|safe }} | +{{ format_result.suggestion|safe }} | +{{ format_result.task_duration|safe }} | +{{ format_result.record.statistics_item.task_duration_ratio|safe }} | +
Operator Type | +Counts | +Elapsed Time(us) | +
---|---|---|
{{ op_info.summary.op_type|safe }} | +{{ op_info.summary.counts|safe }} | +{{ op_info.summary.total_duration|safe }} | +
Description | +Suggestion | +
---|---|
{{ format_result.record.optimization_item.description|safe }} | +{{ format_result.suggestion|safe }} | +
Description | +Suggestion | +Elapsed Time(us) | +Time Ratio | +
---|---|---|---|
{{ format_result.record.optimization_item.description|safe }} | +{{ format_result.suggestion|safe }} | +{{ format_result.task_duration|safe }} | +{{ format_result.record.statistics_item.task_duration_ratio|safe }} | +
Operator Type | +Counts | +Elapsed Time(us) | +
---|---|---|
{{ op_info.summary.op_type|safe }} | +{{ op_info.summary.counts|safe }} | +{{ op_info.summary.total_duration|safe }} | +
{{ header }} | + {% endfor %} +
{{ element }} | + {% endfor %} +