From 18e37213f27105a590f30b7d5cd979af21ca3e08 Mon Sep 17 00:00:00 2001 From: zhouxianqi <13165993773@163.com> Date: Fri, 12 Jul 2024 17:19:26 +0800 Subject: [PATCH] del --- profiler/compare_tools/README.md | 302 ---------------- profiler/compare_tools/__init__.py | 14 - .../compare_tools/compare_backend/__init__.py | 0 .../compare_backend/comparator/__init__.py | 0 .../comparator/base_comparator.py | 24 -- .../comparator/communication_comparator.py | 20 -- .../comparator/module_comparetor.py | 36 -- .../comparator/module_statistic_comparator.py | 45 --- .../comparator/operator_comparator.py | 13 - .../operator_statistic_comparator.py | 28 -- .../overall_performance_comparator.py | 76 ----- .../compare_backend/compare_bean/__init__.py | 0 .../compare_bean/communication_bean.py | 72 ---- .../compare_bean/memory_compare_bean.py | 47 --- .../compare_bean/memory_statistic_bean.py | 38 --- .../compare_bean/module_compare_bean.py | 83 ----- .../compare_bean/module_statistic_bean.py | 98 ------ .../compare_bean/operator_compare_bean.py | 47 --- .../compare_bean/operator_statistic_bean.py | 36 -- .../compare_bean/origin_data_bean/__init__.py | 0 .../origin_data_bean/compare_event.py | 79 ----- .../origin_data_bean/kernel_details_bean.py | 87 ----- .../origin_data_bean/memory_record_bean.py | 15 - .../origin_data_bean/operator_memory_bean.py | 43 --- .../origin_data_bean/trace_event_bean.py | 216 ------------ .../compare_bean/profiling_info.py | 128 ------- .../compare_backend/comparison_generator.py | 44 --- .../compare_backend/data_prepare/__init__.py | 0 .../data_prepare/module_data_prepare.py | 99 ------ .../data_prepare/operator_data_prepare.py | 19 -- .../compare_backend/disaggregate/__init__.py | 0 .../disaggregate/overall_perf_interface.py | 34 -- .../compare_backend/generator/__init__.py | 0 .../generator/base_generator.py | 23 -- .../generator/detail_performance_generator.py | 161 --------- .../overall_performance_generator.py | 19 -- .../compare_backend/interface/__init__.py | 0 .../interface/overall_interface.py | 13 - .../profiling_parser/__init__.py | 0 .../profiling_parser/base_profiling_parser.py | 211 ------------ .../profiling_parser/gpu_profiling_parser.py | 189 ---------- .../profiling_parser/npu_profiling_parser.py | 323 ------------------ .../compare_backend/utils/__init__.py | 0 .../compare_backend/utils/args_manager.py | 136 -------- .../compare_backend/utils/common_func.py | 95 ------ .../compare_backend/utils/compare_args.py | 24 -- .../compare_backend/utils/constant.py | 80 ----- .../compare_backend/utils/excel_config.py | 185 ---------- .../compare_backend/utils/file_reader.py | 64 ---- .../compare_backend/utils/module_node.py | 171 ---------- .../compare_backend/utils/name_function.py | 52 --- .../compare_backend/utils/torch_op_node.py | 92 ----- .../compare_backend/utils/tree_builder.py | 82 ----- .../compare_backend/view/__init__.py | 0 .../compare_backend/view/base_view.py | 10 - .../compare_backend/view/excel_view.py | 22 -- .../compare_backend/view/screen_view.py | 19 -- .../view/work_sheet_creator.py | 60 ---- .../compare_interface/__init__.py | 0 .../compare_interface/comparison_interface.py | 31 -- profiler/compare_tools/performance_compare.py | 37 -- 61 files changed, 3842 deletions(-) delete mode 100644 profiler/compare_tools/README.md delete mode 100644 profiler/compare_tools/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/__init__.py delete mode 100644 
profiler/compare_tools/compare_backend/comparator/base_comparator.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/communication_comparator.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/module_comparetor.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/module_statistic_comparator.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/operator_comparator.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/operator_statistic_comparator.py delete mode 100644 profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/communication_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/memory_compare_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/memory_statistic_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/module_compare_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/module_statistic_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/operator_compare_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/operator_statistic_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/compare_event.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/memory_record_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py delete mode 100644 profiler/compare_tools/compare_backend/compare_bean/profiling_info.py delete mode 100644 profiler/compare_tools/compare_backend/comparison_generator.py delete mode 100644 profiler/compare_tools/compare_backend/data_prepare/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/data_prepare/module_data_prepare.py delete mode 100644 profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py delete mode 100644 profiler/compare_tools/compare_backend/disaggregate/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py delete mode 100644 profiler/compare_tools/compare_backend/generator/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/generator/base_generator.py delete mode 100644 profiler/compare_tools/compare_backend/generator/detail_performance_generator.py delete mode 100644 profiler/compare_tools/compare_backend/generator/overall_performance_generator.py delete mode 100644 profiler/compare_tools/compare_backend/interface/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/interface/overall_interface.py delete mode 100644 profiler/compare_tools/compare_backend/profiling_parser/__init__.py delete mode 100644 profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py delete mode 100644 profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py delete mode 100644 
profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/__init__.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/args_manager.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/common_func.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/compare_args.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/constant.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/excel_config.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/file_reader.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/module_node.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/name_function.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/torch_op_node.py
 delete mode 100644 profiler/compare_tools/compare_backend/utils/tree_builder.py
 delete mode 100644 profiler/compare_tools/compare_backend/view/__init__.py
 delete mode 100644 profiler/compare_tools/compare_backend/view/base_view.py
 delete mode 100644 profiler/compare_tools/compare_backend/view/excel_view.py
 delete mode 100644 profiler/compare_tools/compare_backend/view/screen_view.py
 delete mode 100644 profiler/compare_tools/compare_backend/view/work_sheet_creator.py
 delete mode 100644 profiler/compare_tools/compare_interface/__init__.py
 delete mode 100644 profiler/compare_tools/compare_interface/comparison_interface.py
 delete mode 100644 profiler/compare_tools/performance_compare.py

diff --git a/profiler/compare_tools/README.md b/profiler/compare_tools/README.md
deleted file mode 100644
index d81ce05f44..0000000000
--- a/profiler/compare_tools/README.md
+++ /dev/null
@@ -1,302 +0,0 @@
-# 性能比对工具
-
-compare_tools(性能比对工具)支持比较GPU与NPU之间、NPU与NPU之间的性能差异,通过对训练耗时和内存占用的比对分析,定位到具体劣化的算子,帮助用户提升性能调优的效率。工具将训练耗时拆分为计算、通信、调度三大维度,并针对计算和通信分别进行算子级别的比对;将训练占用的总内存,拆分成算子级别的内存占用进行比对。
-
-## 使用场景
-
-场景一:PyTorch训练工程从GPU迁移至NPU后出现性能劣化,通过工具分析出劣化点。
-
-场景二:PyTorch或MindSpore训练工程在NPU上,不同版本之间存在性能差距,通过工具定位具体差异。
-
-场景三:PyTorch训练工程从GPU迁移至MindSpore NPU后出现性能劣化,通过工具分析出劣化点。
-
-## 使用指导
-
-### 环境依赖
-
-使用本工具前需要安装的依赖包:
-
-```bash
-pip3 install prettytable
-pip3 install xlsxwriter
-pip3 install pandas
-pip3 install numpy
-```
-
-### PyTorch框架性能数据采集
-
-使用本工具之前需要采集GPU或者NPU的性能数据,建议只采集一个step的性能数据,然后进行性能比对分析。
-
-#### GPU性能数据采集
-
-通过PyTorch Profiler工具采集GPU的性能数据,参考链接:[torch.profiler](https://pytorch.org/docs/stable/profiler.html)。
-
-采集样例代码参考一:
-
-```Python
-with torch.profiler.profile(
-    profile_memory=True,  # 内存数据采集的开关
-    record_shapes=True,  # 算子input shape信息采集的开关
-    schedule=torch.profiler.schedule(wait=10, warmup=0, active=1, repeat=1),
-    on_trace_ready=torch.profiler.tensorboard_trace_handler("./result_dir")
-) as prof:
-    for step in range(step_number):
-        train_one_step()
-        prof.step()
-```
-
-采集样例代码参考二:
-
-```Python
-prof = torch.profiler.profile(
-    profile_memory=True,  # 内存数据采集的开关
-    record_shapes=True,  # 算子input shape信息采集的开关
-    on_trace_ready=torch.profiler.tensorboard_trace_handler("./result_dir"))
-for step in range(step_number):
-    if step == 11:
-        prof.start()
-    train_one_step()
-    if step == 11:
-        prof.stop()
-```
-
-PyTorch Profiler采集结果数据目录结构如下:
-
-```Python
-|- pytorch_profiling
-    |- *.pt.trace.json
-```
-
-#### NPU性能数据采集
-
-通过Ascend PyTorch Profiler工具采集NPU的性能数据,采集参数配置与GPU基本一致,只需将GPU的性能数据采集代码中torch.profiler替换成torch_npu.profiler,参考链接:[Profiling数据采集](https://gitee.com/ascend/mstt/tree/master/profiler)。
-
-Ascend PyTorch Profiler采集结果数据目录结构如下:
-
-```bash
-|- ascend_pytorch_profiling
- |- * _ascend_pt - |- ASCEND_PROFILER_OUTPUT - |- trace_view.json - |- FRAMEWORK - |- PROF_XXX - |- * _ascend_pt -``` - -### MindSpore框架性能数据采集 - -#### NPU性能数据采集 - -当前MindSpore场景仅支持NPU环境性能数据与PyTorch GPU性能数据进行比对;以及MindSpore训练工程在NPU上,不同版本之间的性能数据进行比对。 - -通过MindSpore性能调试工具采集NPU的性能数据,建议只采集或只解析一个step的性能数据,参考链接:[性能调试(Ascend)](https://www.mindspore.cn/mindinsight/docs/zh-CN/r2.3/performance_profiling_ascend.html)。 - -MindSpore性能调试工具采集结果数据目录结构如下: - -``` -|- profiler/{rank-*}_{timestamps}_ascend_ms - |- ASCEND_PROFILER_OUTPUT - |- kernel_details.csv - |- trace_view.json -``` - -进行性能比对时,MindSpore采集的性能数据须指定到`profiler/{rank-*}_{timestamps}_ascend_ms`或`ASCEND_PROFILER_OUTPUT`层级。 - -### 性能数据比对 - -性能比对工具将总体性能拆解为训练耗时和内存占用,其中训练耗时可拆分为算子(包括算子和nn.Module)、通信、调度三个维度,以打屏的形式输出总体指标,帮助用户定界劣化的方向。与此同时,工具还会生成performance_comparison_result_*.xlsx,展示每个算子在执行耗时、通信耗时、内存占用的优劣,可通过DIFF列大于0筛选出劣化算子。详细介绍请参见“**比对结果说明**”。 - -性能比对工具支持使用**命令行**和**脚本**两种方式执行性能数据比对操作,这两种方式均支持**通用参数**和**算子性能比对特有参数**。 - -#### 命令行方式 - -1. 参见《[性能工具](../README.md)》完成工具安装。 - -2. 执行如下命令进行性能数据比对: - - ``` - msprof-analyze compare -d [比对性能数据文件所在路径] -bp [基准性能数据文件所在路径] --output_path=[比对结果文件存放路径] - ``` - - - -d(必选):比对性能数据文件所在路径。可以指定以“ascend_pt”结尾的目录、ASCEND_PROFILER_OUTPUT目录或trace_view.json文件,指定trace_view.json无法显示算子的内存占用。 - - -bp(必选):基准性能数据文件所在路径。基准性能数据文件若以GPU为基准,指定到以".pt.trace"结尾的json文件;若以NPU不同版本为基准,指定文件与-d一致。 - - --output_path(可选):性能比对结果存放的路径,默认保存在当前目录。 - -#### 脚本方式 - -将mstt代码仓下载到本地,执行如下命令: - -```bash -# 进入mstt代码仓目录下的compare_tools目录 -cd mstt/profiler/compare_tools -# 执行最简比对命令 -python performance_compare.py [基准性能数据文件所在路径] [比对性能数据文件所在路径] --output_path=[比对结果文件存放路径] -``` - -- 基准性能数据文件所在路径(必选):若以GPU为基准,指定到以".pt.trace"结尾的json文件;若以NPU不同版本为基准,指定文件参考**比对性能数据文件所在路径**。 -- 比对性能数据文件所在路径(必选):可以指定以“ascend_pt”结尾的目录、ASCEND_PROFILER_OUTPUT目录或trace_view.json文件,指定trace_view.json无法显示算子的内存占用。 -- --output_path(可选):性能比对结果存放的路径,默认保存在当前目录。 - -#### 通用参数说明 - -| 参数名 | 说明 | 是否必选 | -| ------------------------------ | ------------------------------------------------------------ | -------- | -| --enable_profiling_compare | 开启总体性能比对。 | 否 | -| --enable_operator_compare | 开启算子性能比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | -| --enable_communication_compare | 开启通信性能比对。 | 否 | -| --enable_memory_compare | 开启算子内存比对。MindSpore场景暂不支持。该开关较耗时,建议只采集一个step的性能数据。 | 否 | -| --disable_details | 隐藏明细比对,只进行统计级比对。 | 否 | - -说明:以上开关均不设置的情况下,**工具默认开启所有的性能比对**,当用户设置了以上开关,则按照用户设置的开关进行性能比对,示例如下: - -```bash -msprof-analyze compare -d [比对性能数据文件所在路径] -bp [基准性能数据文件所在路径] --output_path=./result_dir --enable_profiling_compare -``` - -或 - -```bash -python performance_compare.py [基准性能数据文件] [比对性能数据文件] --output_path=./result_dir --enable_profiling_compare -``` - -此时表示仅开启总体性能比对。 - -#### 算子性能比对特有参数说明 - -| 参数名 | 说明 | 是否必选 | -| ----------------- | ------------------------------------------------------------ | -------- | -| --gpu_flow_cat | 配置GPU trace中CPU侧算子与device kernel的连线标识,当GPU的Device Duration(us)均为0时设置。使用chrome://tracing打开GPU的json,右上角Flow events找到连线标识,将标识配置进该参数。使用示例:--gpu_flow_cat=async_gpu | 否 | -| --use_input_shape | 开启算子精准匹配,默认关闭。使用示例:--use_input_shape | 否 | -| --max_kernel_num | 设置CPU侧算子下发的最大kernel数量,当超过设定值时工具会自动往下找子算子,直至满足条件。默认仅比对最上层算子,粒度较粗;若想要更细粒度的算子比对,可设置该参数,参数值不得小于4,参数值设置越小,比对粒度越细。使用示例:--max_kernel_num=10 | 否 | -| --op_name_map | 设置GPU与NPU等价的算子名称的映射关系,以字典形式存入。使用示例:--op_name_map={'Optimizer.step#SGD.step':'Optimizer.step#NpuFusedSGD.step'} | 否 | - -## 比对结果说明 - -MindSpore场景仅支持**总体性能**和**通信性能**的对比。 - -### 总体性能 - -总体性能比对结果以打屏的形式呈现。 - -| 字段 | 说明 | -| --------------------------------------- | 
------------------------------------------------------------ | -| Cube Time(Num) | Cube算子总耗时,Num表示计算的次数。 | -| Vector Time(Num) | Vector算子总耗时,Num表示计算的次数。 | -| Conv Time(Forward)(Num) | conv前向算子耗时,Num表示计算的次数。 | -| Conv Time(Backward)(Num) | conv反向算子耗时,Num表示计算的次数。 | -| Flash Attention Time(Forward)(Num) | Flash Attention算子前向耗时,Num表示计算的次数。 | -| Flash Attention Time(Backward)(Num) | Flash Attention算子反向耗时,Num表示计算的次数。 | -| Paged Attention Time(Num) | Paged Attention算子耗时,Num表示计算的次数。 | -| Lccl Time(Num) | Lccl算子耗时,Num表示计算的次数。 | -| Computing Time | 计算流耗时,计算流所有event耗时总和。如果有多条并发计算,计算流耗时对重叠部分只会计算一次。 | -| Mem Usage | 内存使用。GPU上的内存使用可以使用nvidia-smi查看,NPU上的内存使用可以使用npu-smi查看,Profiling信息采集时打开profile_memory=True开关,mem usage显示的是memory_record里面的最大resevered值,一般来说是进程级内存。 | -| Uncovered Communication Time(Wait Time) | 通信未掩盖耗时,包含Wait Time(只有采集性能数据的Level等级为L1以上并且采集NPU数据时才会存在)为同步时间。 | -| SDMA Time(Num) | 拷贝类任务耗时,Num表示计算的次数。 | -| Free Time | 调度耗时 = E2E耗时 - 算子耗时 - 通信不可掩盖耗时。Free的定义为Device侧既不在通信又不在计算的时间,因此包含拷贝时间(SDMA Time)。 | -| E2E Time(Not minimal profiling) | E2E总耗时,计算流端到端耗时。当存在Not minimal profiling时,表示该时间存在性能膨胀,会影响通信和调度耗时。 | -| Other Time | AI CPU、DSA、TensorMove等其他算子耗时。 | - -可以采取最简性能数据采集的方式来减少E2E耗时的性能膨胀,示例代码如下: - -```python -with torch_npu.profiler.profile( - activities=[torch_npu.profiler.ProfilerActivity.NPU], - schedule=torch_npu.profiler.schedule(wait=1, warmup=1, active=1, repeat=1, skip_first=10), - on_trace_ready=torch_npu.profiler.tensorboard_trace_handler("./result"), -) as prof: - for step in range(steps): - train_one_step() - prof.step() -``` - -activities配置仅采集NPU数据,不配置experimental_config参数以及其他可选开关。 - -- 当Computing Time耗时增大,分析**算子性能**。 -- 当Uncovered Communication Time耗时增大,分析**通信性能**,若通信性能分析没有劣化的通信算子,代表通信与计算的并行度较差,继续进行NPU的集群性能分析。 -- 当Mem Usage增大,分析**算子内存**,若没有明显占用较大的算子,则代表算子内存申请量没有差异,问题在于内存的释放(持有时间过久),可以使用tensorboard或ascend insight继续进行NPU内存的分析。 - -### 算子性能 - -MindSpore场景暂不支持。 - -#### 比对数据无Python Function - -算子性能比对结果在performance_comparison_result_*.xlsx中OperatorCompare和OperatorCompareStatistic的sheet页呈现。 - -- OperatorCompareStatistic:算子为粒度的统计呈现,按照算子在device上的总耗时与基准算子的差距值(Diff Duration(ms)列)进行逆序。 -- OperatorCompare:算子比对的明细展示,可以查看每一个算子对应的kernel详情。 -- Diff Ratio:比较算子在device上执行总耗时 / 基准算子在device上执行总耗时,红色代表劣化。 -- Device Duration(us):该算子下发到device上执行的所有kernel耗时的总和。 - -步骤1:查看OperatorCompareStatistic页,找出耗时差距TOP的算子。 -步骤2:查看OperatorCompare页,搜索耗时差距TOP的算子,查看具体执行的kernel耗时,寻找可优化点。 - -#### 比对数据有Python Function - -算子性能比对结果在performance_comparison_result_*.xlsx中ModuleCompareStatistic、ModuleCompare的sheet页呈现。 - -当用户采集时开启with_stack开关,会上报python function事件,当比对的双方数据都存在python function的事件时,可进行模块级别的比对。 - -- Module Class:Module名,如nn.Module: Linear。 -- Module Level:Module的层级。 -- Module Name:Module唯一标识名,如/ DynamicNet_0/ Linear_0。 -- Operator Name:框架侧算子名,如aten::add。字段为[ TOTAL ]代表该module的总体情况。 -- Kernel Detail:算子详细信息。 -- Device Self Time(ms):该模块调用的算子(排除子模块)在device侧执行的总耗时,单位ms。 -- Number:该Module或算子被调用的次数。 -- Device Total Time(ms):该模块调用的算子(包含子模块)在device侧执行的总耗时,单位ms。 -- Device Total Time Diff(ms):GPU与NPU的Device Total Time(ms)差值。 -- Device Self Time Diff(ms):GPU与NPU的Device Self Time(ms)差值。 -- Total Time Ratio:GPU与NPU的Device Total Time(ms)比值。 -- Base Call Stack:基准文件模块的调用栈。 -- Comparison Call Stack:比较文件模块的调用栈。 - -ModuleCompare:模块及模块下算子比对的明细展示,可以查看每一个算子对应的kernel详情。 - -- Module Class:Module名,如nn.Module: Linear。 -- Module Level:Module的层级。 -- Module Name:Module唯一标识名,如/ DynamicNet_0/ Linear_0。 -- Operator Name:框架侧算子名,如aten::add。字段为[ TOTAL ]代表该module的总体情况。 -- Kernel Detail:算子详细信息。 -- Device Self Time(us):该模块调用的算子(排除子模块)在device侧执行的总耗时,单位us。 -- Device Total 
Time(us):该模块调用的算子(包含子模块)在device侧执行的总耗时,单位us。 -- Device Total Time Diff(us):GPU与NPU的Device Total Time(us)差值。 -- Device Self Time Diff(us):GPU与NPU的Device Self Time(us)差值。 -- Total Time Ratio:GPU与NPU的Device Total Time(us)比值。 -- Base Call Stack:有劣化的模块或算子,基准文件模块的调用栈。 -- Comparison Call Stack:有劣化的模块或算子,比较文件模块的调用栈。 - -步骤1:查看ModuleCompareStatistic页,找出耗时差距TOP的模块。 - -​ 筛选Operator Name字段为[ TOTAL ],将模块总体情况按照Device Self Time(ms)字段逆序,可识别出耗时差距TOP的模块。 - -​ 恢复数据,可按照Order Id字段升序。 - -步骤2:查看ModuleCompare页,查找耗时差距TOP模块下的劣化算子。 - -步骤3:通过调用栈找到对应的代码行。 - -### 通信性能 - -通信性能比对结果在performance_comparison_result_*.xlsx中CommunicationCompare的sheet页呈现。 - -- 第二行表头:通信算子的summary信息,包括通信算子名称、调用总次数、通信算子总耗时(单位:us)、通信算子平均耗时(单位:us)、通信算子最大耗时(单位:us)、通信算子最小耗时(单位:us)。 -- 无背景色的记录行:通信算子的detail信息,仅支持NPU,包含了该通信算子下的所有Task信息,包括Task名称、Task调用次数、Task总耗时(单位:us)、Task平均耗时(单位:us)、Task最大耗时(单位:us)、Task最小耗时(单位:us)。 -- Diff Ratio: 比较通信算子的总耗时 / 基准通信算子的总耗时,红色代表劣化。 - -### 算子内存 - -MindSpore场景暂不支持。 - -算子内存比对结果在performance_comparison_result_*.xlsx中MemoryCompare和MemoryCompareStatistic的sheet页呈现。 - -- MemoryCompareStatistic:算子为粒度的统计呈现,按照算子占用的总内存与基准算子的差距值(Diff Memory(MB))进行逆序。 - -- MemoryCompare:算子内存比对的明细展示,可以查看每一个算子申请内存的详情。 - -- Diff Ratio: 比较算子占用的总内存 / 基准算子占用的总内存,红色代表劣化。 - -- Size(KB):该算子占用的device内存大小,单位KB。 - -步骤1:查看MemoryCompareStatistic页,找出内存占用差距TOP的算子。 -步骤2:查看MemoryCompare页,搜索内存占用差距TOP的算子,查看具体占用的子算子。 diff --git a/profiler/compare_tools/__init__.py b/profiler/compare_tools/__init__.py deleted file mode 100644 index 8400fd5ecd..0000000000 --- a/profiler/compare_tools/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2023, Huawei Technologies Co., Ltd. -# All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/profiler/compare_tools/compare_backend/__init__.py b/profiler/compare_tools/compare_backend/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/comparator/__init__.py b/profiler/compare_tools/compare_backend/comparator/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/comparator/base_comparator.py b/profiler/compare_tools/compare_backend/comparator/base_comparator.py deleted file mode 100644 index 330fb871ee..0000000000 --- a/profiler/compare_tools/compare_backend/comparator/base_comparator.py +++ /dev/null @@ -1,24 +0,0 @@ -from abc import ABC, abstractmethod - - -class BaseComparator(ABC): - def __init__(self, origin_data: any, bean: any): - self._sheet_name = bean.TABLE_NAME - self._headers = bean.HEADERS - self._overhead = bean.OVERHEAD - self._origin_data = origin_data - self._bean = bean - self._rows = [] - - def generate_data(self) -> dict: - ''' - generate one sheet(table) data - type: dict - sheet name as the dict key - ''' - self._compare() - return {self._sheet_name: {"headers": self._headers, "rows": self._rows, "overhead": self._overhead}} - - @abstractmethod - def _compare(self): - raise NotImplementedError("Function _compare need to be implemented.") diff --git a/profiler/compare_tools/compare_backend/comparator/communication_comparator.py b/profiler/compare_tools/compare_backend/comparator/communication_comparator.py deleted file mode 100644 index f7580bec89..0000000000 --- a/profiler/compare_tools/compare_backend/comparator/communication_comparator.py +++ /dev/null @@ -1,20 +0,0 @@ -from compare_backend.comparator.base_comparator import BaseComparator -from compare_backend.compare_bean.communication_bean import CommunicationBean -from compare_backend.utils.constant import Constant -from compare_backend.utils.common_func import update_order_id - - -class CommunicationComparator(BaseComparator): - def __init__(self, origin_data: dict, bean: any): - super().__init__(origin_data, bean) - - def _compare(self): - base_data = self._origin_data.get(Constant.BASE_DATA, {}) - comparison_data = self._origin_data.get(Constant.COMPARISON_DATA, {}) - for comm_name, comm_data in base_data.items(): - comparison_comm_data = comparison_data.pop(comm_name, {}) - self._rows.extend(CommunicationBean(comm_name, comm_data, comparison_comm_data).rows) - for comm_name, comm_data in comparison_data.items(): - self._rows.extend(CommunicationBean(comm_name, {}, comm_data).rows) - update_order_id(self._rows) - diff --git a/profiler/compare_tools/compare_backend/comparator/module_comparetor.py b/profiler/compare_tools/compare_backend/comparator/module_comparetor.py deleted file mode 100644 index 49c50b53c5..0000000000 --- a/profiler/compare_tools/compare_backend/comparator/module_comparetor.py +++ /dev/null @@ -1,36 +0,0 @@ -from compare_backend.comparator.base_comparator import BaseComparator -from compare_backend.utils.common_func import update_order_id -from compare_backend.utils.constant import Constant - - -class ModuleComparator(BaseComparator): - def __init__(self, origin_data: any, bean: any): - super().__init__(origin_data, bean) - - def _compare(self): - if not self._origin_data: - return - base_all_data = [data for data in self._origin_data if data[0]] # index 0 for base module - base_all_data.sort(key=lambda x: x[0].start_time) - base_none_data = [data for data in self._origin_data if not data[0]] # index 0 for base module - 
base_none_data.sort(key=lambda x: x[1].start_time) - index = 0 - for base_module, comparison_module in base_all_data: - if not comparison_module: - self._rows.extend(self._bean(base_module, comparison_module).rows) - continue - while index < len(base_none_data): - module = base_none_data[index][1] # index 1 for comparison module - if module.start_time < comparison_module.start_time: - self._rows.extend(self._bean(None, module).rows) - index += 1 - else: - break - self._rows.extend(self._bean(base_module, comparison_module).rows) - while index < len(base_none_data): - module = base_none_data[index][1] # index 1 for comparison module - self._rows.extend(self._bean(None, module).rows) - index += 1 - update_order_id(self._rows) - if not any(row[-1] != Constant.NA for row in self._rows): - print(f"[WARNING] If you want to see the operator's call stack, you must enable with_stack switch.") diff --git a/profiler/compare_tools/compare_backend/comparator/module_statistic_comparator.py b/profiler/compare_tools/compare_backend/comparator/module_statistic_comparator.py deleted file mode 100644 index e09108f3cb..0000000000 --- a/profiler/compare_tools/compare_backend/comparator/module_statistic_comparator.py +++ /dev/null @@ -1,45 +0,0 @@ -from collections import OrderedDict - -from compare_backend.comparator.base_comparator import BaseComparator -from compare_backend.utils.common_func import update_order_id - - -class ModuleStatisticComparator(BaseComparator): - def __init__(self, origin_data: list, bean: any): - super().__init__(origin_data, bean) - - def _compare(self): - if not self._origin_data: - return - base_module_dict, comparison_module_dict = self._group_by_module_name() - for module_name, base_data in base_module_dict.items(): - comparison_data = comparison_module_dict.pop(module_name, []) - self._rows.extend(self._bean(module_name, base_data, comparison_data).rows) - for module_name, comparison_data in comparison_module_dict.items(): - self._rows.extend(self._bean(module_name, [], comparison_data).rows) - update_order_id(self._rows) - - def _group_by_module_name(self): - base_module_dict, comparison_module_dict = OrderedDict(), OrderedDict() - base_all_data = [data for data in self._origin_data if data[0]] # index 0 for base module - base_all_data.sort(key=lambda x: x[0].start_time) - base_none_data = [data for data in self._origin_data if not data[0]] # index 0 for base module - base_none_data.sort(key=lambda x: x[1].start_time) - index = 0 - for base_module, comparison_module in base_all_data: - base_module_dict.setdefault(base_module.module_name, []).append(base_module) - if not comparison_module: - continue - while index < len(base_none_data): - module = base_none_data[index][1] # index 1 for comparison module - if module.start_time < comparison_module.start_time: - comparison_module_dict.setdefault(module.module_name, []).append(module) - index += 1 - else: - break - comparison_module_dict.setdefault(comparison_module.module_name, []).append(comparison_module) - while index < len(base_none_data): - module = base_none_data[index][1] # index 1 for comparison module - comparison_module_dict.setdefault(module.module_name, []).append(module) - index += 1 - return base_module_dict, comparison_module_dict diff --git a/profiler/compare_tools/compare_backend/comparator/operator_comparator.py b/profiler/compare_tools/compare_backend/comparator/operator_comparator.py deleted file mode 100644 index cc475116ca..0000000000 --- 
a/profiler/compare_tools/compare_backend/comparator/operator_comparator.py +++ /dev/null @@ -1,13 +0,0 @@ -from compare_backend.comparator.base_comparator import BaseComparator - - -class OperatorComparator(BaseComparator): - def __init__(self, origin_data: any, bean: any): - super().__init__(origin_data, bean) - - def _compare(self): - if not self._origin_data: - return - self._rows = [None] * (len(self._origin_data)) - for index, (base_op, comparison_op) in enumerate(self._origin_data): - self._rows[index] = self._bean(index, base_op, comparison_op).row diff --git a/profiler/compare_tools/compare_backend/comparator/operator_statistic_comparator.py b/profiler/compare_tools/compare_backend/comparator/operator_statistic_comparator.py deleted file mode 100644 index 73aecf6f12..0000000000 --- a/profiler/compare_tools/compare_backend/comparator/operator_statistic_comparator.py +++ /dev/null @@ -1,28 +0,0 @@ -from compare_backend.comparator.base_comparator import BaseComparator -from compare_backend.utils.common_func import update_order_id - - -class OperatorStatisticComparator(BaseComparator): - def __init__(self, origin_data: list, bean: any): - super().__init__(origin_data, bean) - - def _compare(self): - if not self._origin_data: - return - base_op_dict, comparison_op_dict = self._group_by_op_name() - for op_name, base_data in base_op_dict.items(): - comparison_data = comparison_op_dict.pop(op_name, []) - self._rows.append(self._bean(op_name, base_data, comparison_data).row) - for op_name, comparison_data in comparison_op_dict.items(): - self._rows.append(self._bean(op_name, [], comparison_data).row) - self._rows.sort(key=lambda x: x[-2], reverse=True) # order by diff column - update_order_id(self._rows) - - def _group_by_op_name(self): - base_op_dict, comparison_op_dict = {}, {} - for base_op, comparison_op in self._origin_data: - if base_op: - base_op_dict.setdefault(base_op.name, []).append(base_op) - if comparison_op: - comparison_op_dict.setdefault(comparison_op.name, []).append(comparison_op) - return base_op_dict, comparison_op_dict diff --git a/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py b/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py deleted file mode 100644 index 7283c17b47..0000000000 --- a/profiler/compare_tools/compare_backend/comparator/overall_performance_comparator.py +++ /dev/null @@ -1,76 +0,0 @@ -from compare_backend.comparator.base_comparator import BaseComparator -from compare_backend.utils.constant import Constant - - -class OverallPerformanceComparator(BaseComparator): - def __init__(self, origin_data: dict, bean: any): - super().__init__(origin_data, bean) - - def _compare(self): - base_profiling_info = self._origin_data.get(Constant.BASE_DATA) - comp_profiling_info = self._origin_data.get(Constant.COMPARISON_DATA) - self._headers = [''] - base_col = [f'{base_profiling_info.profiling_type}'] - comp_col = [f'{comp_profiling_info.profiling_type}'] - if not base_profiling_info.hide_op_details and not comp_profiling_info.hide_op_details: - self._headers.extend(['Cube Time(Num)', 'Vector Time(Num)']) - base_col.extend([f'{base_profiling_info.cube_time:.3f}s({base_profiling_info.cube_num})', - f'{base_profiling_info.vec_time:.3f}s({base_profiling_info.vec_num})']) - comp_col.extend([f'{comp_profiling_info.cube_time:.3f}s({comp_profiling_info.cube_num})', - f'{comp_profiling_info.vec_time:.3f}s({comp_profiling_info.vec_num})']) - if base_profiling_info.conv_time_fwd or 
comp_profiling_info.conv_time_fwd: - self._headers.append('Conv Time(Forward)(Num)') - base_col.append(f'{base_profiling_info.conv_time_fwd:.3f}s({base_profiling_info.conv_num_fwd})') - comp_col.append(f'{comp_profiling_info.conv_time_fwd:.3f}s({comp_profiling_info.conv_num_fwd})') - if base_profiling_info.conv_time_bwd or comp_profiling_info.conv_time_bwd: - self._headers.append('Conv Time(Backward)(Num)') - base_col.append(f'{base_profiling_info.conv_time_bwd:.3f}s({base_profiling_info.conv_num_bwd})') - comp_col.append(f'{comp_profiling_info.conv_time_bwd:.3f}s({comp_profiling_info.conv_num_bwd})') - if base_profiling_info.fa_time_fwd or comp_profiling_info.fa_time_fwd: - self._headers.append('Flash Attention Time(Forward)(Num)') - base_col.append(f'{base_profiling_info.fa_time_fwd:.3f}s({base_profiling_info.fa_num_fwd})') - comp_col.append(f'{comp_profiling_info.fa_time_fwd:.3f}s({comp_profiling_info.fa_num_fwd})') - if base_profiling_info.fa_time_bwd or comp_profiling_info.fa_time_bwd: - self._headers.append('Flash Attention Time(Backward)(Num)') - base_col.append(f'{base_profiling_info.fa_time_bwd:.3f}s({base_profiling_info.fa_num_bwd})') - comp_col.append(f'{comp_profiling_info.fa_time_bwd:.3f}s({comp_profiling_info.fa_num_bwd})') - if base_profiling_info.pa_time or comp_profiling_info.pa_time: - self._headers.append('Paged Attention Time(Num)') - base_col.append(f'{base_profiling_info.pa_time:.3f}s({base_profiling_info.pa_num})') - comp_col.append(f'{comp_profiling_info.pa_time:.3f}s({comp_profiling_info.pa_num})') - if base_profiling_info.lccl_time or comp_profiling_info.lccl_time: - self._headers.append('Lccl Time(Num)') - base_col.append(f'{base_profiling_info.lccl_time:.3f}s({base_profiling_info.lccl_num})') - comp_col.append(f'{comp_profiling_info.lccl_time:.3f}s({comp_profiling_info.lccl_num})') - if base_profiling_info.other_time or comp_profiling_info.other_time: - self._headers.append('Other Time') - base_col.append(f'{base_profiling_info.other_time:.3f}s') - comp_col.append(f'{comp_profiling_info.other_time:.3f}s') - self._headers.extend(['Computing Time']) - base_col.extend([f'{base_profiling_info.compute_time:.3f}s']) - comp_col.extend([f'{comp_profiling_info.compute_time:.3f}s']) - if base_profiling_info.memory_used or comp_profiling_info.memory_used: - self._headers.append('Mem Usage') - base_col.append(f'{base_profiling_info.memory_used:.2f}G') - comp_col.append(f'{comp_profiling_info.memory_used:.2f}G') - self._headers.extend(['Uncovered Communication Time(Wait Time)']) - if base_profiling_info.wait_time: - base_col.extend( - [f'{base_profiling_info.communication_not_overlapped: .3f}s({base_profiling_info.wait_time:.3f}s)']) - else: - base_col.extend([f'{base_profiling_info.communication_not_overlapped: .3f}s( / )']) - if comp_profiling_info.is_level0: - comp_col.extend([f'{comp_profiling_info.communication_not_overlapped: .3f}s( / )']) - else: - comp_col.extend( - [f'{comp_profiling_info.communication_not_overlapped: .3f}s({comp_profiling_info.wait_time:.3f}s)']) - if base_profiling_info.sdma_time or comp_profiling_info.sdma_time: - self._headers.append('SDMA Time(Num)') - base_col.append(f'{base_profiling_info.sdma_time:.3f}s({base_profiling_info.sdma_num})') - comp_col.append(f'{comp_profiling_info.sdma_time:.3f}s({comp_profiling_info.sdma_num})') - cue = '(Not minimal profiling)' if base_profiling_info.is_not_minimal_profiling() or \ - comp_profiling_info.is_not_minimal_profiling() else '' - self._headers.extend(['Free Time', 'E2E Time' + cue]) - 
base_col.extend([f'{base_profiling_info.scheduling_time:.3f}s', f'{base_profiling_info.e2e_time:.3f}s']) - comp_col.extend([f'{comp_profiling_info.scheduling_time:.3f}s', f'{comp_profiling_info.e2e_time:.3f}s']) - self._rows = [base_col, comp_col] diff --git a/profiler/compare_tools/compare_backend/compare_bean/__init__.py b/profiler/compare_tools/compare_backend/compare_bean/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/compare_bean/communication_bean.py b/profiler/compare_tools/compare_backend/compare_bean/communication_bean.py deleted file mode 100644 index 94813193d6..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/communication_bean.py +++ /dev/null @@ -1,72 +0,0 @@ -from compare_backend.utils.constant import Constant -from compare_backend.utils.excel_config import ExcelConfig -from compare_backend.utils.common_func import calculate_diff_ratio - - -class CommunicationInfo: - - def __init__(self, name: str, data_list: list, is_task: bool): - self.comm_op_name = None - self.task_name = None - self.calls = None - self.total_duration = 0 - self.avg_duration = None - self.max_duration = None - self.min_duration = None - if data_list: - self.comm_op_name = "|" if is_task else name - self.task_name = name if is_task else None - self.calls = len(data_list) - self.total_duration = sum(data_list) - self.avg_duration = sum(data_list) / len(data_list) - self.max_duration = max(data_list) - self.min_duration = min(data_list) - - -class CommunicationBean: - TABLE_NAME = Constant.COMMUNICATION_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, name: str, base_comm_data: dict, comparison_comm_data: dict): - self._name = name - self._base_comm = base_comm_data - self._comparison_comm = comparison_comm_data - - @property - def rows(self): - rows = [] - base_comm = CommunicationInfo(self._name, self._base_comm.get("comm_list", []), is_task=False) - comparison_comm = CommunicationInfo(self._name, self._comparison_comm.get("comm_list", []), is_task=False) - rows.append(self._get_row(base_comm, comparison_comm, is_task=False)) - - base_task = self._base_comm.get("comm_task", {}) - comparison_task = self._comparison_comm.get("comm_task", {}) - if not base_task and not comparison_task: - return rows - - for task_name, task_list in base_task.items(): - base_task_info = CommunicationInfo(task_name, task_list, is_task=True) - comparison_task_info = CommunicationInfo("", [], is_task=True) - for _task_name, _task_list in comparison_task.items(): - comparison_task_info = CommunicationInfo(_task_name, _task_list, is_task=True) - comparison_task.pop(_task_name, None) - break - rows.append(self._get_row(base_task_info, comparison_task_info, is_task=True)) - for task_name, task_list in comparison_task.items(): - base_task_info = CommunicationInfo("", [], is_task=True) - comparison_task_info = CommunicationInfo(task_name, task_list, is_task=True) - rows.append(self._get_row(base_task_info, comparison_task_info, is_task=True)) - - return rows - - @classmethod - def _get_row(cls, base_info: CommunicationInfo, comparison_info: CommunicationInfo, is_task: bool) -> list: - row = [None, base_info.comm_op_name, base_info.task_name, base_info.calls, base_info.total_duration, - base_info.avg_duration, base_info.max_duration, base_info.min_duration, comparison_info.comm_op_name, - comparison_info.task_name, comparison_info.calls, comparison_info.total_duration, - 
comparison_info.avg_duration, comparison_info.max_duration, comparison_info.min_duration] - diff_fields = [None, None] if is_task else calculate_diff_ratio(base_info.total_duration, - comparison_info.total_duration) - row.extend(diff_fields) - return row diff --git a/profiler/compare_tools/compare_backend/compare_bean/memory_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/memory_compare_bean.py deleted file mode 100644 index e1baa17531..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/memory_compare_bean.py +++ /dev/null @@ -1,47 +0,0 @@ -from compare_backend.utils.common_func import calculate_diff_ratio -from compare_backend.utils.constant import Constant -from compare_backend.utils.excel_config import ExcelConfig -from compare_backend.utils.torch_op_node import TorchOpNode -from compare_backend.utils.tree_builder import TreeBuilder - - -class MemoryCompareBean: - TABLE_NAME = Constant.MEMORY_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, index: int, base_op: TorchOpNode, comparison_op: TorchOpNode): - self._index = index - self._base_op = MemoryInfo(base_op) - self._comparison_op = MemoryInfo(comparison_op) - - @property - def row(self): - row = [self._index + 1, self._base_op.operator_name, self._base_op.input_shape, self._base_op.input_type, - self._base_op.memory_details, self._base_op.size, self._comparison_op.operator_name, - self._comparison_op.input_shape, self._comparison_op.input_type, self._comparison_op.memory_details, - self._comparison_op.size] - diff_fields = calculate_diff_ratio(self._base_op.size, self._comparison_op.size) - row.extend(diff_fields) - return row - - -class MemoryInfo: - def __init__(self, torch_op: TorchOpNode): - self.operator_name = None - self.input_shape = None - self.input_type = None - self.size = 0 - self.memory_details = "" - self._memory_list = [] - if torch_op: - self.operator_name = torch_op.name - self.input_shape = torch_op.input_shape - self.input_type = torch_op.input_type - self._memory_list = TreeBuilder.get_total_memory(torch_op) - self._update_memory_fields() - - def _update_memory_fields(self): - for memory in self._memory_list: - self.size += memory.size - self.memory_details += memory.memory_details diff --git a/profiler/compare_tools/compare_backend/compare_bean/memory_statistic_bean.py b/profiler/compare_tools/compare_backend/compare_bean/memory_statistic_bean.py deleted file mode 100644 index 9ccc2cb76d..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/memory_statistic_bean.py +++ /dev/null @@ -1,38 +0,0 @@ -from compare_backend.utils.common_func import calculate_diff_ratio -from compare_backend.utils.constant import Constant -from compare_backend.utils.tree_builder import TreeBuilder -from compare_backend.utils.excel_config import ExcelConfig - - -class MemoryStatisticBean: - TABLE_NAME = Constant.MEMORY_TOP_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, name: str, base_data: list, comparison_data: list): - self._name = name - self._base_info = MemoryStatisticInfo(base_data) - self._comparison_info = MemoryStatisticInfo(comparison_data) - - @property - def row(self): - row = [None, self._name, self._base_info.duration_ms, self._base_info.size_mb, self._base_info.number, - self._comparison_info.duration_ms, self._comparison_info.size_mb, self._comparison_info.number] - diff_fields = 
calculate_diff_ratio(self._base_info.size_mb, self._comparison_info.size_mb) - row.extend(diff_fields) - return row - - -class MemoryStatisticInfo: - def __init__(self, data_list: list): - self._data_list = data_list - self.duration_ms = 0 - self.size_mb = 0 - self.number = len(data_list) - self._get_info() - - def _get_info(self): - for op_data in self._data_list: - memory_list = TreeBuilder.get_total_memory(op_data) - self.duration_ms += sum([memory.duration / Constant.US_TO_MS for memory in memory_list]) - self.size_mb += sum([memory.size / Constant.KB_TO_MB for memory in memory_list]) diff --git a/profiler/compare_tools/compare_backend/compare_bean/module_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/module_compare_bean.py deleted file mode 100644 index abfce00d83..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/module_compare_bean.py +++ /dev/null @@ -1,83 +0,0 @@ -from compare_backend.utils.common_func import longest_common_subsequence_matching, calculate_diff_ratio -from compare_backend.utils.constant import Constant -from compare_backend.utils.excel_config import ExcelConfig -from compare_backend.utils.module_node import ModuleNode -from compare_backend.utils.name_function import NameFunction -from compare_backend.utils.torch_op_node import TorchOpNode - - -class ModuleCompareBean: - TABLE_NAME = Constant.MODULE_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, base_module: ModuleNode, comparison_module: ModuleNode): - self._base_module = ModuleInfo(base_module) - self._comparison_module = ModuleInfo(comparison_module) - self.module_class = self._base_module.module_class if base_module else self._comparison_module.module_class - self.module_level = self._base_module.module_level if base_module else self._comparison_module.module_level - self.module_name = self._base_module.module_name if base_module else self._comparison_module.module_name - - @property - def rows(self): - return [self.get_total_row(), *self.get_detail_rows()] - - def get_total_row(self): - total_diff, total_ratio = calculate_diff_ratio(self._base_module.device_total_time, - self._comparison_module.device_total_time) - self_diff, _ = calculate_diff_ratio(self._base_module.device_self_time, - self._comparison_module.device_self_time) - return [None, self.module_class, self.module_level, self.module_name, "TOTAL", None, - self._base_module.device_self_time, self._base_module.device_total_time, "TOTAL", None, - self._comparison_module.device_self_time, self._comparison_module.device_total_time, total_diff, - self_diff, total_ratio, self._base_module.call_stack, self._comparison_module.call_stack] - - def get_detail_rows(self): - rows = [] - matched_ops = longest_common_subsequence_matching(self._base_module.top_layer_ops, - self._comparison_module.top_layer_ops, NameFunction.get_name) - for base_op, comparison_op in matched_ops: - base_op = OpInfo(base_op) - comparison_op = OpInfo(comparison_op) - self_diff, self_ratio = calculate_diff_ratio(base_op.device_self_time, comparison_op.device_self_time) - base_call_stack = base_op.call_stack if self_diff > 0 else None - comparison_call_stack = comparison_op.call_stack if self_diff > 0 else None - rows.append( - [None, self.module_class, self.module_level, self.module_name, base_op.operator_name, - base_op.kernel_details, base_op.device_self_time, None, comparison_op.operator_name, - comparison_op.kernel_details, comparison_op.device_self_time, None, 
None, self_diff, self_ratio, - base_call_stack, comparison_call_stack]) - return rows - - -class ModuleInfo: - def __init__(self, module: ModuleNode): - self.module_class = "" - self.module_level = "" - self.module_name = "" - self.device_self_time = 0 - self.device_total_time = 0 - self.top_layer_ops = [] - self.call_stack = "" - if module: - self.module_class = module.module_class - self.module_level = module.module_level - self.module_name = module.module_name.replace("nn.Module:", "") - self.device_self_time = module.device_self_dur - self.device_total_time = module.device_total_dur - self.top_layer_ops = module.toy_layer_api_list - self.call_stack = module.call_stack - - -class OpInfo: - def __init__(self, operator: TorchOpNode): - self.operator_name = "" - self.kernel_details = "" - self.device_self_time = 0 - self.call_stack = "" - if operator: - self.operator_name = operator.name - for kernel in operator.kernel_list: - self.device_self_time += kernel.device_dur - self.kernel_details += kernel.kernel_details - self.call_stack = operator.call_stack diff --git a/profiler/compare_tools/compare_backend/compare_bean/module_statistic_bean.py b/profiler/compare_tools/compare_backend/compare_bean/module_statistic_bean.py deleted file mode 100644 index 97fc98bdd3..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/module_statistic_bean.py +++ /dev/null @@ -1,98 +0,0 @@ -import re - -from compare_backend.utils.common_func import calculate_diff_ratio -from compare_backend.utils.constant import Constant -from compare_backend.utils.excel_config import ExcelConfig - - -class ModuleStatisticBean: - TABLE_NAME = Constant.MODULE_TOP_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, name: str, base_data: list, comparison_data: list): - self._module_name = name.replace("nn.Module:", "") - pattern = re.compile('_[0-9]+$') - self._module_class = pattern.sub('', name.split("/")[-1]) - self._module_level = name.count("/") - self._base_info = ModuleStatisticInfo(base_data) - self._comparison_info = ModuleStatisticInfo(comparison_data) - - @property - def rows(self): - rows = [self.get_total_row()] - rows.extend(self.get_detail_rows()) - return rows - - @staticmethod - def _get_kernel_detail_rows(base_kernel_dict, com_kernel_dict): - base_kernel_detals = "" - com_kernel_details = "" - for kernel_name, base_dur_list in base_kernel_dict.items(): - base_dur = "%.3f" % sum(base_dur_list) - base_kernel_detals += f"{kernel_name}, [number: {len(base_dur_list)}], [duration_ms: {base_dur}]\n" - for kernel_name, com_dur_list in com_kernel_dict.items(): - com_dur = "%.3f" % sum(com_dur_list) - com_kernel_details += f"{kernel_name}, [number: {len(com_dur_list)}], [duration_ms: {com_dur}]\n" - return [base_kernel_detals, com_kernel_details] - - def get_total_row(self): - total_diff, total_ratio = calculate_diff_ratio(self._base_info.device_total_dur_ms, - self._comparison_info.device_total_dur_ms) - self_diff, _ = calculate_diff_ratio(self._base_info.device_self_dur_ms, - self._comparison_info.device_self_dur_ms) - row = [None, self._module_class, self._module_level, self._module_name, "[ TOTAL ]", None, - self._base_info.device_self_dur_ms, self._base_info.number, self._base_info.device_total_dur_ms, - None, self._comparison_info.device_self_dur_ms, self._comparison_info.number, - self._comparison_info.device_total_dur_ms, total_diff, self_diff, - total_ratio, self._base_info.call_stack, self._comparison_info.call_stack] - 
return row - - def get_detail_rows(self): - rows = [] - for op_name, base_dur_dict in self._base_info.api_dict.items(): - base_dur_list = base_dur_dict.get("total", []) - com_dur_dict = self._comparison_info.api_dict.pop(op_name, {}) - com_dur_list = com_dur_dict.get("total", []) - base_kernel_detals, com_kernel_details = self._get_kernel_detail_rows(base_dur_dict.get("detail", {}), - com_dur_dict.get("detail", {})) - self_diff, self_ratio = calculate_diff_ratio(sum(base_dur_list), sum(com_dur_list)) - row = [None, self._module_class, self._module_level, self._module_name, op_name, base_kernel_detals, - sum(base_dur_list), len(base_dur_list), None, com_kernel_details, sum(com_dur_list), - len(com_dur_list), None, None, self_diff, self_ratio, None, None] - rows.append(row) - - for op_name, com_dur_dict in self._comparison_info.api_dict.items(): - com_dur_list = com_dur_dict.get("total", []) - base_kernel_detals, com_kernel_details = self._get_kernel_detail_rows({}, com_dur_dict.get("detail", {})) - self_diff, self_ratio = calculate_diff_ratio(0, sum(com_dur_list)) - row = [None, self._module_class, self._module_level, self._module_name, op_name, base_kernel_detals, 0, 0, - None, com_kernel_details, sum(com_dur_list), len(com_dur_list), None, None, self_diff, - self_ratio, None, None] - rows.append(row) - return rows - - -class ModuleStatisticInfo: - def __init__(self, data_list: list): - self._data_list = data_list - self.device_self_dur_ms = 0 - self.device_total_dur_ms = 0 - self.call_stack = "" - self.number = len(data_list) - self.api_dict = {} - self._get_info() - - def _get_info(self): - if self._data_list: - self.call_stack = self._data_list[0].call_stack - for module in self._data_list: - self.device_self_dur_ms += module.device_self_dur / Constant.US_TO_MS - self.device_total_dur_ms += module.device_total_dur / Constant.US_TO_MS - for torch_op in module.toy_layer_api_list: - self.api_dict.setdefault(torch_op.name, {}).setdefault("total", []).append( - torch_op.device_dur / Constant.US_TO_MS) - for kernel in torch_op.kernel_list: - self.api_dict.setdefault(torch_op.name, {}).setdefault("detail", {}).setdefault(kernel.kernel_name, - []).append( - kernel.device_dur / Constant.US_TO_MS) diff --git a/profiler/compare_tools/compare_backend/compare_bean/operator_compare_bean.py b/profiler/compare_tools/compare_backend/compare_bean/operator_compare_bean.py deleted file mode 100644 index e7ecfedddd..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/operator_compare_bean.py +++ /dev/null @@ -1,47 +0,0 @@ -from compare_backend.utils.common_func import calculate_diff_ratio -from compare_backend.utils.constant import Constant -from compare_backend.utils.excel_config import ExcelConfig -from compare_backend.utils.torch_op_node import TorchOpNode -from compare_backend.utils.tree_builder import TreeBuilder - - -class OperatorCompareBean: - TABLE_NAME = Constant.OPERATOR_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, index: int, base_op: TorchOpNode, comparison_op: TorchOpNode): - self._index = index - self._base_op = OperatorInfo(base_op) - self._comparison_op = OperatorInfo(comparison_op) - - @property - def row(self): - row = [self._index + 1, self._base_op.operator_name, self._base_op.input_shape, self._base_op.input_type, - self._base_op.kernel_details, self._base_op.device_dur, self._comparison_op.operator_name, - self._comparison_op.input_shape, self._comparison_op.input_type, 
self._comparison_op.kernel_details, - self._comparison_op.device_dur] - diff_fields = calculate_diff_ratio(self._base_op.device_dur, self._comparison_op.device_dur) - row.extend(diff_fields) - return row - - -class OperatorInfo: - def __init__(self, torch_op: TorchOpNode): - self.operator_name = None - self.input_shape = None - self.input_type = None - self.device_dur = 0 - self.kernel_details = "" - self._kernel_list = [] - if torch_op: - self.operator_name = torch_op.name - self.input_shape = torch_op.input_shape - self.input_type = torch_op.input_type - self._kernel_list = TreeBuilder.get_total_kernels(torch_op) - self._update_kernel_fields() - - def _update_kernel_fields(self): - for kernel in self._kernel_list: - self.device_dur += kernel.device_dur - self.kernel_details += kernel.kernel_details diff --git a/profiler/compare_tools/compare_backend/compare_bean/operator_statistic_bean.py b/profiler/compare_tools/compare_backend/compare_bean/operator_statistic_bean.py deleted file mode 100644 index 457ae55acb..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/operator_statistic_bean.py +++ /dev/null @@ -1,36 +0,0 @@ -from compare_backend.utils.common_func import calculate_diff_ratio -from compare_backend.utils.constant import Constant -from compare_backend.utils.excel_config import ExcelConfig -from compare_backend.utils.tree_builder import TreeBuilder - - -class OperatorStatisticBean: - TABLE_NAME = Constant.OPERATOR_TOP_TABLE - HEADERS = ExcelConfig.HEADERS.get(TABLE_NAME) - OVERHEAD = ExcelConfig.OVERHEAD.get(TABLE_NAME) - - def __init__(self, name: str, base_data: list, comparison_data: list): - self._name = name - self._base_info = OperatorStatisticInfo(base_data) - self._comparison_info = OperatorStatisticInfo(comparison_data) - - @property - def row(self): - row = [None, self._name, self._base_info.device_dur_ms, self._base_info.number, - self._comparison_info.device_dur_ms, self._comparison_info.number] - diff_fields = calculate_diff_ratio(self._base_info.device_dur_ms, self._comparison_info.device_dur_ms) - row.extend(diff_fields) - return row - - -class OperatorStatisticInfo: - def __init__(self, data_list: list): - self._data_list = data_list - self.device_dur_ms = 0 - self.number = len(data_list) - self._get_info() - - def _get_info(self): - for op_data in self._data_list: - kernel_list = TreeBuilder.get_total_kernels(op_data) - self.device_dur_ms += sum([kernel.device_dur / Constant.US_TO_MS for kernel in kernel_list]) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/__init__.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/compare_event.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/compare_event.py deleted file mode 100644 index 463e824308..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/compare_event.py +++ /dev/null @@ -1,79 +0,0 @@ -from decimal import Decimal - -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.utils.constant import Constant - - -class KernelEvent: - def __init__(self, event: TraceEventBean, device_type: str): - self._event = event - self._device_type = device_type - - @property - def kernel_name(self) -> str: - return self._event.name - - @property - def device_dur(self) -> float: - return self._event.dur 
- - @property - def task_id(self) -> int: - return self._event.task_id - - @property - def task_type(self) -> str: - return self._event.task_type - - @property - def kernel_details(self): - if self._device_type == Constant.GPU: - return f"{self.kernel_name} [duration: {self.device_dur}]\n" - return f"{self.kernel_name}, {self.task_id}, {self.task_type} [duration: {self.device_dur}]\n" - - -class MemoryEvent: - def __init__(self, event: dict): - self._event = event - self._name = "" - self._size = 0.0 - self._ts = Decimal(0) - self._release_time = Decimal(0) - self._allocation_time = Decimal(0) - self._duration = 0.0 - self.init() - - @property - def size(self) -> float: - return self._size - - @property - def duration(self) -> float: - return self._duration - - @property - def memory_details(self) -> str: - name = self._event.get(Constant.NAME, "") or self._name - return f"{name}, ({self._allocation_time}, {self._release_time}), " \ - f"[duration: {self._duration}], [size: {self._size}]\n" - - @property - def is_torch_op(self) -> bool: - return False - - @property - def start_time(self) -> Decimal: - return self._ts - - def set_name(self, name: str): - self._name = name - - def init(self): - self._size = self._event.get(Constant.SIZE, 0) - self._ts = self._event.get(Constant.TS, 0) - self._release_time = self._event.get(Constant.RELEASE_TIME) - self._allocation_time = self._event.get(Constant.ALLOCATION_TIME) - if not self._release_time or not self._allocation_time: - self._duration = 0.0 - else: - self._duration = float(self._release_time - self._allocation_time) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py deleted file mode 100644 index 122009b904..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/kernel_details_bean.py +++ /dev/null @@ -1,87 +0,0 @@ -import math - -import pandas as pd - -from compare_backend.utils.common_func import convert_to_float -from compare_backend.utils.constant import Constant - - -class KernelDetailsBean: - def __init__(self, data: dict): - self._data = data - self._op_type = "" - self._name = "" - self._aiv_vec_time = 0.0 - self._mac_time = 0.0 - self._duration = 0.0 - self.init() - - @property - def op_type(self) -> str: - return self._op_type - - @property - def name(self) -> str: - return self._name - - @property - def aiv_vec_time(self) -> float: - if self._aiv_vec_time == "" or self._aiv_vec_time == "N/A": - return float("nan") - return convert_to_float(self._aiv_vec_time) - - @property - def mac_time(self) -> float: - if self._mac_time == "" or self._mac_time == "N/A": - return float("nan") - return convert_to_float(self._mac_time) - - @property - def duration(self) -> float: - return convert_to_float(self._duration) - - def is_hide_op_pmu(self): - if "mac_time(us)" in self._data.keys() or "aiv_vec_time(us)" in self._data.keys(): - return False - return True - - def is_vector(self): - if not pd.isna(self.aiv_vec_time) and self.aiv_vec_time > 0: - return True - if not pd.isna(self.mac_time) and math.isclose(self.mac_time, 0.0): - return True - return False - - def is_invalid(self): - if pd.isna(self.aiv_vec_time) and pd.isna(self.mac_time): - return True - return False - - def is_fa_bwd(self): - return 'bwd' in self.op_type.lower() or 'grad' in self.op_type.lower() - - def is_sdma(self): - return self.name.lower().startswith("aclnninplacecopy") and "tensormove" in 
self.name.lower() - - def is_flash_attention(self): - return "flashattention" in self.op_type.lower() - - def is_cube(self): - return "matmul" in self.op_type.lower() - - def is_conv(self): - return self.op_type.lower().startswith("conv") - - def is_conv_bwd(self): - lower_op_type = self.op_type.lower() - return any(bwd in lower_op_type for bwd in Constant.BWD_LIST) - - def is_page_attention(self): - return "pagedattention" in self.op_type.lower() - - def init(self): - self._op_type = self._data.get('Type', "") - self._name = self._data.get('Name', "") - self._aiv_vec_time = self._data.get('aiv_vec_time(us)', "") - self._mac_time = self._data.get('mac_time(us)', "") - self._duration = self._data.get('Duration(us)', 0) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/memory_record_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/memory_record_bean.py deleted file mode 100644 index 50d14089fe..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/memory_record_bean.py +++ /dev/null @@ -1,15 +0,0 @@ -from compare_backend.utils.common_func import convert_to_float - - -class MemoryRecordBean: - def __init__(self, data: dict): - self._data = data - self._total_reserved_mb = 0.0 - self.init() - - @property - def total_reserved_mb(self) -> float: - return convert_to_float(self._total_reserved_mb) - - def init(self): - self._total_reserved_mb = self._data.get("Total Reserved(MB)", 0) diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py deleted file mode 100644 index 254b8629cd..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/operator_memory_bean.py +++ /dev/null @@ -1,43 +0,0 @@ -from decimal import Decimal - -from compare_backend.utils.common_func import convert_to_float, convert_to_decimal - - -class OperatorMemoryBean: - - def __init__(self, data: dict): - self._data = data - self._name = "" - self._size = 0.0 - self._allocation_time = Decimal(0) - self._release_time = Decimal(0) - self.init() - - @property - def name(self) -> str: - return self._name - - @property - def size(self) -> float: - return convert_to_float(self._size) - - @property - def allocation_time(self) -> Decimal: - if not self._allocation_time: - return Decimal(0) - return convert_to_decimal(self._allocation_time) - - @property - def release_time(self) -> Decimal: - if not self._release_time: - return Decimal(0) - return convert_to_decimal(self._release_time) - - def init(self): - self._name = self._data.get("Name", "") - self._size = self._data.get("Size(KB)", 0) - self._allocation_time = self._data.get("Allocation Time(us)", 0) - self._release_time = self._data.get("Release Time(us)", 0) - - def is_cann_op(self): - return "cann::" in self._name diff --git a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py b/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py deleted file mode 100644 index cef6bb0712..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/origin_data_bean/trace_event_bean.py +++ /dev/null @@ -1,216 +0,0 @@ -from decimal import Decimal - -from compare_backend.utils.common_func import convert_to_float, convert_to_decimal -from compare_backend.utils.constant import Constant - - -class TraceEventBean: - - def __init__(self, event: dict): - 
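KernelDetailsBean above classifies a kernel_details.csv row by its PMU columns: an empty or "N/A" metric becomes NaN, a positive aiv_vec_time or a near-zero mac_time marks a vector kernel, and the op type string drives the cube/FA/conv checks. A short sketch of that rule on one hypothetical row (the column names match the bean's init; the helper below is not the original class):

    import math

    import pandas as pd

    row = {"Type": "MatMul", "Name": "matmul_fp16_kernel",
           "aiv_vec_time(us)": "N/A", "mac_time(us)": "12.3", "Duration(us)": "15.0"}

    def to_metric(value):
        # "" and "N/A" are treated as missing, mirroring the bean's properties.
        return float("nan") if value in ("", "N/A") else float(value)

    aiv_vec_time = to_metric(row["aiv_vec_time(us)"])
    mac_time = to_metric(row["mac_time(us)"])

    is_vector = (not pd.isna(aiv_vec_time) and aiv_vec_time > 0) or \
                (not pd.isna(mac_time) and math.isclose(mac_time, 0.0))
    is_cube = "matmul" in row["Type"].lower()
    assert (is_vector, is_cube) == (False, True)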
self._event = event - self._pid = 0 - self._tid = 0 - self._ts = Decimal(0) - self._dur = 0.0 - self._ph = "" - self._cat = "" - self._name = "" - self._args = {} - self._is_torch_op = False - self.init() - - @property - def pid(self) -> int: - return self._pid - - @property - def tid(self) -> int: - return self._tid - - @property - def dur(self) -> float: - return convert_to_float(self._dur) - - @property - def start_time(self) -> Decimal: - return convert_to_decimal(self._ts) - - @property - def end_time(self) -> Decimal: - return self.start_time + convert_to_decimal(self._dur) - - @property - def name(self) -> str: - return self._name - - @property - def lower_name(self) -> str: - return self._name.lower() - - @property - def lower_cat(self) -> str: - return self._cat.lower() - - @property - def args(self) -> dict: - return self._args - - @property - def id(self) -> str: - return self._event.get("id") - - @property - def stream_id(self) -> int: - return self._args.get('Stream Id') - - @property - def stream(self) -> int: - return self._args.get("stream") - - @property - def task_type(self) -> str: - return self._args.get('Task Type') - - @property - def task_id(self) -> int: - return self._args.get('Task Id') - - @property - def device_id(self) -> int: - try: - return int(self._args.get('Device Id', Constant.INVALID_VALUE)) - except Exception: - return Constant.INVALID_VALUE - - @property - def total_reserved(self): - return self._args.get('Total Reserved', 0) - - @property - def corr_id(self) -> int: - return self._args.get('correlation_id') - - @property - def process_name(self) -> int: - return self._args.get("name", "") - - @property - def bytes_kb(self) -> int: - return self._args.get("Bytes", 0) / Constant.BYTE_TO_KB - - @property - def addr(self) -> str: - return self._args.get("Addr") - - @property - def event(self) -> dict: - return self._event - - @property - def is_torch_op(self) -> bool: - return self._is_torch_op - - @is_torch_op.setter - def is_torch_op(self, value: bool): - self._is_torch_op = value - - def is_m_mode(self) -> bool: - return self._ph == "M" - - def is_x_mode(self) -> bool: - return self._ph == "X" - - def is_flow_start(self) -> bool: - return self._ph == "s" - - def is_flow_end(self) -> bool: - return self._ph == "f" - - def is_enqueue(self) -> bool: - return self.lower_cat == "enqueue" - - def is_dequeue(self) -> bool: - return self.lower_cat == "dequeue" - - def is_process_meta(self) -> bool: - return self.is_m_mode() and self._name == "process_name" - - def is_thread_meta(self) -> bool: - return self.is_m_mode() and self._name == "thread_name" - - def is_communication_op_thread(self) -> bool: - return self._args.get("name", "").find("Communication") != -1 - - def is_hccl_process_name(self) -> bool: - return self.process_name == "HCCL" - - def is_overlap_process_name(self) -> bool: - return self.process_name == "Overlap Analysis" - - def is_npu_process_name(self) -> bool: - return self.process_name == "Ascend Hardware" - - def is_computing_event(self): - return self._name == "Computing" - - def is_comm_not_overlap(self): - return self._name == 'Communication(Not Overlapped)' - - def is_dict(self): - return isinstance(self._event, dict) - - def is_kernel_cat(self): - return self.lower_cat == "kernel" - - def is_nccl_name(self): - return self.lower_name.startswith("nccl") - - def is_kernel_except_nccl(self): - return self.is_kernel_cat() and not self.is_nccl_name() - - def is_memory_event(self): - return self.lower_name == '[memory]' and self.device_id >= 
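TraceEventBean above is a thin wrapper over one trace.json entry; its predicates key off the standard Chrome-trace fields (ph for the phase, cat for the category). A sketch of how flow start/end events (ph "s"/"f") pair up by id, which is how device kernels are later linked back to the CPU ops that launched them; the events below are hypothetical:

    events = [
        {"ph": "M", "name": "process_name", "args": {"name": "Ascend Hardware"}},
        {"ph": "X", "cat": "cpu_op", "name": "aten::matmul", "ts": "100", "dur": 5},
        {"ph": "s", "cat": "async_npu", "id": 1, "ts": "100"},
        {"ph": "f", "cat": "async_npu", "id": 1, "ts": "130"},
    ]

    flow_dict = {}
    for event in events:
        if event.get("ph") == "M":
            continue  # metadata rows are handled by the is_*_meta predicates instead
        if event.get("ph") == "s":
            flow_dict.setdefault(event["id"], {})["start"] = event
        elif event.get("ph") == "f":
            flow_dict.setdefault(event["id"], {})["end"] = event
    assert set(flow_dict[1]) == {"start", "end"}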
0 - - def is_compute_event(self): - return self.task_type in ('AI_CORE', 'MIX_AIC', 'MIX_AIV', 'AI_CPU', 'AI_VECTOR_CORE', 'FFTS_PLUS') - - def is_sdma_event(self): - return self.task_type in ('SDMA_SQE', 'PCIE_DMA_SQE') - - def is_event_wait(self): - return self.task_type == 'EVENT_WAIT_SQE' - - def is_backward(self): - return any(bwd in self.lower_name for bwd in Constant.BWD_LIST) - - def is_python_function(self): - return self.lower_cat == "python_function" - - def is_optimizer(self): - return self.lower_name.startswith("optimizer") - - def is_fwdbwd(self): - return self.lower_cat == "fwdbwd" - - def is_step_profiler(self): - return self.name.find("ProfilerStep#") != -1 - - def reset_name(self, name): - self._name = name - - def is_conv(self): - return self.name.lower().startswith("aten::conv") - - def is_lccl(self): - return self.lower_name == "kernel_aivec" - - def init(self): - if isinstance(self._event, dict): - self._pid = self._event.get("pid", 0) - self._tid = self._event.get("tid", 0) - self._ts = self._event.get("ts", 0) - self._dur = self._event.get("dur", 0) - self._ph = self._event.get("ph", "") - self._cat = self._event.get("cat", "") - self._name = self._event.get("name", "") - self._args = self._event.get("args", {}) diff --git a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py b/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py deleted file mode 100644 index e5d9bf26e9..0000000000 --- a/profiler/compare_tools/compare_backend/compare_bean/profiling_info.py +++ /dev/null @@ -1,128 +0,0 @@ -from compare_backend.utils.constant import Constant - - -class ProfilingInfo: - TABLE_NAME = Constant.PERFORMANCE_TABLE - HEADERS = [] - OVERHEAD = [] - - def __init__(self, profiling_type: str): - self.profiling_type = profiling_type - self.cube_time = 0.0 - self.other_time = 0.0 - self.vec_time = 0.0 - self.cube_num = 0 - self.vec_num = 0 - self.sdma_num = 0 - self.fa_num_fwd = 0 - self.fa_num_bwd = 0 - self.pa_num = 0 - self.lccl_num = 0 - self.conv_time_fwd = 0.0 - self.conv_time_bwd = 0.0 - self.conv_num_fwd = 0 - self.conv_num_bwd = 0 - self.compute_time = 0.0 - self.communication_not_overlapped = 0.0 - self.wait_time = 0.0 - self.memory_used = 0.0 - self.e2e_time = 0.0 - self.sdma_time = 0.0 - self.scheduling_time = 0.0 - self.fa_time_bwd = 0.0 - self.pa_time = 0.0 - self.lccl_time = 0.0 - self.fa_time_fwd = 0.0 - self.minimal_profiling = False - self.hide_op_details = False - self.is_level0 = False - - def trans_time_to_s(self): - self.cube_time = self.cube_time / 10 ** 6 - self.other_time = self.other_time / 10 ** 6 - self.vec_time = self.vec_time / 10 ** 6 - self.compute_time = self.compute_time / 10 ** 6 - self.communication_not_overlapped = self.communication_not_overlapped / 10 ** 6 - self.wait_time = self.wait_time / 10 ** 6 - self.e2e_time = self.e2e_time / 10 ** 6 - self.sdma_time = self.sdma_time / 10 ** 6 - self.scheduling_time = self.scheduling_time / 10 ** 6 - self.fa_time_bwd = self.fa_time_bwd / 10 ** 6 - self.fa_time_fwd = self.fa_time_fwd / 10 ** 6 - self.pa_time = self.pa_time / 10 ** 6 - self.lccl_time = self.lccl_time / 10 ** 6 - self.conv_time_fwd = self.conv_time_fwd / 10 ** 6 - self.conv_time_bwd = self.conv_time_bwd / 10 ** 6 - - def calculate_other_time(self): - self.other_time = max( - [0, self.compute_time - self.cube_time - self.fa_time_fwd - self.fa_time_bwd - - self.pa_time - self.vec_time - self.conv_time_fwd - self.conv_time_bwd]) - - def calculate_vec_time(self): - self.vec_time = self.compute_time - 
self.cube_time - self.fa_time_fwd - self.fa_time_bwd \ - - self.conv_time_fwd - self.conv_time_bwd - - def calculate_schedule_time(self): - self.scheduling_time = (self.e2e_time - self.compute_time - self.lccl_time \ - - self.communication_not_overlapped) - - def update_fa_fwd_info(self, time: float): - self.fa_time_fwd += time - self.fa_num_fwd += 1 - - def update_fa_bwd_info(self, time: float): - self.fa_time_bwd += time - self.fa_num_bwd += 1 - - def update_pa_info(self, time: float): - self.pa_time += time - self.pa_num += 1 - - def update_lccl_info(self, time: float): - self.lccl_time += time - self.lccl_num += 1 - - def update_conv_fwd_info(self, time: float): - self.conv_time_fwd += time - self.conv_num_fwd += 1 - - def update_conv_bwd_info(self, time: float): - self.conv_time_bwd += time - self.conv_num_bwd += 1 - - def update_sdma_info(self, time: float, num: int = 1): - self.sdma_time += time - self.sdma_num += num - - def update_cube_info(self, time: float): - self.cube_time += time - self.cube_num += 1 - - def update_vec_info(self, time: float): - self.vec_time += time - self.vec_num += 1 - - def set_compute_time(self, time: float): - self.compute_time = time - - def update_compute_time(self, time: float): - self.compute_time += time - - def set_e2e_time(self, time: float): - self.e2e_time = time - - def set_comm_not_overlap(self, time: float): - self.communication_not_overlapped = time - - def update_comm_not_overlap(self, time: float): - self.communication_not_overlapped += time - - def update_comm_not_overlap_wait_time(self, time: float): - self.wait_time = time - - def set_memory_used(self, memory: float): - self.memory_used = memory - - def is_not_minimal_profiling(self) -> bool: - return self.profiling_type == Constant.NPU and not self.minimal_profiling diff --git a/profiler/compare_tools/compare_backend/comparison_generator.py b/profiler/compare_tools/compare_backend/comparison_generator.py deleted file mode 100644 index b07170b648..0000000000 --- a/profiler/compare_tools/compare_backend/comparison_generator.py +++ /dev/null @@ -1,44 +0,0 @@ -from compare_backend.generator.detail_performance_generator import DetailPerformanceGenerator -from compare_backend.generator.overall_performance_generator import OverallPerformanceGenerator -from compare_backend.interface.overall_interface import OverallInterface -from compare_backend.profiling_parser.gpu_profiling_parser import GPUProfilingParser -from compare_backend.profiling_parser.npu_profiling_parser import NPUProfilingParser -from compare_backend.utils.constant import Constant -from compare_backend.utils.args_manager import ArgsManager - - -class ComparisonGenerator: - PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} - INTERFACE_DICT = {Constant.OVERALL_COMPARE: OverallInterface} - - def __init__(self, args): - self._args_manager = ArgsManager() - self._args_manager.init(args) - self._data_dict = {} - - def run(self): - self.load_data() - self.generate_compare_result() - - def load_data(self): - self._data_dict[Constant.BASE_DATA] = self.PARSER_DICT.get(self._args_manager.base_profiling_type)( - self._args_manager.args, self._args_manager.base_path_dict).load_data() - self._data_dict[Constant.COMPARISON_DATA] = self.PARSER_DICT.get(self._args_manager.comparison_profiling_type)( - self._args_manager.args, self._args_manager.comparison_path_dict).load_data() - - def generate_compare_result(self): - overall_data = {Constant.BASE_DATA: self._data_dict.get(Constant.BASE_DATA).overall_metrics, - 
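The derived ProfilingInfo metrics above reduce to three formulas: vec_time = compute - cube - FA(fwd+bwd) - conv(fwd+bwd), other_time = max(0, compute minus every classified bucket), and scheduling_time = e2e - compute - lccl - communication(not overlapped). A worked example with made-up microsecond values, evaluated before trans_time_to_s:

    compute_time, cube_time = 900.0, 300.0
    fa_time_fwd, fa_time_bwd, pa_time = 100.0, 120.0, 30.0
    conv_time_fwd, conv_time_bwd = 80.0, 90.0
    e2e_time, lccl_time, communication_not_overlapped = 1500.0, 20.0, 200.0

    vec_time = compute_time - cube_time - fa_time_fwd - fa_time_bwd \
               - conv_time_fwd - conv_time_bwd
    other_time = max(0.0, compute_time - cube_time - fa_time_fwd - fa_time_bwd
                     - pa_time - vec_time - conv_time_fwd - conv_time_bwd)
    scheduling_time = e2e_time - compute_time - lccl_time - communication_not_overlapped
    assert (vec_time, other_time, scheduling_time) == (210.0, 0.0, 380.0)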
Constant.COMPARISON_DATA: self._data_dict.get(Constant.COMPARISON_DATA).overall_metrics} - generator_list = [OverallPerformanceGenerator(overall_data, self._args_manager.args), - DetailPerformanceGenerator(self._data_dict, self._args_manager.args)] - for generator in generator_list: - generator.start() - for generator in generator_list: - generator.join() - - def run_interface(self, compare_type: str) -> dict: - self.load_data() - interface = self.INTERFACE_DICT.get(compare_type) - if interface: - return interface(self._data_dict).run() - return {} diff --git a/profiler/compare_tools/compare_backend/data_prepare/__init__.py b/profiler/compare_tools/compare_backend/data_prepare/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/data_prepare/module_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/module_data_prepare.py deleted file mode 100644 index 84932366dd..0000000000 --- a/profiler/compare_tools/compare_backend/data_prepare/module_data_prepare.py +++ /dev/null @@ -1,99 +0,0 @@ -import copy -from queue import Queue - -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.profiling_parser.base_profiling_parser import ProfilingResult -from compare_backend.utils.constant import Constant -from compare_backend.utils.module_node import ModuleNode -from compare_backend.utils.tree_builder import TreeBuilder - - -class ModuleDataPrepare: - def __init__(self, profiling_data: ProfilingResult): - self.profiling_data = profiling_data - self._nn_module_list = [] - self._call_function = [] - for event in profiling_data.python_function_data: - if event.lower_name.startswith("nn.module:"): - self._nn_module_list.append(event) - else: - self._call_function.append(event) - self._bwd_dict = {} - self._bwd_pid = self._get_bwd_pid() - - @staticmethod - def update_module_node_info(fwd_root_node, bwd_root_node, func_root_node): - queue = Queue() - queue.put(fwd_root_node) - queue.put(bwd_root_node) - while not queue.empty(): - module_node = queue.get() - module_node.update_torch_op_kernel_list() - call_function = func_root_node.find_module_call(module_node.start_time) - if call_function: - module_node.reset_call_stack(call_function.call_stack) - for sub_module_node in module_node.child_nodes: - queue.put(sub_module_node) - - def build_module_tree(self): - if not self._nn_module_list: - return [None, None] - self._dispatch_torch_op() - event_list = [TraceEventBean({"ts": ts}) for ts in self.profiling_data.kernel_dict.keys()] - self._nn_module_list.extend(event_list) - root_node = TreeBuilder.build_module_tree(self._nn_module_list, self.profiling_data.kernel_dict) - func_root_node = TreeBuilder.build_module_tree(self._call_function, {}) - bwd_module_list = self.get_bwd_module(root_node) - if bwd_module_list: - bwd_module_list.extend(event_list) - bwd_root_node = TreeBuilder.build_module_tree(bwd_module_list, self.profiling_data.kernel_dict) - self.match_torch_op(root_node, bwd_root_node) - self.update_module_node_info(root_node, bwd_root_node, func_root_node) - return [root_node, bwd_root_node] - - def get_bwd_module(self, root_node: ModuleNode): - bwd_module_list = [] - for flow in self.profiling_data.fwdbwd_dict.values(): - start_point = flow.get("start") - end_point = flow.get("end") - if not start_point or not end_point: - continue - end_event = self._bwd_dict.get(end_point.start_time) - if not end_event: - continue - call_module = 
root_node.find_module_call(start_point.start_time) - if call_module: - bwd_event = copy.deepcopy(end_event) - bwd_event.reset_name(f"[ BACKWARD ]{call_module.module_name}") - bwd_module_list.append(bwd_event) - return bwd_module_list - - def match_torch_op(self, fwd_root_node, bwd_root_node): - torch_op_list = sorted(self.profiling_data.torch_op_data, key=lambda x: x.start_time) - for torch_op in torch_op_list: - if torch_op.is_optimizer(): - continue - if torch_op.is_step_profiler(): - continue - matched_module = fwd_root_node.find_module_call(torch_op.start_time) - if matched_module: - matched_module.find_torch_op_call(torch_op) - continue - matched_module = bwd_root_node.find_module_call(torch_op.start_time) - if matched_module: - matched_module.find_torch_op_call(torch_op) - - def _dispatch_torch_op(self): - for torch_op in self.profiling_data.torch_op_data: - if torch_op.is_optimizer(): - self._nn_module_list.append(torch_op) - continue - if torch_op.pid == self._bwd_pid: - self._bwd_dict[torch_op.start_time] = torch_op - - def _get_bwd_pid(self): - for flow in self.profiling_data.fwdbwd_dict.values(): - end_point = flow.get("end") - if end_point: - return end_point.pid - return Constant.INVALID_VALUE diff --git a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py b/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py deleted file mode 100644 index fdce23c6ab..0000000000 --- a/profiler/compare_tools/compare_backend/data_prepare/operator_data_prepare.py +++ /dev/null @@ -1,19 +0,0 @@ -from compare_backend.profiling_parser.base_profiling_parser import ProfilingResult -from compare_backend.utils.tree_builder import TreeBuilder - - -class OperatorDataPrepare: - def __init__(self, profiling_data: ProfilingResult): - self.profiling_data = profiling_data - - def get_top_layer_ops(self) -> any: - root_node = TreeBuilder.build_tree(self.profiling_data.torch_op_data, self.profiling_data.kernel_dict, - self.profiling_data.memory_list) - level1_child_nodes = root_node.child_nodes - result_data = [] - for level1_node in level1_child_nodes: - if level1_node.is_step_profiler(): - result_data.extend(level1_node.child_nodes) - else: - result_data.append(level1_node) - return result_data diff --git a/profiler/compare_tools/compare_backend/disaggregate/__init__.py b/profiler/compare_tools/compare_backend/disaggregate/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py b/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py deleted file mode 100644 index c89e845193..0000000000 --- a/profiler/compare_tools/compare_backend/disaggregate/overall_perf_interface.py +++ /dev/null @@ -1,34 +0,0 @@ -from common_func.path_manager import PathManager -from compare_backend.profiling_parser.gpu_profiling_parser import GPUProfilingParser -from compare_backend.profiling_parser.npu_profiling_parser import NPUProfilingParser -from compare_backend.utils.args_manager import ArgsManager -from compare_backend.utils.compare_args import Args -from compare_backend.utils.constant import Constant - - -class OverallPerfInterface: - PARSER_DICT = {Constant.NPU: NPUProfilingParser, Constant.GPU: GPUProfilingParser} - - def __init__(self, profiling_path: str): - self._profiling_path = profiling_path - self._profiling_path_dict = {} - self._result_data = {} - - def run(self): - self._check_path() - self._load_data() - self._generate_result() - return 
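OperatorDataPrepare.get_top_layer_ops above flattens the torch-op tree one level: direct children of the root become the comparison units, except that ProfilerStep# wrapper nodes are unwrapped so the ops inside a step count as top-layer ops themselves. A sketch with a stand-in node class (the real TorchOpNode lives in the deleted utils/torch_op_node.py):

    class Node:
        def __init__(self, name, child_nodes=None):
            self.name = name
            self.child_nodes = child_nodes or []

        def is_step_profiler(self):
            return "ProfilerStep#" in self.name

    root = Node("root", [
        Node("ProfilerStep#1", [Node("aten::linear"), Node("aten::relu")]),
        Node("Optimizer.step#SGD.step"),
    ])

    result_data = []
    for level1_node in root.child_nodes:
        if level1_node.is_step_profiler():
            result_data.extend(level1_node.child_nodes)   # unwrap the step wrapper
        else:
            result_data.append(level1_node)
    assert [n.name for n in result_data] == ["aten::linear", "aten::relu", "Optimizer.step#SGD.step"]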
self._result_data - - def _check_path(self): - profiling_path = PathManager.get_realpath(self._profiling_path) - self._profiling_path_dict = ArgsManager().parse_profiling_path(profiling_path) - - def _load_data(self): - args = Args(enable_profiling_compare=True) - profiling_type = self._profiling_path_dict.get(Constant.PROFILING_TYPE, Constant.NPU) - self._profiling_data = self.PARSER_DICT.get(profiling_type)(args, self._profiling_path_dict).load_data() - - def _generate_result(self): - overall_data = self._profiling_data.overall_metrics - self._result_data = getattr(overall_data, "__dict__", {}) diff --git a/profiler/compare_tools/compare_backend/generator/__init__.py b/profiler/compare_tools/compare_backend/generator/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/generator/base_generator.py b/profiler/compare_tools/compare_backend/generator/base_generator.py deleted file mode 100644 index e77071b599..0000000000 --- a/profiler/compare_tools/compare_backend/generator/base_generator.py +++ /dev/null @@ -1,23 +0,0 @@ -from abc import ABC, abstractmethod -from collections import OrderedDict -from multiprocessing import Process - - -class BaseGenerator(Process, ABC): - def __init__(self, profiling_data_dict: dict, args: any): - super(BaseGenerator, self).__init__() - self._profiling_data_dict = profiling_data_dict - self._args = args - self._result_data = OrderedDict() - - def run(self): - self.compare() - self.generate_view() - - @abstractmethod - def compare(self): - raise NotImplementedError("Function compare need to be implemented.") - - @abstractmethod - def generate_view(self): - raise NotImplementedError("Function generate_view need to be implemented.") diff --git a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py b/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py deleted file mode 100644 index 5b93d888a4..0000000000 --- a/profiler/compare_tools/compare_backend/generator/detail_performance_generator.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -from collections import deque -from datetime import datetime -from queue import Queue - -from compare_backend.comparator.communication_comparator import CommunicationComparator -from compare_backend.comparator.module_comparetor import ModuleComparator -from compare_backend.comparator.module_statistic_comparator import ModuleStatisticComparator -from compare_backend.comparator.operator_comparator import OperatorComparator -from compare_backend.comparator.operator_statistic_comparator import OperatorStatisticComparator -from compare_backend.compare_bean.communication_bean import CommunicationBean -from compare_backend.compare_bean.memory_compare_bean import MemoryCompareBean -from compare_backend.compare_bean.memory_statistic_bean import MemoryStatisticBean -from compare_backend.compare_bean.module_compare_bean import ModuleCompareBean -from compare_backend.compare_bean.module_statistic_bean import ModuleStatisticBean -from compare_backend.compare_bean.operator_compare_bean import OperatorCompareBean -from compare_backend.compare_bean.operator_statistic_bean import OperatorStatisticBean -from compare_backend.data_prepare.module_data_prepare import ModuleDataPrepare -from compare_backend.data_prepare.operator_data_prepare import OperatorDataPrepare -from compare_backend.generator.base_generator import BaseGenerator -from compare_backend.utils.common_func import longest_common_subsequence_matching -from 
compare_backend.utils.constant import Constant -from compare_backend.utils.module_node import ModuleNode -from compare_backend.utils.name_function import NameFunction -from compare_backend.utils.torch_op_node import TorchOpNode -from compare_backend.view.excel_view import ExcelView - - -class DetailPerformanceGenerator(BaseGenerator): - def __init__(self, profiling_data_dict: dict, args: any): - super().__init__(profiling_data_dict, args) - - @classmethod - def _match_none_subsequence(cls, base_ops: list, comparison_ops: list) -> list: - op_compare_result = [[op, None] for op in iter(base_ops)] - op_compare_result.extend([[None, op] for op in iter(comparison_ops)]) - return op_compare_result - - def compare(self): - if self._args.enable_operator_compare or self._args.enable_memory_compare or \ - self._args.enable_communication_compare: - print("[INFO] Start to compare performance detail data, please wait.") - comparator_list = self._create_comparator() - for comparator in comparator_list: - self._result_data.update(comparator.generate_data()) - - def generate_view(self): - if not self._result_data: - return - dir_path = self._args.output_path if self._args.output_path else "./" - file_name = "performance_comparison_result_{}.xlsx".format(datetime.utcnow().strftime("%Y%m%d%H%M%S")) - result_file_path = os.path.realpath(os.path.join(dir_path, file_name)) - ExcelView(self._result_data, result_file_path, self._args).generate_view() - print(f"[INFO] The comparison result file has been generated: {result_file_path}") - - def _create_comparator(self): - comparator_list = [] - - op_compare_result = [] - if self._args.enable_operator_compare: - module_compare_result = self.match_nn_module() if self._profiling_data_dict.get( - Constant.BASE_DATA).python_function_data and self._profiling_data_dict.get( - Constant.COMPARISON_DATA).python_function_data else [] - if not module_compare_result: - op_compare_result = self.match_torch_op() - - if self._args.enable_memory_compare and not op_compare_result: - op_compare_result = self.match_torch_op() - - if self._args.enable_communication_compare: - communication_data = { - Constant.BASE_DATA: self._profiling_data_dict.get(Constant.BASE_DATA).communication_dict, - Constant.COMPARISON_DATA: self._profiling_data_dict.get(Constant.COMPARISON_DATA).communication_dict} - comparator_list.append(CommunicationComparator(communication_data, CommunicationBean)) - - if self._args.enable_operator_compare: - if module_compare_result: - comparator_list.append(ModuleStatisticComparator(module_compare_result, ModuleStatisticBean)) - if not self._args.disable_details: - comparator_list.append(ModuleComparator(module_compare_result, ModuleCompareBean)) - else: - comparator_list.append(OperatorStatisticComparator(op_compare_result, OperatorStatisticBean)) - if not self._args.disable_details: - comparator_list.append(OperatorComparator(op_compare_result, OperatorCompareBean)) - if self._args.enable_memory_compare: - comparator_list.append(OperatorStatisticComparator(op_compare_result, MemoryStatisticBean)) - if not self._args.disable_details: - comparator_list.append(OperatorComparator(op_compare_result, MemoryCompareBean)) - return comparator_list - - def match_torch_op(self) -> list: - base_ops = OperatorDataPrepare(self._profiling_data_dict.get(Constant.BASE_DATA)).get_top_layer_ops() - comparison_ops = OperatorDataPrepare( - self._profiling_data_dict.get(Constant.COMPARISON_DATA)).get_top_layer_ops() - if not base_ops and not comparison_ops: - return [] - name_func = 
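match_torch_op below relies on longest_common_subsequence_matching from the deleted compare_backend/utils/common_func.py, which is not part of this hunk. As a generic illustration of the technique (not the deleted implementation), this sketch pairs two op lists by a name function and emits the output shape the comparators expect: [base_op, comparison_op] rows with None on the unmatched side, mirroring _match_none_subsequence:

    def lcs_match(base_ops, comparison_ops, name_func):
        rows, cols = len(base_ops), len(comparison_ops)
        dp = [[0] * (cols + 1) for _ in range(rows + 1)]
        for i in range(1, rows + 1):
            for j in range(1, cols + 1):
                if name_func(base_ops[i - 1]) == name_func(comparison_ops[j - 1]):
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
        # Backtrack from the end, recording matches and one-sided rows.
        result, i, j = [], rows, cols
        while i > 0 and j > 0:
            if name_func(base_ops[i - 1]) == name_func(comparison_ops[j - 1]):
                result.append([base_ops[i - 1], comparison_ops[j - 1]])
                i, j = i - 1, j - 1
            elif dp[i - 1][j] >= dp[i][j - 1]:
                result.append([base_ops[i - 1], None])
                i -= 1
            else:
                result.append([None, comparison_ops[j - 1]])
                j -= 1
        result.extend([base_ops[k], None] for k in range(i - 1, -1, -1))
        result.extend([None, comparison_ops[k]] for k in range(j - 1, -1, -1))
        result.reverse()
        return result

    pairs = lcs_match(["matmul", "relu", "add"], ["matmul", "add"], name_func=str)
    assert pairs == [["matmul", "matmul"], ["relu", None], ["add", "add"]]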
NameFunction(self._args).get_name_func() - op_compare_result = longest_common_subsequence_matching(base_ops, comparison_ops, name_func) \ - if not self._args.disable_details else self._match_none_subsequence(base_ops, comparison_ops) - if self._args.max_kernel_num is not None: - op_compare_result = self._drill_down(op_compare_result, name_func) - return op_compare_result - - def _drill_down(self, compare_result_data: list, name_func: any) -> list: - drill_down_result = [] - compare_result_data.reverse() - op_deque = deque(compare_result_data) - while op_deque: - match_data = op_deque.pop() - base_op = match_data[0] if match_data[0] else TorchOpNode() - comparison_op = match_data[1] if match_data[1] else TorchOpNode() - if not base_op.child_nodes or not comparison_op.child_nodes: - drill_down_result.append(match_data) - continue - if max(base_op.kernel_num, comparison_op.kernel_num) <= self._args.max_kernel_num: - drill_down_result.append(match_data) - continue - match_list = longest_common_subsequence_matching(base_op.child_nodes, - comparison_op.child_nodes, - name_func) \ - if not self._args.disable_details else self._match_none_subsequence(base_op.child_nodes, - comparison_op.child_nodes) - match_list.reverse() - for data in match_list: - op_deque.append(data) - - return drill_down_result - - def match_nn_module(self) -> list: - module_compare_result = [] - base_root_node = ModuleDataPrepare(self._profiling_data_dict.get(Constant.BASE_DATA)).build_module_tree() - comparison_root_node = ModuleDataPrepare( - self._profiling_data_dict.get(Constant.COMPARISON_DATA)).build_module_tree() - for index, base_node in enumerate(base_root_node): - comparison_node = comparison_root_node[index] if index < len(comparison_root_node) else None - if not base_node or not comparison_node: - continue - module_compare_result.extend(self._matching_all_modules(base_node, comparison_node)) - return module_compare_result - - def _matching_all_modules(self, base_node: ModuleNode, comparison_node: ModuleNode): - all_matched_modules = [] - matched_queue = Queue() - matched_queue.put([base_node, comparison_node]) - while not matched_queue.empty(): - matched_base_node, matched_comparison_node = matched_queue.get() - matched_node_list = self._matching_common_subsequence(matched_base_node, matched_comparison_node) - all_matched_modules.extend(matched_node_list) - for matched_node in matched_node_list: - matched_queue.put(matched_node) - return all_matched_modules - - def _matching_common_subsequence(self, base_node: ModuleNode, comparison_node: ModuleNode): - base_modules = base_node.child_nodes if base_node else [] - comparison_modules = comparison_node.child_nodes if comparison_node else [] - if not base_modules and not comparison_modules: - return [] - name_func = NameFunction(self._args).get_module_name - result = longest_common_subsequence_matching(base_modules, comparison_modules, name_func) \ - if not self._args.disable_details else self._match_none_subsequence(base_modules, comparison_modules) - return result diff --git a/profiler/compare_tools/compare_backend/generator/overall_performance_generator.py b/profiler/compare_tools/compare_backend/generator/overall_performance_generator.py deleted file mode 100644 index 9fe31d0ea5..0000000000 --- a/profiler/compare_tools/compare_backend/generator/overall_performance_generator.py +++ /dev/null @@ -1,19 +0,0 @@ -from compare_backend.comparator.overall_performance_comparator import OverallPerformanceComparator -from compare_backend.compare_bean.profiling_info import 
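_matching_all_modules above walks the two module trees breadth-first: every matched (base, comparison) pair is pushed back onto a queue so that its children are paired at the next level. A structural sketch only, where pair_children stands in for _matching_common_subsequence (which pairs child modules by name):

    from queue import Queue

    def match_all_modules(base_root, comparison_root, pair_children):
        all_matched = []
        matched_queue = Queue()
        matched_queue.put((base_root, comparison_root))
        while not matched_queue.empty():
            base_node, comparison_node = matched_queue.get()
            for matched_pair in pair_children(base_node, comparison_node):
                all_matched.append(matched_pair)
                matched_queue.put(matched_pair)  # descend into this pair's children next round
        return all_matched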
ProfilingInfo -from compare_backend.generator.base_generator import BaseGenerator -from compare_backend.view.screen_view import ScreenView - - -class OverallPerformanceGenerator(BaseGenerator): - def __init__(self, profiling_data_dict: dict, args: any): - super().__init__(profiling_data_dict, args) - - def compare(self): - if not self._args.enable_profiling_compare: - return - self._result_data = OverallPerformanceComparator(self._profiling_data_dict, ProfilingInfo).generate_data() - - def generate_view(self): - if not self._result_data: - return - ScreenView(self._result_data).generate_view() diff --git a/profiler/compare_tools/compare_backend/interface/__init__.py b/profiler/compare_tools/compare_backend/interface/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/interface/overall_interface.py b/profiler/compare_tools/compare_backend/interface/overall_interface.py deleted file mode 100644 index fb549007f6..0000000000 --- a/profiler/compare_tools/compare_backend/interface/overall_interface.py +++ /dev/null @@ -1,13 +0,0 @@ -from compare_backend.comparator.overall_performance_comparator import OverallPerformanceComparator -from compare_backend.compare_bean.profiling_info import ProfilingInfo -from compare_backend.utils.constant import Constant - - -class OverallInterface: - def __init__(self, overall_data: dict): - self._overall_data = overall_data - - def run(self): - data = {Constant.BASE_DATA: self._overall_data.get(Constant.BASE_DATA).overall_metrics, - Constant.COMPARISON_DATA: self._overall_data.get(Constant.COMPARISON_DATA).overall_metrics} - return OverallPerformanceComparator(data, ProfilingInfo).generate_data() diff --git a/profiler/compare_tools/compare_backend/profiling_parser/__init__.py b/profiler/compare_tools/compare_backend/profiling_parser/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py deleted file mode 100644 index 2127ff5e75..0000000000 --- a/profiler/compare_tools/compare_backend/profiling_parser/base_profiling_parser.py +++ /dev/null @@ -1,211 +0,0 @@ -from abc import abstractmethod, ABC -from decimal import Decimal - -from compare_backend.compare_bean.origin_data_bean.compare_event import KernelEvent, MemoryEvent -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.compare_bean.profiling_info import ProfilingInfo -from compare_backend.utils.constant import Constant -from compare_backend.utils.file_reader import FileReader - - -class ProfilingResult: - - def __init__(self, profiling_type): - self._profiling_type = profiling_type - self.torch_op_data = [] - self.kernel_dict = {} - self.memory_list = [] - self.communication_dict = {} - self.overall_metrics = ProfilingInfo(profiling_type) - self.python_function_data = [] - self.fwdbwd_dict = {} - - def update_torch_op_data(self, event: TraceEventBean): - event.is_torch_op = True - self.torch_op_data.append(event) - - def update_python_function_data(self, event: TraceEventBean): - self.python_function_data.append(event) - - def update_fwdbwd_data(self, flow_type: str, event: TraceEventBean): - self.fwdbwd_dict.setdefault(event.id, {})[flow_type] = event - - def update_kernel_dict(self, start_time: Decimal, kernel_event: TraceEventBean): - self.kernel_dict.setdefault(start_time, 
[]).append(KernelEvent(kernel_event, self._profiling_type)) - - def update_memory_list(self, memory_data: dict): - self.memory_list.append(MemoryEvent(memory_data)) - - def update_communication_dict(self, comm_name: str, comm_dur: float): - self.communication_dict.setdefault(comm_name, {}).setdefault("comm_list", []).append(comm_dur) - - def update_comm_task_data(self, comm_name: str, task_event: TraceEventBean): - self.communication_dict.setdefault(comm_name, {}).setdefault("comm_task", {}).setdefault( - task_event.name, []).append(task_event.dur) - - -class BaseProfilingParser(ABC): - - def __init__(self, args: any, path_dict: dict): - self._args = args - self._profiling_type = path_dict.get(Constant.PROFILING_TYPE) - self._profiling_path = path_dict.get(Constant.PROFILING_PATH) - self._json_path = path_dict.get(Constant.TRACE_PATH) - self._trace_events = [] if self._profiling_path == Constant.NPU else {} - self._enable_profiling_compare = args.enable_profiling_compare - self._enable_operator_compare = args.enable_operator_compare - self._enable_memory_compare = args.enable_memory_compare - self._enable_communication_compare = args.enable_communication_compare - self._dispatch_func = self._get_dispatch_func() - self._result_data = ProfilingResult(self._profiling_type) - self._memory_events = [] - self._flow_dict = {} - self._fwdbwd_dict = {} - self._all_kernels = {} - self._comm_task_list = [] - self._comm_list = [] - self._read_trace_event() - self._cur_func_index = 0 - - @abstractmethod - def _update_memory_list(self): - raise NotImplementedError("Function _update_memory_list need to be implemented.") - - @abstractmethod - def _update_overall_metrics(self): - raise NotImplementedError("Function _update_overall_metrics need to be implemented.") - - @abstractmethod - def _is_kernel_event(self, event: TraceEventBean): - raise NotImplementedError("Function _is_kernel_event need to be implemented.") - - @abstractmethod - def _is_flow_event(self, event: TraceEventBean): - raise NotImplementedError("Function _is_flow_event need to be implemented.") - - @abstractmethod - def _is_torch_op_event(self, event: TraceEventBean): - raise NotImplementedError("Function _is_torch_op_event need to be implemented.") - - @abstractmethod - def _get_dispatch_func(self): - raise NotImplementedError("Function _get_dispatch_func need to be implemented.") - - def load_data(self) -> ProfilingResult: - self._dispatch_events() - self._update_kernel_dict() - self._update_communication_dict() - if self._enable_memory_compare: - self._update_memory_list() - if self._enable_profiling_compare: - self._update_overall_metrics() - self._check_result_data() - return self._result_data - - def _dispatch_events(self): - if not self._dispatch_func: - return - index_list = list(range(0, len(self._dispatch_func))) * 2 - for event in self._trace_events: - if not event.is_dict(): - continue - if event.is_m_mode(): - continue - self.__picking_event(event, index_list) - - def __picking_event(self, event: TraceEventBean, index_list: list): - for index in range(self._cur_func_index, self._cur_func_index + len(self._dispatch_func)): - func_index = index_list[index] - res = self._dispatch_func[func_index](event) - if res: - self._cur_func_index = func_index - break - - def _picking_torch_op_event(self, event: TraceEventBean): - if self._is_torch_op_event(event): - self._result_data.update_torch_op_data(event) - return True - return False - - def _picking_kernel_event(self, event: TraceEventBean): - if self._is_kernel_event(event): - 
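_dispatch_events and __picking_event below iterate the trace with a small ring of picker functions: for each event the scan starts at the picker that matched the previous event (same-kind events tend to arrive in runs), and the doubled index list lets the scan wrap around without modular arithmetic. A standalone sketch of that dispatch loop with two toy pickers:

    def dispatch(events, pickers):
        index_list = list(range(len(pickers))) * 2
        cur_func_index = 0
        for event in events:
            for index in range(cur_func_index, cur_func_index + len(pickers)):
                func_index = index_list[index]
                if pickers[func_index](event):
                    cur_func_index = func_index   # start here for the next event
                    break

    picked = {"op": [], "kernel": []}

    def pick_op(event):
        if event["cat"] == "cpu_op":
            picked["op"].append(event)
            return True
        return False

    def pick_kernel(event):
        if event["cat"] == "kernel":
            picked["kernel"].append(event)
            return True
        return False

    dispatch([{"cat": "cpu_op"}, {"cat": "cpu_op"}, {"cat": "kernel"}], [pick_op, pick_kernel])
    assert (len(picked["op"]), len(picked["kernel"])) == (2, 1)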
self._all_kernels[f"{event.pid}-{event.tid}-{event.start_time}"] = event - return True - return False - - def _picking_flow_event(self, event: TraceEventBean): - if self._is_flow_event(event): - if event.is_flow_start(): - self._flow_dict.setdefault(event.id, {})["start"] = event - elif event.is_flow_end(): - self._flow_dict.setdefault(event.id, {})["end"] = event - return True - return False - - def _picking_python_function_event(self, event: TraceEventBean): - if event.is_python_function(): - self._result_data.update_python_function_data(event) - return True - return False - - def _picking_fwdbwd_flow_event(self, event: TraceEventBean): - if event.is_fwdbwd(): - if event.is_flow_start(): - self._result_data.update_fwdbwd_data("start", event) - elif event.is_flow_end(): - self._result_data.update_fwdbwd_data("end", event) - return True - return False - - def _update_kernel_dict(self): - if self._profiling_type == Constant.NPU: - for comm in self._comm_list: - self._all_kernels[f"{comm.pid}-{comm.tid}-{comm.start_time}"] = comm - for flow_event in self._flow_dict.values(): - start_event = flow_event.get("start") - end_event = flow_event.get("end") - if not start_event or not end_event: - continue - kernel_event = self._all_kernels.get(f"{end_event.pid}-{end_event.tid}-{end_event.start_time}") - if not kernel_event: - continue - self._result_data.update_kernel_dict(start_event.start_time, kernel_event) - - def _update_communication_dict(self): - if self._profiling_type == Constant.GPU: - self._comm_list = list(filter(lambda x: x.is_nccl_name(), self._all_kernels.values())) - self._comm_list.sort(key=lambda x: x.start_time) - self._comm_task_list.sort(key=lambda x: x.start_time) - task_index = 0 - for communication_op in self._comm_list: - name_list = communication_op.lower_name.split("_") - if len(name_list) < 2: - continue - comm_name = name_list[1] - self._result_data.update_communication_dict(comm_name, communication_op.dur) - while task_index < len(self._comm_task_list): - task_event = self._comm_task_list[task_index] - if task_event.start_time < communication_op.start_time: - task_index += 1 - continue - if task_event.start_time > communication_op.end_time: - break - self._result_data.update_comm_task_data(comm_name, task_event) - task_index += 1 - - def _check_result_data(self): - if self._enable_operator_compare or self._enable_memory_compare: - if not self._result_data.torch_op_data: - print(f"[WARNING] Can't find any torch op in the file: {self._profiling_path}") - if self._enable_operator_compare and not self._result_data.kernel_dict: - print(f"[WARNING] Can't find any flow event in the file: {self._profiling_path}") - if self._enable_memory_compare and not self._result_data.memory_list: - print(f"[WARNING] Can't find any memory event in the file: {self._profiling_path}") - if self._enable_communication_compare and not self._result_data.communication_dict: - print(f"[WARNING] Can't find any communication op in the file: {self._profiling_path}") - - def _read_trace_event(self): - try: - self._trace_events = FileReader.read_trace_file(self._json_path) - except Exception: - print(f"[ERROR] Failed to read the file: {self._json_path}") diff --git a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py deleted file mode 100644 index c4089aec9b..0000000000 --- a/profiler/compare_tools/compare_backend/profiling_parser/gpu_profiling_parser.py +++ /dev/null @@ -1,189 +0,0 @@ -import 
sys -from collections import defaultdict, Counter - -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.profiling_parser.base_profiling_parser import BaseProfilingParser -from compare_backend.utils.constant import Constant - - -class GPUProfilingParser(BaseProfilingParser): - CUBE_MARK = ['gemm', 'conv', 'cutlass', 'wgrad'] - FA_MARK_LIST = [['fmha', 'kernel'], ['flash', 'kernel'], ['attention', 'kernel']] - SDMA_MARK_LIST = ['htod', 'dtod', 'dtoh', 'memset (device)'] - FLOW_CAT = ("async_gpu", "async_cpu_to_gpu", "ac2g", "async") - TORCH_OP_CAT = ("cpu_op", "user_annotation", "cuda_runtime", "operator", "runtime") - - def __init__(self, args: any, path_dict: dict): - super().__init__(args, path_dict) - self._trace_events = [TraceEventBean(event) for event in self._trace_events.get("traceEvents", [])] - self._flow_cat = (args.gpu_flow_cat,) if args.gpu_flow_cat else self.FLOW_CAT - self._compute_stream_id = self._infer_compute_stream_id() - self._marks = defaultdict(int) - self._aten_index = 0 - - @classmethod - def __is_flash_attention(cls, name: str): - for fa_mark in cls.FA_MARK_LIST: - if not [1 for mark in fa_mark if mark not in name.lower()]: - return True - return False - - @classmethod - def __is_sdma_time(cls, name: str): - for mark in cls.SDMA_MARK_LIST: - if mark in name.lower(): - return True - return False - - def _update_memory_list(self): - if not self._enable_memory_compare: - return - self._memory_events.sort(key=lambda x: x.start_time) - addr_dict = {} - for memory_event in self._memory_events: - allocate_bytes = memory_event.bytes_kb - record = addr_dict.get(memory_event.addr) - if allocate_bytes > 0: - if record: - self._result_data.update_memory_list(record) - addr_dict[memory_event.addr] = {Constant.SIZE: allocate_bytes, - Constant.TS: memory_event.start_time, - Constant.ALLOCATION_TIME: memory_event.start_time} - if allocate_bytes < 0 and record: - if abs(allocate_bytes) == record.get(Constant.SIZE): - record[Constant.RELEASE_TIME] = memory_event.start_time - self._result_data.update_memory_list(record) - del addr_dict[memory_event.addr] - for record in addr_dict.values(): - self._result_data.update_memory_list(record) - - def _update_overall_metrics(self): - self._calculate_performance_time() - self.__parse_memory_reserved() - self._result_data.overall_metrics.calculate_vec_time() - self._result_data.overall_metrics.calculate_schedule_time() - self._result_data.overall_metrics.trans_time_to_s() - - def _calculate_performance_time(self): - min_ts = sys.float_info.max - max_ts = sys.float_info.min - self._trace_events.sort(key=lambda x: x.start_time) - aten_events = list(filter(lambda x: x.name.startswith("aten::"), self._trace_events)) - flow_dict_new = {} - for flow_event in self._flow_dict.values(): - start_event = flow_event.get("start") - end_event = flow_event.get("end") - if start_event and end_event: - flow_dict_new[end_event.start_time] = start_event.start_time - for event in self._trace_events: - if event.stream: - min_ts = min(event.start_time, min_ts) - max_ts = max(event.end_time, max_ts) - if event.stream == self._compute_stream_id and self.__is_sdma_time(event.name): - self._result_data.overall_metrics.update_sdma_info(event.dur) - continue - if not event.is_kernel_cat(): - continue - self.__add_marks(event) - if event.is_nccl_name(): - continue - self.__add_compute_time(event, aten_events, flow_dict_new) - self._aten_events = None - self._result_data.overall_metrics.set_e2e_time(float(max_ts 
- min_ts)) - self.__add_compute_and_overlap_time() - - def __add_compute_and_overlap_time(self): - compute_time = len([_ for _, value in self._marks.items() if value < 0]) - communication_not_overlapped = len([_ for _, value in self._marks.items() if value > 0]) - self._result_data.overall_metrics.set_compute_time(compute_time) - self._result_data.overall_metrics.set_comm_not_overlap(communication_not_overlapped) - - def __add_marks(self, event: TraceEventBean): - if event.is_nccl_name(): - for timestep in range(int(event.start_time + 1), int(event.end_time + 1)): - self._marks[str(timestep)] += 1 # mark this timestep in communication stream - else: - for timestep in range(int(event.start_time + 1), int(event.end_time + 1)): - self._marks[str(timestep)] += -100 # mark this timestep in compute stream - - def __add_compute_time(self, event: TraceEventBean, aten_events: list, flow_dict_new: dict): - if self.__is_flash_attention(event.name): - if event.is_backward(): - self._result_data.overall_metrics.update_fa_bwd_info(event.dur) - else: - self._result_data.overall_metrics.update_fa_fwd_info(event.dur) - elif any(cube_mark in event.lower_name for cube_mark in self.CUBE_MARK): - is_conv = self.__check_is_conv(event, aten_events, flow_dict_new) - if is_conv == "conv_fwd": - self._result_data.overall_metrics.update_conv_fwd_info(event.dur) - elif is_conv == "conv_bwd": - self._result_data.overall_metrics.update_conv_bwd_info(event.dur) - else: - self._result_data.overall_metrics.update_cube_info(event.dur) - else: - self._result_data.overall_metrics.update_vec_info(event.dur) - - def __check_is_conv(self, event: TraceEventBean, aten_events: list, flow_dict_new: dict) -> str: - flow_start_time = flow_dict_new.get(event.start_time) - if not flow_start_time: - return "" - aten_len = len(aten_events) - while self._aten_index < aten_len: - cur_aten = aten_events[self._aten_index] - if cur_aten.end_time < flow_start_time: - self._aten_index += 1 - continue - if cur_aten.start_time < flow_start_time: - if cur_aten.is_conv(): - return "conv_bwd" if cur_aten.is_backward() else "conv_fwd" - return "" - - def _picking_memory_event(self, event: TraceEventBean): - if event.is_memory_event(): - self._memory_events.append(event) - return True - return False - - def _is_torch_op_event(self, event: TraceEventBean): - return event.lower_cat in self.TORCH_OP_CAT - - def _is_kernel_event(self, event: TraceEventBean): - return event.is_kernel_cat() - - def _is_flow_event(self, event: TraceEventBean): - return event.lower_cat in self._flow_cat - - def __parse_memory_reserved(self): - if not self._memory_events: - print("[INFO] Gpu profiling data doesn't contain memory info.") - return - memory_used = max([event.total_reserved for event in self._memory_events]) / 1024 ** 3 - self._result_data.overall_metrics.set_memory_used(memory_used) - - def _get_dispatch_func(self): - func_set = set() - if self._enable_memory_compare or self._enable_operator_compare: - func_set.add(self._picking_torch_op_event) - if self._enable_communication_compare: - func_set.add(self._picking_kernel_event) - if self._enable_operator_compare: - func_set.add(self._picking_python_function_event) - func_set.add(self._picking_fwdbwd_flow_event) - if self._enable_operator_compare or self._args.max_kernel_num: - func_set.add(self._picking_kernel_event) - func_set.add(self._picking_flow_event) - if self._enable_memory_compare or self._enable_profiling_compare: - func_set.add(self._picking_memory_event) - return list(func_set) - - def 
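A worked example of the per-microsecond marking scheme in __add_marks and __add_compute_and_overlap_time above: communication (nccl) kernels add +1 to each covered timestep and compute kernels add -100, so a negative bucket means "some compute ran here" and a positive bucket means "communication only", i.e. not overlapped. The durations below are made up:

    from collections import defaultdict

    marks = defaultdict(int)

    def add_marks(start, end, is_nccl):
        for timestep in range(int(start + 1), int(end + 1)):
            marks[str(timestep)] += 1 if is_nccl else -100

    add_marks(0, 4, is_nccl=False)   # compute kernel covering ~4 us
    add_marks(2, 6, is_nccl=True)    # nccl kernel overlapping the last 2 us of it

    compute_time = len([v for v in marks.values() if v < 0])            # 4 us
    comm_not_overlapped = len([v for v in marks.values() if v > 0])     # 2 us
    assert (compute_time, comm_not_overlapped) == (4, 2)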
_infer_compute_stream_id(self): - if not self._enable_profiling_compare: - return -1 - kernel_stream_ids = [] - for event in self._trace_events: - if event.is_kernel_except_nccl() and event.stream: - kernel_stream_ids.append(event.stream) - if not kernel_stream_ids: - raise RuntimeError('[ERROR] The profiling data does not contain kernel running data.') - counter = Counter(kernel_stream_ids) - return counter.most_common(1)[0][0] diff --git a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py b/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py deleted file mode 100644 index 70ce44b44e..0000000000 --- a/profiler/compare_tools/compare_backend/profiling_parser/npu_profiling_parser.py +++ /dev/null @@ -1,323 +0,0 @@ -import os -import sys -from math import ceil - -from compare_backend.compare_bean.origin_data_bean.kernel_details_bean import KernelDetailsBean -from compare_backend.compare_bean.origin_data_bean.memory_record_bean import MemoryRecordBean -from compare_backend.compare_bean.origin_data_bean.operator_memory_bean import OperatorMemoryBean -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.profiling_parser.base_profiling_parser import BaseProfilingParser -from compare_backend.utils.constant import Constant -from compare_backend.utils.file_reader import FileReader - - -class NPUProfilingParser(BaseProfilingParser): - FLOW_CAT = "async_npu" - TORCH_OP_CAT = "cpu_op" - ACTIVE_CPU = "ProfilerActivity.CPU" - LEVEL_0 = "Level0" - - def __init__(self, args: any, path_dict: dict): - super().__init__(args, path_dict) - self._operator_memory_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "operator_memory.csv") - self._memory_record_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "memory_record.csv") - self._kernel_detail_path = os.path.join(path_dict.get(Constant.ASCEND_OUTPUT_PATH, ""), "kernel_details.csv") - self._info_json_path = path_dict.get(Constant.INFO_JSON_PATH, "") - self._trace_events = [TraceEventBean(event) for event in self._trace_events] - self._hccl_pid = None - self._hccl_op_tid_list = [] - self._kernel_pid = None - self._overlap_pid = None - self._enqueue_dict = {} - self._dequeue_data = [] - self._overlap_analysis = [] - self._dispatch_func = self._get_dispatch_func() - self._filter_meta_id() - - def _get_dispatch_func(self): - func_list = set() - if self._enable_memory_compare or self._enable_operator_compare: - func_list.add(self._picking_torch_op_event) - if self._enable_operator_compare or self._args.max_kernel_num: - func_list.add(self._picking_kernel_event) - func_list.add(self._picking_flow_event) - if self._enable_operator_compare: - func_list.add(self._picking_python_function_event) - func_list.add(self._picking_fwdbwd_flow_event) - if self._enable_memory_compare: - func_list.add(self._picking_task_queue_data) - if self._enable_communication_compare: - func_list.add(self._picking_hccl_event) - if self._enable_profiling_compare: - func_list.add(self._picking_overlap_analysis_data) - func_list.add(self._picking_kernel_event) - func_list.add(self._picking_hccl_event) - return list(func_list) - - def _update_memory_list(self): - try: - memory_data = FileReader.read_csv_file(self._operator_memory_path, OperatorMemoryBean) - except FileNotFoundError: - print("[WARNING] The file operator_memory.csv does not exist.") - return - except Exception: - print("[ERROR] Failed to read operator_memory.csv.") - return - if 
memory_data: - self._dequeue_data.sort(key=lambda x: x.start_time) - for data in memory_data: - if not data.allocation_time: - continue - if data.is_cann_op(): - matched_corr_id = self.__match_dequeue_data(data.allocation_time) - if matched_corr_id == Constant.INVALID_VALUE: - continue - self._result_data.update_memory_list({Constant.SIZE: data.size, - Constant.TS: self._enqueue_dict.get(matched_corr_id, 0), - Constant.NAME: data.name, - Constant.ALLOCATION_TIME: data.allocation_time, - Constant.RELEASE_TIME: data.release_time}) - else: - self._result_data.update_memory_list({Constant.SIZE: data.size, - Constant.TS: data.allocation_time, - Constant.ALLOCATION_TIME: data.allocation_time, - Constant.RELEASE_TIME: data.release_time}) - - def __match_dequeue_data(self, ts_time: float) -> int: - if not self._dequeue_data: - return Constant.INVALID_VALUE - left, right = 0, len(self._dequeue_data) - 1 - while right > left: - mid = left + ceil((right - left) / 2) - if ts_time >= self._dequeue_data[mid].start_time: - left = mid - else: - right = mid - 1 - return self._dequeue_data[left].corr_id if self._dequeue_data[left].start_time <= ts_time <= \ - self._dequeue_data[left].end_time else Constant.INVALID_VALUE - - def _update_overall_metrics(self): - self.__parse_info_json() - self.__parse_mem_csv() - self.__parse_kernel_csv() - self.__add_lccl_time() - self.__add_sdma_time() - self.__add_overlap_analysis_time() - self._picking_notify_wait_event_and_not_overlap_event() - self.__add_overlap_wait_time() - self._result_data.overall_metrics.calculate_other_time() - self._result_data.overall_metrics.calculate_schedule_time() - self._result_data.overall_metrics.trans_time_to_s() - - def _picking_notify_wait_event_and_not_overlap_event(self): - self.notify_event_cache = [] - self._not_overlaped_commu_event = [] - for event in self._comm_task_list: - if event.name == 'Notify_Wait' and event.args.get('rdma_type', 0) != 'RDMA_PAYLOAD_CHECK' \ - and event.args.get('rdma_type', 0) != 'RDMA_PAYLOAD_ACK': - self.notify_event_cache.append(event) - for event in self._overlap_analysis: - if event.is_comm_not_overlap(): - self._not_overlaped_commu_event.append(event) - self._not_overlaped_commu_event.sort(key=lambda x: x.start_time) - - def __add_overlap_wait_time(self): - notify_wait_event_dict = dict() - for notify_event in self.notify_event_cache: - if notify_event.tid in notify_wait_event_dict: - notify_wait_event_dict[notify_event.tid].append(notify_event) - else: - notify_wait_event_dict[notify_event.tid] = [notify_event] - - if self._result_data.overall_metrics.is_level0: - return - - total_time = 0 - for commu_event in self._not_overlaped_commu_event: - wait_time_list = [0] - commu_event_start_time = float(commu_event.start_time) - commu_event_end_time = float(commu_event.start_time) + commu_event.dur - - for plane_id, events in notify_wait_event_dict.items(): - wait_time = 0 - idx = 0 - for notify_event in events: - notify_event_start_time = float(notify_event.start_time) - notify_event_end_time = float(notify_event.start_time) + notify_event.dur - if notify_event_start_time < commu_event_start_time and notify_event_end_time > \ - commu_event_end_time: - wait_time = commu_event_end_time - commu_event_start_time - break - elif notify_event_start_time < commu_event_start_time <= notify_event_end_time <= \ - commu_event_end_time: - wait_time += notify_event_end_time - commu_event_start_time - idx += 1 - elif commu_event_start_time <= notify_event_start_time <= commu_event_end_time < \ - notify_event_end_time: 
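A sketch of the interval lookup in __match_dequeue_data above: dequeue events are kept sorted by start time, a right-leaning binary search finds the last event starting at or before the allocation timestamp, and the match only counts if the timestamp falls inside that event's [start, end] window. Plain tuples stand in for the dequeue TraceEventBeans:

    from math import ceil

    INVALID = -1

    def match_dequeue(dequeue_data, ts_time):
        # dequeue_data: list of (start_time, end_time, corr_id), sorted by start_time
        if not dequeue_data:
            return INVALID
        left, right = 0, len(dequeue_data) - 1
        while right > left:
            mid = left + ceil((right - left) / 2)
            if ts_time >= dequeue_data[mid][0]:
                left = mid
            else:
                right = mid - 1
        start, end, corr_id = dequeue_data[left]
        return corr_id if start <= ts_time <= end else INVALID

    queue = [(0, 5, 100), (10, 15, 101), (20, 25, 102)]
    assert match_dequeue(queue, 12) == 101
    assert match_dequeue(queue, 7) == INVALID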
- wait_time += commu_event_end_time - notify_event_start_time - break - elif notify_event_start_time >= commu_event_start_time and notify_event_end_time <= \ - commu_event_end_time: - wait_time += notify_event_end_time - notify_event_start_time - idx += 1 - elif notify_event_end_time < commu_event_start_time: - idx += 1 - else: - break - - wait_time_list.append(wait_time) - notify_wait_event_dict[plane_id] = notify_wait_event_dict[plane_id][idx:] - total_time += max(wait_time_list) - self._result_data.overall_metrics.update_comm_not_overlap_wait_time(total_time) - - def _picking_hccl_event(self, event: TraceEventBean): - if event.pid != self._hccl_pid or not event.is_x_mode(): - return False - if event.tid in self._hccl_op_tid_list: - self._comm_list.append(event) - else: - self._comm_task_list.append(event) - return True - - def _picking_task_queue_data(self, event: TraceEventBean): - if event.is_enqueue(): - self._enqueue_dict[event.corr_id] = event.start_time - return True - elif event.is_dequeue(): - self._dequeue_data.append(event) - return True - return False - - def _picking_overlap_analysis_data(self, event: TraceEventBean): - if event.pid == self._overlap_pid and event.is_x_mode(): - self._overlap_analysis.append(event) - return True - return False - - def _is_kernel_event(self, event: TraceEventBean): - return event.pid == self._kernel_pid and event.is_x_mode() - - def _is_flow_event(self, event: TraceEventBean): - return event.lower_cat == self.FLOW_CAT - - def _is_torch_op_event(self, event: TraceEventBean): - return event.lower_cat == self.TORCH_OP_CAT - - def _filter_meta_id(self): - for event in self._trace_events: - if not event.is_process_meta(): - continue - if event.is_hccl_process_name(): - self._hccl_pid = event.pid - elif event.is_npu_process_name(): - self._kernel_pid = event.pid - elif event.is_overlap_process_name(): - self._overlap_pid = event.pid - if not self._enable_communication_compare: - return - for event in self._trace_events: - if not event.is_thread_meta(): - continue - if event.pid == self._hccl_pid and event.is_communication_op_thread(): - self._hccl_op_tid_list.append(event.tid) - - def __parse_info_json(self): - try: - json_data = FileReader.read_trace_file(self._info_json_path) - except Exception: - print('[WARNING] Failed to read profiler_info.json.') - return - if not isinstance(json_data, dict) or not json_data: - print('[WARNING] Invalid profiler info.') - return - level = json_data.get('config', {}).get('experimental_config', {}).get('_profiler_level', '') - if self.LEVEL_0 != level: - return - self._result_data.overall_metrics.is_level0 = True - if self.ACTIVE_CPU in json_data.get('config', {}).get('common_config', {}).get('activities', []): - return - self._result_data.overall_metrics.minimal_profiling = True - - def __add_lccl_time(self): - for event in self._all_kernels.values(): - if event.is_lccl(): - self._result_data.overall_metrics.update_lccl_info(event.dur) - - def __parse_kernel_csv(self): - try: - kernel_details = FileReader.read_csv_file(self._kernel_detail_path, KernelDetailsBean) - except Exception: - print('[WARNING] Npu kernel details csv file is not available.') - return - if not kernel_details or kernel_details[0].is_hide_op_pmu(): - self._result_data.overall_metrics.hide_op_details = True - return - for kernel in kernel_details: - if kernel.is_invalid(): - continue - if kernel.is_flash_attention(): - if kernel.is_fa_bwd(): - self._result_data.overall_metrics.update_fa_bwd_info(kernel.duration) - else: - 
self._result_data.overall_metrics.update_fa_fwd_info(kernel.duration) - elif kernel.is_conv(): - if kernel.is_conv_bwd(): - self._result_data.overall_metrics.update_conv_bwd_info(kernel.duration) - else: - self._result_data.overall_metrics.update_conv_fwd_info(kernel.duration) - elif kernel.is_cube(): - self._result_data.overall_metrics.update_cube_info(kernel.duration) - elif kernel.is_sdma(): - self._result_data.overall_metrics.update_sdma_info(kernel.duration) - elif kernel.is_page_attention(): - self._result_data.overall_metrics.update_pa_info(kernel.duration) - elif kernel.is_vector(): - self._result_data.overall_metrics.update_vec_info(kernel.duration) - else: - self._result_data.overall_metrics.update_cube_info(kernel.duration) - - def __parse_mem_csv(self): - try: - memory_record = FileReader.read_csv_file(self._memory_record_path, MemoryRecordBean) - except FileNotFoundError: - print('[INFO] Npu memory record csv file is not available.') - except Exception: - print('[WARNING] Load memory info failed.') - else: - memory_used = max([memory.total_reserved_mb for memory in memory_record]) / 1024 - self._result_data.overall_metrics.set_memory_used(memory_used) - - def __add_overlap_analysis_time(self): - if not self._overlap_analysis: - print('[ERROR] Failed to get overlap analysis data.') - return - min_ts = sys.float_info.max - max_ts = sys.float_info.min - for event in self._overlap_analysis: - if event.is_computing_event(): - self._result_data.overall_metrics.update_compute_time(event.dur) - min_ts = min(event.start_time, min_ts) - max_ts = max(event.end_time, max_ts) - elif event.is_comm_not_overlap(): - self._result_data.overall_metrics.update_comm_not_overlap(event.dur) - min_ts = min(event.start_time, min_ts) - max_ts = max(event.end_time, max_ts) - self._result_data.overall_metrics.set_e2e_time(float(max_ts - min_ts)) - - def __add_sdma_time(self) -> (float, int): - event_wait_stream, ai_core_stream = set(), set() - sdma_dict = {} - for event in self._all_kernels.values(): - stream_id = event.stream_id - if not stream_id: - continue - if event.is_event_wait(): - event_wait_stream.add(stream_id) - elif event.is_sdma_event(): - sdma_dict.setdefault(stream_id, []).append(event.dur) - elif event.is_compute_event(): - ai_core_stream.add(stream_id) - compute_stream = event_wait_stream & ai_core_stream if event_wait_stream else ai_core_stream - for stream in compute_stream: - dur_list = sdma_dict.get(stream, []) - self._result_data.overall_metrics.update_sdma_info(sum(dur_list), len(dur_list)) diff --git a/profiler/compare_tools/compare_backend/utils/__init__.py b/profiler/compare_tools/compare_backend/utils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/utils/args_manager.py b/profiler/compare_tools/compare_backend/utils/args_manager.py deleted file mode 100644 index 4b5947fa7b..0000000000 --- a/profiler/compare_tools/compare_backend/utils/args_manager.py +++ /dev/null @@ -1,136 +0,0 @@ -import os.path -import re - -from common_func.path_manager import PathManager -from compare_backend.utils.constant import Constant -from compare_backend.utils.file_reader import FileReader - - -class Singleton(object): - def __init__(self, cls): - self._cls = cls - self._instance = {} - - def __call__(self): - if self._cls not in self._instance: - self._instance[self._cls] = self._cls() - return self._instance[self._cls] - - -@Singleton -class ArgsManager: - - def __init__(self): - self._args = None - self._base_path_dict = {} - 
self._comparison_path_dict = {} - - @property - def args(self): - return self._args - - @property - def base_profiling_type(self): - return self._base_path_dict.get(Constant.PROFILING_TYPE) - - @property - def comparison_profiling_type(self): - return self._comparison_path_dict.get(Constant.PROFILING_TYPE) - - @property - def base_profiling_path(self): - return self._args.base_profiling_path - - @property - def comparison_profiling_path(self): - return self._args.comparison_profiling_path_dict - - @property - def base_path_dict(self): - return self._base_path_dict - - @property - def comparison_path_dict(self): - return self._comparison_path_dict - - @property - def enable_profiling_compare(self): - return self._args.enable_profiling_compare - - @property - def enable_operator_compare(self): - return self._args.enable_operator_compare - - @property - def enable_memory_compare(self): - return self._args.enable_memory_compare - - @property - def enable_communication_compare(self): - return self._args.enable_communication_compare - - @classmethod - def check_profiling_path(cls, file_path: str): - PathManager.input_path_common_check(file_path) - PathManager.check_path_owner_consistent(file_path) - - @classmethod - def check_output_path(cls, output_path: str): - PathManager.check_input_directory_path(output_path) - PathManager.make_dir_safety(output_path) - PathManager.check_path_writeable(output_path) - - def parse_profiling_path(self, file_path: str): - self.check_profiling_path(file_path) - if os.path.isfile(file_path): - (split_file_path, split_file_name) = os.path.split(file_path) - (shot_name, extension) = os.path.splitext(split_file_name) - if extension != ".json": - msg = f"Invalid profiling path suffix: {file_path}" - raise RuntimeError(msg) - json_type = FileReader.check_json_type(file_path) - return {Constant.PROFILING_TYPE: json_type, Constant.PROFILING_PATH: file_path, - Constant.TRACE_PATH: file_path} - ascend_output = os.path.join(file_path, "ASCEND_PROFILER_OUTPUT") - profiler_output = ascend_output if os.path.isdir(ascend_output) else file_path - json_path = os.path.join(profiler_output, "trace_view.json") - if not os.path.isfile(json_path): - msg = (f"The data is not collected by PyTorch Adaptor mode or the data is not parsed. 
" - f"Invalid profiling path: {profiler_output}") - raise RuntimeError(msg) - path_dict = {Constant.PROFILING_TYPE: Constant.NPU, Constant.PROFILING_PATH: file_path, - Constant.TRACE_PATH: json_path, Constant.ASCEND_OUTPUT_PATH: profiler_output} - sub_dirs = os.listdir(file_path) - for dir_name in sub_dirs: - if dir_name == "profiler_info.json" or re.match(r"profiler_info_[0-9]+\.json", dir_name): - path_dict.update({Constant.INFO_JSON_PATH: os.path.join(file_path, dir_name)}) - return path_dict - - def init(self, args: any): - self._args = args - if self._args.max_kernel_num is not None and self._args.max_kernel_num <= Constant.LIMIT_KERNEL: - msg = f"Invalid param, --max_kernel_num has to be greater than {Constant.LIMIT_KERNEL}" - raise RuntimeError(msg) - if not isinstance(self._args.op_name_map, dict): - raise RuntimeError( - "Invalid param, --op_name_map must be dict, for example: --op_name_map={'name1':'name2'}") - if self._args.gpu_flow_cat and len(self._args.gpu_flow_cat) > Constant.MAX_FLOW_CAT_LEN: - msg = f"Invalid param, --gpu_flow_cat exceeded the maximum value {Constant.MAX_FLOW_CAT_LEN}" - raise RuntimeError(msg) - - if not any([self._args.enable_profiling_compare, self._args.enable_operator_compare, - self._args.enable_memory_compare, self._args.enable_communication_compare]): - self._args.enable_profiling_compare = True - self._args.enable_operator_compare = True - self._args.enable_memory_compare = True - self._args.enable_communication_compare = True - - base_profiling_path = PathManager.get_realpath(self._args.base_profiling_path) - self.check_profiling_path(base_profiling_path) - self._base_path_dict = self.parse_profiling_path(base_profiling_path) - comparison_profiling_path = PathManager.get_realpath(self._args.comparison_profiling_path) - self.check_profiling_path(comparison_profiling_path) - self._comparison_path_dict = self.parse_profiling_path(comparison_profiling_path) - - if self._args.output_path: - self.check_output_path(PathManager.get_realpath(self._args.output_path)) diff --git a/profiler/compare_tools/compare_backend/utils/common_func.py b/profiler/compare_tools/compare_backend/utils/common_func.py deleted file mode 100644 index 68a1ab584f..0000000000 --- a/profiler/compare_tools/compare_backend/utils/common_func.py +++ /dev/null @@ -1,95 +0,0 @@ -from decimal import Decimal - -import numpy - - -def calculate_diff_ratio(base_value: float, comparison_value: float): - if not base_value and not comparison_value: - ratio = 1.0 - else: - ratio = float('inf') if not base_value else comparison_value / base_value - return [comparison_value - base_value, ratio] - - -def update_order_id(data_list: list): - for index, data in enumerate(data_list): - if data: - data[0] = index + 1 - - -def convert_to_float(data: any) -> float: - try: - float_value = float(data) - except Exception: - print('[ERROR] Invalid profiling data which failed to convert data to float.') - return 0.0 - return float_value - - -def convert_to_decimal(data: any) -> Decimal: - try: - decimal_value = Decimal(data) - except Exception: - print('[ERROR] Invalid profiling data which failed to convert data to decimal.') - return 0.0 - return decimal_value - - -def longest_common_subsequence_matching(base_ops: list, comparison_ops: list, name_func: any) -> list: - if not comparison_ops: - result_data = [None] * len(base_ops) - for index, value in enumerate(base_ops): - result_data[index] = [value, None] - return result_data - - comparison_len, base_len = len(comparison_ops), len(base_ops) - if 
comparison_len * base_len > 50 * 10 ** 8: - print('[WARNING] The comparison time is expected to exceed 30 minutes, if you want to see the results quickly, ' - 'you can restart comparison task and turn on the switch --disable_details.') - dp_flag = set() # flag for only comparison op - pre_list = [0] * (base_len + 1) - cur_list = [0] * (base_len + 1) - - comparison_index = 1 - iter_comparison_data = iter(comparison_ops) - for comparison_data in iter_comparison_data: - base_index = 1 - iter_base_data = iter(base_ops) - for base_data in iter_base_data: - if name_func(comparison_data) == name_func(base_data): - cur_list[base_index] = pre_list[base_index - 1] + 1 - else: - only_base = cur_list[base_index - 1] - only_comparison = pre_list[base_index] - if only_base < only_comparison: - dp_flag.add(comparison_index * base_len + base_index) - cur_list[base_index] = only_comparison - else: - cur_list[base_index] = only_base - base_index += 1 - pre_list = cur_list - comparison_index += 1 - - matched_op = [] - comparison_index, base_index = comparison_len, base_len - while comparison_index > 0 and base_index > 0: - base_data = base_ops[base_index - 1] - comparison_data = comparison_ops[comparison_index - 1] - if name_func(base_data) == name_func(comparison_data): - matched_op.append([base_data, comparison_data]) - comparison_index -= 1 - base_index -= 1 - elif (comparison_index * base_len + base_index) in dp_flag: - matched_op.append([None, comparison_data]) - comparison_index -= 1 - else: - matched_op.append([base_data, None]) - base_index -= 1 - while comparison_index > 0: - matched_op.append([None, comparison_ops[comparison_index - 1]]) - comparison_index -= 1 - while base_index > 0: - matched_op.append([base_ops[base_index - 1], None]) - base_index -= 1 - matched_op.reverse() - return matched_op diff --git a/profiler/compare_tools/compare_backend/utils/compare_args.py b/profiler/compare_tools/compare_backend/utils/compare_args.py deleted file mode 100644 index ab9bc364f4..0000000000 --- a/profiler/compare_tools/compare_backend/utils/compare_args.py +++ /dev/null @@ -1,24 +0,0 @@ -class Args: - def __init__(self, - base_profiling_path: str = "", - comparison_profiling_path: str = "", - enable_profiling_compare: bool = False, - enable_operator_compare: bool = False, - enable_memory_compare: bool = False, - enable_communication_compare: bool = False, - output_path: str = "", - max_kernel_num: int = None, - op_name_map: dict = {}, - use_input_shape: bool = False, - gpu_flow_cat: str = ""): - self.base_profiling_path = base_profiling_path - self.comparison_profiling_path = comparison_profiling_path - self.enable_profiling_compare = enable_profiling_compare - self.enable_operator_compare = enable_operator_compare - self.enable_memory_compare = enable_memory_compare - self.enable_communication_compare = enable_communication_compare - self.output_path = output_path - self.max_kernel_num = max_kernel_num - self.op_name_map = op_name_map - self.use_input_shape = use_input_shape - self.gpu_flow_cat = gpu_flow_cat diff --git a/profiler/compare_tools/compare_backend/utils/constant.py b/profiler/compare_tools/compare_backend/utils/constant.py deleted file mode 100644 index 1b77b214c8..0000000000 --- a/profiler/compare_tools/compare_backend/utils/constant.py +++ /dev/null @@ -1,80 +0,0 @@ -class Constant(object): - GPU = "GPU" - NPU = "NPU" - NA = 'N/A' - LIMIT_KERNEL = 3 - MAX_PATH_LENGTH = 4096 - MAX_FLOW_CAT_LEN = 20 - MAX_FILE_SIZE = 1024 * 1024 * 1024 * 5 - BYTE_TO_KB = 1024 - YELLOW_COLOR = "FFFF00" - 
GREEN_COLOR = "00FF00" - RED_COLOR = "FF0000" - BLUE_COLOR = "00BFFF" - US_TO_MS = 1000 - KB_TO_MB = 1024 - INVALID_VALUE = -1 - - # epsilon - EPS = 1e-15 - - # autority - FILE_AUTHORITY = 0o640 - DIR_AUTHORITY = 0o750 - - PROFILING_TYPE = "profiling type" - - # path - PROFILING_PATH = "profiling_path" - TRACE_PATH = "trace_path" - MEMORY_DATA_PATH = "memory_data_path" - ASCEND_OUTPUT_PATH = "ascend_output" - INFO_JSON_PATH = "info_path" - - # excel headers - BASE_PROFILING = 'Base Profiling: ' - COMPARISON_PROFILING = 'Comparison Profiling: ' - - # compare type - OPERATOR_COMPARE = "OperatorCompare" - MEMORY_COMPARE = "MemoryCompare" - - # sheet name - OPERATOR_SHEET = "OperatorCompare" - MEMORY_SHEET = "MemoryCompare" - OPERATOR_TOP_SHEET = "OperatorCompareStatistic" - MEMORY_TOP_SHEET = "MemoryCompareStatistic" - COMMUNICATION_SHEET = "CommunicationCompare" - - # table name - OPERATOR_TABLE = "OperatorCompare" - MEMORY_TABLE = "MemoryCompare" - OPERATOR_TOP_TABLE = "OperatorCompareStatistic" - MEMORY_TOP_TABLE = "MemoryCompareStatistic" - COMMUNICATION_TABLE = "CommunicationCompare" - PERFORMANCE_TABLE = "Model Profiling Time Distribution" - MODULE_TABLE = "ModuleCompare" - MODULE_TOP_TABLE = "ModuleCompareStatistic" - - # memory - SIZE = "Size(KB)" - TS = "ts" - ALLOCATION_TIME = "Allocation Time(us)" - RELEASE_TIME = "Release Time(us)" - NAME = "Name" - - OP_KEY = "op_name" - DEVICE_DUR = "dur" - - BASE_DATA = "base_data" - COMPARISON_DATA = "comparison_data" - OVERALL_METRICS = "overall_metrics" - TORCH_OP = "torch_op" - KERNEL_DICT = "kernel_dict" - MEMORY_LIST = "memory_list" - COMMUNICATION_DICT = "comm_dict" - - #compare type - OVERALL_COMPARE = "overall" - - BWD_LIST = ["bwd", "backward", "back"] diff --git a/profiler/compare_tools/compare_backend/utils/excel_config.py b/profiler/compare_tools/compare_backend/utils/excel_config.py deleted file mode 100644 index 306abcdfec..0000000000 --- a/profiler/compare_tools/compare_backend/utils/excel_config.py +++ /dev/null @@ -1,185 +0,0 @@ -from compare_backend.utils.constant import Constant - - -class CellFormatType: - DEFAULT = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'num_format': '#,##0'} # 数字显示整数,无背景色 - DEFAULT_FLOAT = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'num_format': '#,##0.00'} # 保留2位小数,无背景色 - DEFAULT_RATIO = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', - 'border': True, 'num_format': '0.00%'} # 百分比显示,保留2位小数,无背景色 - RED_RATIO = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', - 'border': True, 'num_format': '0.00%', "fg_color": Constant.RED_COLOR} # 百分比显示,保留2位小数,单元格背景色为红色 - BOLD_STR = {"font_name": "Arial", 'font_size': 11, 'align': 'left', 'valign': 'vcenter', 'border': True, - 'bold': True} # 字符串,无背景色,字体加粗 - BLUE_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.BLUE_COLOR, 'align': 'left', - 'valign': 'vcenter', 'bold': True, 'border': True} # 蓝色背景,加粗 - GREEN_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.GREEN_COLOR, 'align': 'left', - 'valign': 'vcenter', 'bold': True, 'border': True} # 绿色背景,加粗 - YELLOW_BOLD = {"font_name": "Arial", 'font_size': 11, 'fg_color': Constant.YELLOW_COLOR, 'align': 'left', - 'valign': 'vcenter', 'bold': True, 'border': True} # 黄色背景,加粗 - - -class ExcelConfig(object): - ORDER = "Order Id" - OPERATOR_NAME = "Operator Name" - INPUT_SHAPE = "Input Shape" - INPUT_TYPE = "Input Type" - 
KERNEL_DETAILS = "Kernel Details" - MEMORY_DETAILS = "Allocated Details" - DEVICE_DURATION = "Device Duration(us)" - DIFF_RATIO = "Diff Ratio" - DIFF_DUR = "Diff Duration(us)" - DIFF_SIZE = "Diff Size(KB)" - SIZE = "Size(KB)" - TOP = "Top" - BASE_DEVICE_DURATION = "Base Device Duration(ms)" - COMPARISON_DEVICE_DURATION = "Comparison Device Duration(ms)" - BASE_OPERATOR_NUMBER = "Base Operator Number" - COMPARISON_OPERATOR_NUMBER = "Comparison Operator Number" - DIFF_TIME = "Diff Duration(ms)" - BASE_ALLOCATED_TIMES = "Base Allocated Duration(ms)" - COMPARISON_ALLOCATED_TIMES = "Comparison Allocated Duration(ms)" - BASE_ALLOCATED_MEMORY = "Base Allocated Memory(MB)" - COMPARISON_ALLOCATED_MEMORY = "Comparison Allocated Memory(MB)" - DIFF_MEMORY = "Diff Memory(MB)" - COMM_OP_NAME = "Communication OP Name" - TASK_NAME = "Task Name" - CALLS = "Calls" - TOTAL_DURATION = "Total Duration(us)" - AVG_DURATION = "Avg Duration(us)" - MAX_DURATION = "Max Duration(us)" - MIN_DURATION = "Min Duration(us)" - MODULE_CLASS = "Module Class" - MODULE_NAME = "Module Name" - DEVICE_SELF_TIME = "Device Self Time(ms)" - DEVICE_TOTAL_TIME = "Device Total Time(ms)" - DIFF_SELF_TIME = "Device Self Time Diff(ms)" - DIFF_TOTAL_RATIO = "Total Diff Ratio" - DIFF_TOTAL_TIME = "Device Total Time Diff(ms)" - DEVICE_SELF_TIME_US = "Device Self Time(us)" - DEVICE_TOTAL_TIME_US = "Device Total Time(us)" - DIFF_SELF_TIME_US = "Device Self Time Diff(us)" - DIFF_TOTAL_TIME_US = "Device Total Time Diff(us)" - NUMBER = "Number" - MODULE_LEVEL = "Module Level" - BASE_CALL_STACK = "Base Call Stack" - COMPARISON_CALL_STACK = "Comparison Call Stack" - - HEADERS = { - Constant.OPERATOR_TABLE: [ - {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, - {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, - {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_DUR, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} - ], - Constant.MEMORY_TABLE: [ - {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, - {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": MEMORY_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, - {"name": INPUT_SHAPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": INPUT_TYPE, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": MEMORY_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_SIZE, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_RATIO, "type": 
CellFormatType.DEFAULT_RATIO, "width": 20} - ], - Constant.OPERATOR_TOP_TABLE: [ - {"name": TOP, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, - {"name": BASE_DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 25}, - {"name": BASE_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, - {"name": COMPARISON_DEVICE_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 30}, - {"name": COMPARISON_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 30}, - {"name": DIFF_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} - ], - Constant.MEMORY_TOP_TABLE: [ - {"name": TOP, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": OPERATOR_NAME, "type": CellFormatType.BOLD_STR, "width": 30}, - {"name": BASE_ALLOCATED_TIMES, "type": CellFormatType.DEFAULT_FLOAT, "width": 25}, - {"name": BASE_ALLOCATED_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 30}, - {"name": BASE_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, - {"name": COMPARISON_ALLOCATED_TIMES, "type": CellFormatType.DEFAULT_FLOAT, "width": 27}, - {"name": COMPARISON_ALLOCATED_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 33}, - {"name": COMPARISON_OPERATOR_NUMBER, "type": CellFormatType.DEFAULT, "width": 25}, - {"name": DIFF_MEMORY, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} - ], - Constant.COMMUNICATION_TABLE: [ - {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": COMM_OP_NAME, "type": CellFormatType.BOLD_STR, "width": 25}, - {"name": TASK_NAME, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": CALLS, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": COMM_OP_NAME, "type": CellFormatType.BOLD_STR, "width": 25}, - {"name": TASK_NAME, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": CALLS, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": TOTAL_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": AVG_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": MAX_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": MIN_DURATION, "type": CellFormatType.DEFAULT_FLOAT, "width": 17}, - {"name": DIFF_DUR, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 20} - ], - Constant.MODULE_TOP_TABLE: [ - {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": MODULE_CLASS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": MODULE_LEVEL, "type": CellFormatType.DEFAULT, "width": 15}, - {"name": MODULE_NAME, "type": CellFormatType.DEFAULT, "width": 35}, - {"name": OPERATOR_NAME, "type": CellFormatType.DEFAULT, "width": 25}, - {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": DEVICE_SELF_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": DEVICE_TOTAL_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": KERNEL_DETAILS, "type": 
CellFormatType.DEFAULT, "width": 20}, - {"name": DEVICE_SELF_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": NUMBER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": DEVICE_TOTAL_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_TOTAL_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_SELF_TIME, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 15}, - {"name": BASE_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30}, - {"name": COMPARISON_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30} - ], - Constant.MODULE_TABLE: [ - {"name": ORDER, "type": CellFormatType.DEFAULT, "width": 10}, - {"name": MODULE_CLASS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": MODULE_LEVEL, "type": CellFormatType.DEFAULT, "width": 15}, - {"name": MODULE_NAME, "type": CellFormatType.DEFAULT, "width": 35}, - {"name": OPERATOR_NAME, "type": CellFormatType.DEFAULT, "width": 25}, - {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": DEVICE_SELF_TIME_US, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DEVICE_TOTAL_TIME_US, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": OPERATOR_NAME, "type": CellFormatType.DEFAULT, "width": 25}, - {"name": KERNEL_DETAILS, "type": CellFormatType.DEFAULT, "width": 20}, - {"name": DEVICE_SELF_TIME_US, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DEVICE_TOTAL_TIME_US, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_TOTAL_TIME_US, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_SELF_TIME_US, "type": CellFormatType.DEFAULT_FLOAT, "width": 20}, - {"name": DIFF_TOTAL_RATIO, "type": CellFormatType.DEFAULT_RATIO, "width": 15}, - {"name": BASE_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30}, - {"name": COMPARISON_CALL_STACK, "type": CellFormatType.DEFAULT, "width": 30} - ] - } - - OVERHEAD = {Constant.OPERATOR_TABLE: ["B1:F1", "G1:K1"], Constant.MEMORY_TABLE: ["B1:F1", "G1:K1"], - Constant.COMMUNICATION_TABLE: ["B1:H1", "I1:O1"], Constant.OPERATOR_TOP_TABLE: ["C1:D1", "E1:F1"], - Constant.MEMORY_TOP_TABLE: ["C1:E1", "F1:H1"], Constant.MODULE_TOP_TABLE: ["F1:I1", "J1:M1"], - Constant.MODULE_TABLE: ["E1:H1", "I1:L1"]} diff --git a/profiler/compare_tools/compare_backend/utils/file_reader.py b/profiler/compare_tools/compare_backend/utils/file_reader.py deleted file mode 100644 index b4ae786388..0000000000 --- a/profiler/compare_tools/compare_backend/utils/file_reader.py +++ /dev/null @@ -1,64 +0,0 @@ -import csv -import json -import os - -from common_func.path_manager import PathManager -from compare_backend.utils.constant import Constant - - -class FileReader: - - @classmethod - def read_trace_file(cls, file_path: str) -> any: - PathManager.check_path_readable(file_path) - if not os.path.isfile(file_path): - raise FileNotFoundError("File not exists.") - file_size = os.path.getsize(file_path) - if file_size <= 0: - return [] - if file_size > Constant.MAX_FILE_SIZE: - check_msg = input( - f"The file({file_path}) size exceeds the preset max value. Continue reading the file? 
[y/n]") - if check_msg.lower() != "y": - print(f"[WARNING] The user choose not to read the file: {file_path}") - return [] - try: - with open(file_path, "rt") as file: - json_data = json.loads(file.read()) - except Exception as e: - msg = f"Can't read file: {file_path}" - raise RuntimeError(msg) from e - return json_data - - @classmethod - def read_csv_file(cls, file_path: str, bean_class: any = None) -> any: - PathManager.check_path_readable(file_path) - if not os.path.isfile(file_path): - raise FileNotFoundError("File not exists.") - file_size = os.path.getsize(file_path) - if file_size <= 0: - return [] - if file_size > Constant.MAX_FILE_SIZE: - check_msg = input( - f"The file({file_path}) size exceeds the preset max value. Continue reading the file? [y/n]") - if check_msg.lower() != "y": - print(f"[WARNING] The user choose not to read the file: {file_path}") - return [] - result_data = [] - try: - with open(file_path, newline="") as csv_file: - reader = csv.DictReader(csv_file) - for row in reader: - row_data = bean_class(row) if bean_class else row - result_data.append(row_data) - except Exception as e: - msg = f"Failed to read the file: {file_path}" - raise RuntimeError(msg) from e - return result_data - - @classmethod - def check_json_type(cls, file_path: str) -> str: - json_data = cls.read_trace_file(file_path) - if isinstance(json_data, dict): - return Constant.GPU - return Constant.NPU diff --git a/profiler/compare_tools/compare_backend/utils/module_node.py b/profiler/compare_tools/compare_backend/utils/module_node.py deleted file mode 100644 index f85606094e..0000000000 --- a/profiler/compare_tools/compare_backend/utils/module_node.py +++ /dev/null @@ -1,171 +0,0 @@ -import re -from math import ceil - -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.utils.torch_op_node import TorchOpNode - - -class ModuleNode: - ts = "ts" - kernels = "kernels" - - def __init__(self, event: TraceEventBean, parent_node=None): - self._event = event - self._parent_node = parent_node - self._child_nodes = [] - self._module_name = f"{parent_node.module_name}/{event.name}" if parent_node else event.name - self._module_level = parent_node.module_level + 1 if parent_node else 1 - self._kernel_self_list = [] - self._kernel_total_list = [] - self._call_stack = f"{parent_node.call_stack};\n{event.name}" if parent_node and parent_node.call_stack \ - else event.name - self._root_torch_op_node = TorchOpNode() - self._cur_torch_op_node = self._root_torch_op_node - - @property - def module_name(self): - return self._module_name - - @property - def module_class(self): - pattern = re.compile('_[0-9]+$') - return pattern.sub('', self.name.split("/")[-1]) - - @property - def module_level(self): - return self._module_level - - @property - def name(self): - return self._event.name - - @property - def parent_node(self): - return self._parent_node - - @property - def child_nodes(self): - return self._child_nodes - - @property - def dur(self): - return self._event.dur - - @property - def start_time(self): - return self._event.start_time - - @property - def end_time(self): - return self._event.end_time - - @property - def host_self_dur(self): - return self.dur - sum([node.dur for node in self.child_nodes]) - - @property - def device_self_dur(self): - dur = 0 - for kernel_dict in self._kernel_self_list: - kernel_list = kernel_dict.get(self.kernels, []) - dur += sum([kernel.device_dur for kernel in kernel_list]) - return dur - - @property - def 
device_total_dur(self): - dur = 0 - for kernel_dict in self._kernel_total_list: - kernel_list = kernel_dict.get(self.kernels, []) - dur += sum([kernel.device_dur for kernel in kernel_list]) - return dur - - @property - def kernel_details(self): - kernel_details = "" - for kernel_dict in self._kernel_self_list: - kernel_list = kernel_dict.get(self.kernels, []) - for kernel in kernel_list: - kernel_details += kernel.kernel_details - return kernel_details - - @property - def toy_layer_api_list(self): - return self._root_torch_op_node.child_nodes - - @property - def call_stack(self): - return self._call_stack - - @staticmethod - def _binary_search(ts_time, parent_node): - if not parent_node.child_nodes: - return None - right = len(parent_node.child_nodes) - 1 - left = 0 - while right > left: - mid = left + ceil((right - left) / 2) - if ts_time >= parent_node.child_nodes[mid].start_time: - left = mid - else: - right = mid - 1 - if parent_node.child_nodes[left].start_time < ts_time < parent_node.child_nodes[left].end_time: - return parent_node.child_nodes[left] - return None - - def reset_call_stack(self, call_stack): - self._call_stack = call_stack - - def update_child_nodes(self, node): - self._child_nodes.append(node) - - def update_kernel_list(self, ts, kernel_list: list): - self._update_kernel_self_list(ts, kernel_list) - node = self - while node.parent_node: - node._update_kernel_total_list(ts, kernel_list) - node = node.parent_node - - def _update_kernel_self_list(self, ts, kernel_list: list): - self._kernel_self_list.append({self.ts: ts, self.kernels: kernel_list}) - - def _update_kernel_total_list(self, ts, kernel_list: list): - self._kernel_total_list.append({self.ts: ts, self.kernels: kernel_list}) - - def find_module_call(self, ts_time): - call_module = self._binary_search(ts_time, self) - while call_module: - module = self._binary_search(ts_time, call_module) - if not module: - return call_module - call_module = module - return call_module - - def find_torch_op_call(self, event): - while self._cur_torch_op_node: - if self._cur_torch_op_node != self._root_torch_op_node and \ - event.start_time > self._cur_torch_op_node.end_time: - self._cur_torch_op_node = self._cur_torch_op_node.parent - continue - tree_node = TorchOpNode(event, self._cur_torch_op_node) - self._cur_torch_op_node.add_child_node(tree_node) - self._cur_torch_op_node = tree_node - break - - def update_torch_op_kernel_list(self): - top_node_list = self._root_torch_op_node.child_nodes - if not top_node_list: - return - top_node_list.sort(key=lambda x: x.start_time) - cur_index = 0 - self._kernel_self_list.sort(key=lambda x: x.get(self.ts, 0)) - for kernel_dict in self._kernel_self_list: - ts = kernel_dict.get(self.ts, 0) - kernel_list = kernel_dict.get(self.kernels, []) - while cur_index < len(top_node_list): - if ts > top_node_list[cur_index].end_time: - cur_index += 1 - continue - if ts < top_node_list[cur_index].start_time: - break - top_node_list[cur_index].update_kernel_list(kernel_list) - break diff --git a/profiler/compare_tools/compare_backend/utils/name_function.py b/profiler/compare_tools/compare_backend/utils/name_function.py deleted file mode 100644 index cd79e8a03f..0000000000 --- a/profiler/compare_tools/compare_backend/utils/name_function.py +++ /dev/null @@ -1,52 +0,0 @@ -from compare_backend.utils.module_node import ModuleNode -from compare_backend.utils.torch_op_node import TorchOpNode - - -class NameFunction: - def __init__(self, args: any): - self.args = args - - @classmethod - def get_name(cls, 
op_node: TorchOpNode) -> str: - return op_node.name - - @classmethod - def get_full_name(cls, op_node: TorchOpNode) -> str: - if isinstance(op_node.origin_input_shape, list): - data = [] - for dim in op_node.origin_input_shape: - data.append(','.join([str(x) for x in dim])) - input_shape = ';\r\n'.join(data) - return f'{op_node.name}{input_shape}' - return f'{op_node.name}{op_node.input_shape}' - - def get_name_func(self): - if not self.args.op_name_map and not self.args.use_input_shape: - name_func = self.get_name - elif self.args.op_name_map and not self.args.use_input_shape: - name_func = self.get_map_name - elif not self.args.op_name_map and self.args.use_input_shape: - name_func = self.get_full_name - else: - name_func = self.get_full_map_name - return name_func - - def get_map_name(self, op_node: TorchOpNode) -> str: - return self.args.op_name_map.get(op_node.name, op_node.name) - - def get_full_map_name(self, op_node: TorchOpNode) -> str: - if isinstance(op_node.origin_input_shape, list): - data = [] - for dim in op_node.origin_input_shape: - data.append(','.join([str(x) for x in dim])) - input_shape = ';\r\n'.join(data) - return f'{self.args.op_name_map.get(op_node.name, op_node.name)}{input_shape}' - return f'{self.args.op_name_map.get(op_node.name, op_node.name)}{op_node.input_shape}' - - def get_module_name(self, module: ModuleNode) -> str: - if not self.args.op_name_map: - return module.module_name - module = module.module_name - for old_name, new_name in self.args.op_name_map.items(): - module = module.replace(old_name, new_name) - return module diff --git a/profiler/compare_tools/compare_backend/utils/torch_op_node.py b/profiler/compare_tools/compare_backend/utils/torch_op_node.py deleted file mode 100644 index 690c46cd51..0000000000 --- a/profiler/compare_tools/compare_backend/utils/torch_op_node.py +++ /dev/null @@ -1,92 +0,0 @@ -from compare_backend.compare_bean.origin_data_bean.compare_event import MemoryEvent -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.utils.constant import Constant - - -class TorchOpNode: - def __init__(self, event=TraceEventBean, parent_node=None): - self._event = event - self._parent_node = parent_node - self._child_nodes = [] - self._kernel_list = [] - self._kernel_num = 0 - self._memory_allocated_list = [] - - @property - def start_time(self): - return self._event.start_time - - @property - def end_time(self): - return self._event.end_time - - @property - def name(self): - return self._event.name - - @property - def input_shape(self): - return str(self._event.args.get("Input Dims", Constant.NA)) - - @property - def origin_input_shape(self): - return self._event.args.get("Input Dims", Constant.NA) - - @property - def input_type(self): - return str(self._event.args.get("Input type", Constant.NA)) - - @property - def call_stack(self): - return str(self._event.args.get("Call stack", Constant.NA)) - - @property - def parent(self): - return self._parent_node - - @property - def child_nodes(self): - return self._child_nodes - - @property - def kernel_list(self): - return self._kernel_list - - @property - def kernel_num(self): - return self._kernel_num - - @property - def memory_allocated(self): - return self._memory_allocated_list - - @property - def device_dur(self): - return sum([kernel.device_dur for kernel in self._kernel_list]) - - def add_child_node(self, child_node): - self._child_nodes.append(child_node) - - def set_kernel_list(self, kernel_list: list): - if not kernel_list: - return -
self._kernel_list.extend(kernel_list) - kernel_num = len(kernel_list) - cur_node = self - while cur_node._parent_node: - cur_node._kernel_num += kernel_num - cur_node = cur_node._parent_node - - def update_kernel_list(self, kernel_list: list): - if not kernel_list: - return - self._kernel_list.extend(kernel_list) - - def set_memory_allocated(self, memory_allocated: MemoryEvent): - self._memory_allocated_list.append(memory_allocated) - - def is_step_profiler(self) -> bool: - return self._event.is_step_profiler() - - def get_op_info(self) -> list: - return [self.name, self.input_shape, self.input_type, self.call_stack] diff --git a/profiler/compare_tools/compare_backend/utils/tree_builder.py b/profiler/compare_tools/compare_backend/utils/tree_builder.py deleted file mode 100644 index 34c1fe1a1f..0000000000 --- a/profiler/compare_tools/compare_backend/utils/tree_builder.py +++ /dev/null @@ -1,82 +0,0 @@ -from queue import Queue - -from compare_backend.compare_bean.origin_data_bean.trace_event_bean import TraceEventBean -from compare_backend.utils.module_node import ModuleNode -from compare_backend.utils.torch_op_node import TorchOpNode - - -class TreeBuilder: - @classmethod - def build_tree(cls, event_list: list, kernel_dict: dict, memory_list: list) -> TorchOpNode: - root_node = TorchOpNode() - all_event_list = [] - all_event_list.extend(event_list) - all_event_list.extend(memory_list) - all_event_list.sort(key=lambda x: x.start_time) - last_node = root_node - for event in all_event_list: - while last_node: - if last_node != root_node and event.start_time > last_node.end_time: - last_node = last_node.parent - continue - if event.is_torch_op: - tree_node = TorchOpNode(event, last_node) - last_node.add_child_node(tree_node) - last_node = tree_node - tree_node.set_kernel_list(kernel_dict.get(event.start_time, [])) - else: - event.set_name(last_node.name) - last_node.set_memory_allocated(event) - break - return root_node - - @classmethod - def get_total_kernels(cls, root_node: TorchOpNode) -> list: - result_list = [] - result_list.extend(root_node.kernel_list) - node_queue = Queue() - for child_node in root_node.child_nodes: - node_queue.put(child_node) - while not node_queue.empty(): - tree_node = node_queue.get() - result_list.extend(tree_node.kernel_list) - for child_node in tree_node.child_nodes: - node_queue.put(child_node) - return result_list - - @classmethod - def get_total_memory(cls, root_node: TorchOpNode) -> list: - result_list = [] - result_list.extend(root_node.memory_allocated) - node_queue = Queue() - for child_node in root_node.child_nodes: - node_queue.put(child_node) - while not node_queue.empty(): - tree_node = node_queue.get() - result_list.extend(tree_node.memory_allocated) - for child_node in tree_node.child_nodes: - node_queue.put(child_node) - return result_list - - @classmethod - def build_module_tree(cls, event_list: list, kernel_dict: dict): - root_node = ModuleNode(TraceEventBean({})) - event_list.sort(key=lambda x: x.start_time) - last_node = root_node - for event in event_list: - while last_node: - if last_node != root_node and event.start_time > last_node.end_time: - last_node = last_node.parent_node - continue - if event.is_x_mode(): - tree_node = ModuleNode(event, last_node) - last_node.update_child_nodes(tree_node) - last_node = tree_node - break - if last_node == root_node: - break - kernel_list = kernel_dict.get(event.start_time, []) - if kernel_list: - last_node.update_kernel_list(event.start_time, kernel_list) - break - return root_node diff --git 
a/profiler/compare_tools/compare_backend/view/__init__.py b/profiler/compare_tools/compare_backend/view/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_backend/view/base_view.py b/profiler/compare_tools/compare_backend/view/base_view.py deleted file mode 100644 index d18980b7de..0000000000 --- a/profiler/compare_tools/compare_backend/view/base_view.py +++ /dev/null @@ -1,10 +0,0 @@ -from abc import ABC, abstractmethod - - -class BaseView(ABC): - def __init__(self, data_dict: dict): - self._data_dict = data_dict - - @abstractmethod - def generate_view(self): - raise NotImplementedError("Function generate_view need to be implemented.") diff --git a/profiler/compare_tools/compare_backend/view/excel_view.py b/profiler/compare_tools/compare_backend/view/excel_view.py deleted file mode 100644 index 73b82b1cd3..0000000000 --- a/profiler/compare_tools/compare_backend/view/excel_view.py +++ /dev/null @@ -1,22 +0,0 @@ -import os - -from xlsxwriter import Workbook - -from compare_backend.view.base_view import BaseView -from compare_backend.view.work_sheet_creator import WorkSheetCreator -from compare_backend.utils.constant import Constant - - -class ExcelView(BaseView): - - def __init__(self, data_dict: dict, file_path: str, args: any): - super().__init__(data_dict) - self._file_path = file_path - self._args = args - - def generate_view(self): - workbook = Workbook(self._file_path) - for sheet_name, data in self._data_dict.items(): - WorkSheetCreator(workbook, sheet_name, data, self._args).create_sheet() - workbook.close() - os.chmod(self._file_path, Constant.FILE_AUTHORITY) diff --git a/profiler/compare_tools/compare_backend/view/screen_view.py b/profiler/compare_tools/compare_backend/view/screen_view.py deleted file mode 100644 index 150b36c6fe..0000000000 --- a/profiler/compare_tools/compare_backend/view/screen_view.py +++ /dev/null @@ -1,19 +0,0 @@ -from prettytable import PrettyTable - -from compare_backend.view.base_view import BaseView - - -class ScreenView(BaseView): - def __init__(self, data_dict: dict): - super().__init__(data_dict) - - def generate_view(self): - for sheet_name, data in self._data_dict.items(): - if not data.get("rows", []): - return - table = PrettyTable() - table.title = sheet_name - table.field_names = data.get("headers", []) - for row in data.get("rows", []): - table.add_row(row) - print(table) diff --git a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py b/profiler/compare_tools/compare_backend/view/work_sheet_creator.py deleted file mode 100644 index 7a33168da3..0000000000 --- a/profiler/compare_tools/compare_backend/view/work_sheet_creator.py +++ /dev/null @@ -1,60 +0,0 @@ -from xlsxwriter import Workbook - -from compare_backend.utils.excel_config import ExcelConfig, CellFormatType - - -class WorkSheetCreator: - def __init__(self, work_book: Workbook, sheet_name: str, data: dict, args: any): - self._work_book = work_book - self._sheet_name = sheet_name - self._data = data - self._args = args - self._work_sheet = None - self._row_id = 1 - self._field_format = {} - self._diff_ratio_index = None - self._col_ids = "ABCDEFGHIJKLMNOPQRSTUVW" - - def create_sheet(self): - if not self._data.get("rows", []): - return - self._work_sheet = self._work_book.add_worksheet(self._sheet_name) - self._write_headers() - self._write_data() - - def _write_headers(self): - base_header_format = self._work_book.add_format(CellFormatType.GREEN_BOLD) - com_header_format = 
self._work_book.add_format(CellFormatType.YELLOW_BOLD) - com_index_range = [-1, -1] - overhead = self._data.get("overhead", []) - if overhead: - base_path = f"Base Profiling: {self._args.base_profiling_path}" - self._work_sheet.merge_range(overhead[0], base_path, base_header_format) - com_index_range = [self._col_ids.index(overhead[1].split(":")[0][0]), - self._col_ids.index(overhead[1].split(":")[1][0])] - comparison_path = f"Comparison Profiling: {self._args.comparison_profiling_path}" - self._work_sheet.merge_range(overhead[1], comparison_path, com_header_format) - self._row_id += 2 - for index, header in enumerate(self._data.get("headers")): - if index in range(com_index_range[0], com_index_range[1] + 1): - header_format = com_header_format - else: - header_format = base_header_format - col_id = self._col_ids[index] - self._work_sheet.set_column(f"{col_id}:{col_id}", header.get("width")) - self._work_sheet.write(f"{col_id}{self._row_id}", header.get("name"), header_format) - self._field_format[index] = self._work_book.add_format(header.get("type")) - if header.get("name") in (ExcelConfig.DIFF_RATIO, ExcelConfig.DIFF_TOTAL_RATIO): - self._diff_ratio_index = index - self._row_id += 1 - - def _write_data(self): - red_ratio_format = self._work_book.add_format(CellFormatType.RED_RATIO) - for data in self._data.get("rows"): - for index, cell_data in enumerate(data): - cell_format = self._field_format.get(index) - if index == self._diff_ratio_index and cell_data and cell_data > 1: - cell_format = red_ratio_format - cell_data = "INF" if cell_data == float('inf') else cell_data - self._work_sheet.write(f"{self._col_ids[index]}{self._row_id}", cell_data, cell_format) - self._row_id += 1 diff --git a/profiler/compare_tools/compare_interface/__init__.py b/profiler/compare_tools/compare_interface/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/profiler/compare_tools/compare_interface/comparison_interface.py b/profiler/compare_tools/compare_interface/comparison_interface.py deleted file mode 100644 index 919095b310..0000000000 --- a/profiler/compare_tools/compare_interface/comparison_interface.py +++ /dev/null @@ -1,31 +0,0 @@ -import sys -import os - -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), "cluster_analyse")) -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from compare_backend.comparison_generator import ComparisonGenerator -from compare_backend.disaggregate.overall_perf_interface import OverallPerfInterface -from compare_backend.utils.compare_args import Args -from compare_backend.utils.constant import Constant - - -class ComparisonInterface: - def __init__(self, base_profiling_path: str, comparison_profiling_path: str = ""): - self.base_profiling_path = base_profiling_path - if comparison_profiling_path: - self._args = Args(base_profiling_path=base_profiling_path, - comparison_profiling_path=comparison_profiling_path) - - def compare(self, compare_type: str) -> dict: - if compare_type == Constant.OVERALL_COMPARE: - self._args.enable_profiling_compare = True - - return ComparisonGenerator(self._args).run_interface(compare_type) - - def disaggregate_perf(self, compare_type: str) -> dict: - if compare_type != Constant.OVERALL_COMPARE: - print(f'[ERROR] Invalid compare_type value: {compare_type}, which is not supported.') - return {} - return OverallPerfInterface(self.base_profiling_path).run() diff --git a/profiler/compare_tools/performance_compare.py 
b/profiler/compare_tools/performance_compare.py deleted file mode 100644 index 8de0a72cbd..0000000000 --- a/profiler/compare_tools/performance_compare.py +++ /dev/null @@ -1,37 +0,0 @@ -import argparse -import ast -import datetime -import os.path -import sys - -sys.path.append( - os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "cluster_analyse")) - -from compare_backend.comparison_generator import ComparisonGenerator - - -def main(): - parser = argparse.ArgumentParser(description="Compare trace of GPU and NPU") - parser.add_argument("base_profiling_path", type=str, default='', help="file path of the baseline profiling data") - parser.add_argument("comparison_profiling_path", type=str, default='', help="file path of the comparison profiling data") - parser.add_argument("--enable_profiling_compare", default=False, action='store_true', help="enable overall performance comparison") - parser.add_argument("--enable_operator_compare", default=False, action='store_true', help="enable operator performance comparison") - parser.add_argument("--enable_memory_compare", default=False, action='store_true', help="enable operator memory comparison") - parser.add_argument("--enable_communication_compare", default=False, action='store_true', help="enable communication performance comparison") - parser.add_argument("--disable_details", default=False, action='store_true', help="do not show comparison details") - parser.add_argument("--output_path", type=str, default='', help="output path of the comparison results") - parser.add_argument("--max_kernel_num", type=int, help="limit on the number of kernels per torch op") - parser.add_argument("--op_name_map", type=ast.literal_eval, default={}, - help="mapping between equivalent GPU and NPU operator names, passed in as a dict") - parser.add_argument("--use_input_shape", default=False, action='store_true', help="enable exact operator matching by input shape") - parser.add_argument("--gpu_flow_cat", type=str, default='', help="category identifier of gpu flow events") - args = parser.parse_args() - - ComparisonGenerator(args).run() - - -if __name__ == "__main__": - start_time = datetime.datetime.now() - main() - end_time = datetime.datetime.now() - print(f'[INFO] The comparison task has been completed in a total time of {end_time - start_time}') -- Gitee