From 98c3523ea97b7ddee2337a8f2e39b2002d92f8d7 Mon Sep 17 00:00:00 2001 From: yu-liang-bin Date: Mon, 30 Jun 2025 11:08:32 +0800 Subject: [PATCH] update profiler docs --- docs/sample_code/profiler/dynamic_profiler.py | 23 +++++++---- tutorials/source_en/debug/profiler.md | 41 +++++++++++-------- tutorials/source_zh_cn/debug/profiler.md | 41 +++++++++++-------- 3 files changed, 66 insertions(+), 39 deletions(-) diff --git a/docs/sample_code/profiler/dynamic_profiler.py b/docs/sample_code/profiler/dynamic_profiler.py index d86e14679b..4dee054828 100644 --- a/docs/sample_code/profiler/dynamic_profiler.py +++ b/docs/sample_code/profiler/dynamic_profiler.py @@ -52,16 +52,25 @@ if __name__ == '__main__': data_cfg = { "start_step": 2, "stop_step": 5, - "aic_metrics": -1, - "profiler_level": 0, - "activities": 0, - "export_type": 0, + "aic_metrics": "AiCoreNone", + "profiler_level": "Level0", + "analyse_mode": 0, + "activities": ["CPU", "NPU"], + "export_type": ["text"], "profile_memory": False, "mstx": False, - "analyse_mode": 0, "parallel_strategy": False, "with_stack": False, - "data_simplification": True + "data_simplification": True, + "l2_cache": False, + "analyse": True, + "record_shape": False, + "prof_path": "./data", + "mstx_domain_include": [], + "mstx_domain_exclude": [], + "host_sys": [], + "sys_io": False, + "sys_interconnection": False } output_path = "./cfg_path" cfg_path = os.path.join(output_path, "profiler_config.json") @@ -73,7 +82,7 @@ if __name__ == '__main__': # Define a network of training models net = Net() STEP_NUM = 15 - dp = DynamicProfilerMonitor(cfg_path=output_path, output_path=output_path) + dp = DynamicProfilerMonitor(cfg_path=output_path) for i in range(STEP_NUM): train(net) # Call step collection diff --git a/tutorials/source_en/debug/profiler.md b/tutorials/source_en/debug/profiler.md index 8fd491f142..4d8bd8675d 100644 --- a/tutorials/source_en/debug/profiler.md +++ b/tutorials/source_en/debug/profiler.md @@ -14,7 +14,7 @@ This tutorial 
introduces how to use MindSpore Profiler for performance tuning on 3. Run the training script; -4. View the performance data through [MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/msinsightug/msascendinsightug/AscendInsight_0002.html). +4. View the performance data through [MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/msinsightug/msascendinsightug/AscendInsight_0002.html). ## Usage @@ -111,16 +111,25 @@ JSON configuration example as follows: { "start_step": 2, "stop_step": 5, - "aic_metrics": -1, - "profiler_level": 0, - "activities": 0, - "export_type": 0, + "aic_metrics": "AiCoreNone", + "profiler_level": "Level0", + "analyse_mode": 0, + "activities": ["CPU", "NPU"], + "export_type": ["text"], "profile_memory": false, "mstx": false, - "analyse_mode": 0, "parallel_strategy": false, "with_stack": false, - "data_simplification": true + "data_simplification": true, + "l2_cache": false, + "analyse": true, + "record_shape": false, + "prof_path": "./data", + "mstx_domain_include": [], + "mstx_domain_exclude": [], + "host_sys": [], + "sys_io": false, + "sys_interconnection": false } ``` @@ -189,7 +198,7 @@ When using MindSpore to train a model, in order to analyze performance bottlenec After collecting performance data, the original data will be stored according to the following directory structure: -> - The following data files are not required to be opened and viewed by users. Users can refer to the [MindStudio Insight user guide](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/msinsightug/msascendinsightug/AscendInsight_0002.html) for viewing and analyzing performance data. +> - The following data files are not required to be opened and viewed by users. Users can refer to the [MindStudio Insight user guide](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/msinsightug/msascendinsightug/AscendInsight_0002.html) for viewing and analyzing performance data. 
> - The following is the full set of result files, the actual file number and content depend on the user's parameter configuration and the actual training scenario, if the user does not configure the related parameters or does not involve the related scenarios in the training, the corresponding data files will not be generated. ```sh @@ -239,25 +248,25 @@ MindSpore Profiler interface will associate and integrate the framework side dat The `ascend_mindspore_profiler_{Rank_ID}.db` file is controlled by the `ExportType.Db` switch and mainly collects all performance data in .db format. -For detailed introduction, refer to [ascend_mindspore_profiler_{Rank_ID}.db](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0026.html). +For detailed introduction, refer to [ascend_mindspore_profiler_{Rank_ID}.db](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0026.html). ### communication_analyzer.db `communication_analyzer.db` file is controlled by the `ExportType.Db` switch, the file is mainly unified communication class segment time, copy information, bandwidth and other information, in order to carry out communication data analysis. Communication data exists only in multi-card, multi-node, or cluster scenarios. -For detailed introduction, refer to [communication_analyzer.db](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0027.html). +For detailed introduction, refer to [communication_analyzer.db](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0027.html). ### communication.json `communication.json` file records detailed information such as communication time consumption and bandwidth of communication class operators. -For detailed introduction, refer to [communication.json](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0027.html). 
+For detailed introduction, refer to [communication.json](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0027.html). ### communication_matrix.json `communication_matrix.json` file records the basic information of the communication small operator, including communication size, communication bandwidth, communication rank and other information. -For detailed introduction, refer to [communication_matrix.json](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0027.html). +For detailed introduction, refer to [communication_matrix.json](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0027.html). ### dataset.csv @@ -277,7 +286,7 @@ For detailed introduction, refer to [communication_matrix.json](https://www.hias The difference from the data collected by the Ascend PyTorch Profiler interface is that when the `with_stack` switch is turned on, MindSpore Profiler will concatenate the stack information to the `Name` field. -For other fields, see [kernel_details.csv](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0035.html). +For other fields, see [kernel_details.csv](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0035.html). ### minddata_pipeline_raw_{Rank_ID}.csv @@ -325,11 +334,11 @@ For other fields, see [kernel_details.csv](https://www.hiascend.com/document/det `trace_view.json` is recommended to be opened using MindStudio Insight tool or chrome://tracing/. MindSpore Profiler does not support the record_shapes and GC functions. -For detailed introduction, refer to [trace_view.json](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0035.html). 
+For detailed introduction, refer to [trace_view.json](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0035.html). ### Other Performance Data -The specific field and meaning of other performance data files can be referred to [Ascend official documentation](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0035.html). +For the specific fields and meanings of other performance data files, refer to the [Ascend official documentation](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0035.html). ## Performance Tuning Case @@ -339,7 +348,7 @@ In the process of large model training, due to some unpredictable introduction, The most important thing in performance tuning is to apply the right medicine to the problem, delimit the problem first, and then perform targeted tuning to the problem. -The first to use [MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/useguide/firstpage_0003.html) visualization tools and bound performance issues. The results of delimiting are usually divided into three aspects: computation, scheduling and communication. +First, use the [MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/useguide/firstpage_0003.html) visualization tool to delimit the performance issue. The results of delimiting are usually divided into three aspects: computation, scheduling and communication. Finally, users can tune performance based on expert advice from MindStudio Insight. Re-run the training after each tuning, collect performance data, and use the MindStudio Insight tool to see if the tuning method produced results. Repeat this process until the performance issue is resolved. 
diff --git a/tutorials/source_zh_cn/debug/profiler.md b/tutorials/source_zh_cn/debug/profiler.md index 0923219e9e..f9921b334e 100644 --- a/tutorials/source_zh_cn/debug/profiler.md +++ b/tutorials/source_zh_cn/debug/profiler.md @@ -14,7 +14,7 @@ 3. 运行训练脚本; -4. 通过[MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/msinsightug/msascendinsightug/AscendInsight_0002.html)软件查看性能数据。 +4. 通过[MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/msinsightug/msascendinsightug/AscendInsight_0002.html)软件查看性能数据。 ## 使用方法 @@ -111,16 +111,25 @@ JSON配置样例如下: { "start_step": 2, "stop_step": 5, - "aic_metrics": -1, - "profiler_level": 0, - "activities": 0, - "export_type": 0, + "aic_metrics": "AiCoreNone", + "profiler_level": "Level0", + "analyse_mode": 0, + "activities": ["CPU", "NPU"], + "export_type": ["text"], "profile_memory": false, "mstx": false, - "analyse_mode": 0, "parallel_strategy": false, "with_stack": false, - "data_simplification": true + "data_simplification": true, + "l2_cache": false, + "analyse": true, + "record_shape": false, + "prof_path": "./data", + "mstx_domain_include": [], + "mstx_domain_exclude": [], + "host_sys": [], + "sys_io": false, + "sys_interconnection": false } ``` @@ -189,7 +198,7 @@ analyse("./profiler_data_path") # './profiler_data_path'为离线解析数据路 性能数据采集完成后,原始数据会按照以下目录结构进行存储: -> - 以下数据文件用户无需打开查看,可根据[MindStudio Insight用户指南](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/msinsightug/msascendinsightug/AscendInsight_0002.html)指导进行性能数据的查看和分析。 +> - 以下数据文件用户无需打开查看,可根据[MindStudio Insight用户指南](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/msinsightug/msascendinsightug/AscendInsight_0002.html)指导进行性能数据的查看和分析。 > - 以下是结果文件全集,实际文件数量和内容根据用户的参数配置以及实际的训练场景生成。如果用户没有使能相关参数或是训练中没有涉及到相关场景,则不会生成对应的数据文件。 ```sh @@ -238,22 +247,22 @@ MindSpore Profiler接口将框架侧的数据与CANN Profling的数据关联整 ### ascend_mindspore_profiler_{Rank_ID}.db `ascend_mindspore_profiler_{Rank_ID}.db` 文件由 `ExportType.Db` 
开关控制,文件主要汇总所有性能数据的.db格式文件。 -详细介绍请参考[ascend_mindspore_profiler_{Rank_ID}.db](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0026.html)。 +详细介绍请参考[ascend_mindspore_profiler_{Rank_ID}.db](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0026.html)。 ### communication_analyzer.db `communication_analyzer.db` 文件由 `ExportType.Db` 开关控制,文件主要统一通信类的分段耗时、拷贝信息、带宽等信息,以便进行通信类数据分析。通信类数据只有在多卡、多节点或集群场景下存在。 -详细介绍请参考[communication_analyzer.db](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0027.html)。 +详细介绍请参考[communication_analyzer.db](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0027.html)。 ### communication.json `communication.json` 文件记录通信类算子的通信耗时、带宽等详细信息。 -详细介绍请参考[communication.json](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0027.html)。 +详细介绍请参考[communication.json](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0027.html)。 ### communication_matrix.json `communication_matrix.json` 文件记录通信小算子基本的信息,包含通信size、通信带宽、通信rank等信息。 -详细介绍请参考[communication_matrix.json](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0027.html)。 +详细介绍请参考[communication_matrix.json](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0027.html)。 ### dataset.csv @@ -273,7 +282,7 @@ MindSpore Profiler接口将框架侧的数据与CANN Profling的数据关联整 与Ascend PyTorch Profiler接口采集数据结果的不同之处在于:当 `with_stack` 开关开启之后,MindSpore Profiler会将堆栈信息拼接到 `Name` 字段中。 -其他字段请参考[kernel_details.csv](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0035.html)。 +其他字段请参考[kernel_details.csv](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0035.html)。 ### 
minddata_pipeline_raw_{Rank_ID}.csv @@ -320,11 +329,11 @@ MindSpore Profiler接口将框架侧的数据与CANN Profling的数据关联整 ### trace_view.json `trace_view.json` 建议使用MindStudio Insight工具或 chrome://tracing/ 打开。MindSpore Profiler暂时不支持record_shapes与GC功能。 -详细介绍请参考[trace_view.json](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0035.html)。 +详细介绍请参考[trace_view.json](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0035.html)。 ### 其他性能数据 -其他性能数据文件的具体字段与含义可以参考[昇腾官网文档](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/T&ITools/Profiling/atlasprofiling_16_0035.html)。 +其他性能数据文件的具体字段与含义可以参考[昇腾官网文档](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0035.html)。 ## 性能调优案例 @@ -334,7 +343,7 @@ MindSpore Profiler接口将框架侧的数据与CANN Profling的数据关联整 性能调优最重要的就是对症下药,先定界问题,再对问题进行针对性调优。 -首先使用[MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/70RC3/useguide/firstpage_0003.html)可视化工具定界性能问题,定界结果通常分为计算、调度、通信三个方向的问题。 +首先使用[MindStudio Insight](https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/useguide/firstpage_0003.html)可视化工具定界性能问题,定界结果通常分为计算、调度、通信三个方向的问题。 然后,用户可以根据MindStudio Insight进行性能调优,每次调优后重跑训练,采集性能数据,并使用MindStudio Insight工具查看调优手段是否产生效果。重复这个过程,直到解决性能问题。 -- Gitee