From ce63f06bf69930ae486807294f5d909f11faff04 Mon Sep 17 00:00:00 2001
From: mookie <zhanghaoyong1@huawei.com>
Date: Thu, 12 Jun 2025 15:16:28 +0800
Subject: [PATCH] =?UTF-8?q?ppchart=E6=B7=BB=E5=8A=A0=E9=9D=9Edualpipe?=
 =?UTF-8?q?=E7=9A=84=E9=80=82=E9=85=8D=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../msprof_analyze/cluster_analyse/README.md  | 34 ++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/profiler/msprof_analyze/cluster_analyse/README.md b/profiler/msprof_analyze/cluster_analyse/README.md
index 48f67b790dd..cef71d91d4a 100644
--- a/profiler/msprof_analyze/cluster_analyse/README.md
+++ b/profiler/msprof_analyze/cluster_analyse/README.md
@@ -741,8 +741,26 @@ output_dir
 
 #### 打点
 
-以DualpipeV2为例，找到前反向代码，在dualpipev_schedules.py里面添加如下代码(仅为示例，需要注意这段代码添加的位置)：
+1. 传统pipeline（不开dualpipe），在```megatron/core/pipeline_parallel/schedules.py```里面添加如下代码（添加在```backward_step```函数定义的后面）：
+```python
+import torch_npu
+def step_wrapper(func, msg: str):
+    def wrapper(*args, **kwargs):
+        new_msg = {"name": msg}
+        mstx_state_step_range_id = torch_npu.npu.mstx.range_start(str(new_msg), torch_npu.npu.current_stream())
+        out = func(*args, **kwargs)
+        if mstx_state_step_range_id is not None:
+            torch_npu.npu.mstx.range_end(mstx_state_step_range_id)
+            mstx_state_step_range_id = None
+        return out
+    return wrapper
+
+forward_step = step_wrapper(forward_step, "forward_step")
+backward_step = step_wrapper(backward_step, "backward_step")
 ```
+
+2. DualpipeV2，找到前反向代码，在```mindspeed/core/pipeline_parallel/dualpipev/dualpipev_schedules.py```里面添加如下代码(添加在```forward_backward_pipeline_with_cutinhalf```函数定义的前面)：
+```python
 import torch_npu
 def step_wrapper(func, msg: str):
     def wrapper(*args, **kwargs):
@@ -769,8 +787,7 @@ backward_step = step_wrapper(backward_step, "backward_step")
 WeightGradStore.pop = step_wrapper(WeightGradStore.pop, "WeightGradStore.pop")
 ```
 
-同时，采集profiling数据时，需要添加metadata：
-
+同时，采集profiling数据时，如果使用的是MindSpeed，未使用MindSpeed-LLM，需要在prof定义（```prof = torch_npu.profiler.profile(...```）的后面添加metadata代码：
 ```
 prof.add_metadata('pp_info', json.dumps(
     {
@@ -778,7 +795,16 @@ prof.add_metadata('pp_info', json.dumps(
         'microbatch_num': 10,
     }
 ))
-# microbatch_num需要替换成实际的值
+# microbatch_num根据公式计算实际的值：microbatch_num = global_batch_size // micro_batch_size // data_parallel_size
+```
+如果使用MindSpeed-LLM，在```mindspeed-llm/training/training.py```中```prof.add_metadata_json('distributed_args'...```的后面添加metadata代码：
+```
+prof.add_metadata('pp_info', json.dumps(
+    {
+        'pp_type': args.schedules_method,
+        'microbatch_num': args.global_batch_size // args.micro_batch_size // args.data_parallel_size
+    }
+))
 ```
 
 #### StepTaskInfo
-- 
Gitee