From c75232e455c2104c39cd5627f78c5ab086ac6876 Mon Sep 17 00:00:00 2001 From: liutongtong27 Date: Wed, 14 May 2025 15:12:01 +0800 Subject: [PATCH] add training scripts for verl LLM GRPO and update README --- .../built-in/rl/VeRL_for_PyTorch/README.md | 92 +++++++++-- ...rain_qwen2_5_32b_instruct_GRPO_full_32p.sh | 150 +++++++++++++++++ ...en2_5_32b_instruct_GRPO_performance_32p.sh | 150 +++++++++++++++++ ...train_qwen2_5_7b_instruct_GRPO_full_16p.sh | 151 ++++++++++++++++++ ...wen2_5_7b_instruct_GRPO_performance_16p.sh | 151 ++++++++++++++++++ ...sh => train_qwen2_5_vl_3b_GRPO_full_8p.sh} | 2 +- ...rain_qwen2_5_vl_3b_GRPO_performance_8p.sh} | 2 +- ...h => train_qwen2_5_vl_7b_GRPO_full_16p.sh} | 2 +- ...ain_qwen2_5_vl_7b_GRPO_performance_16p.sh} | 2 +- 9 files changed, 687 insertions(+), 15 deletions(-) create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_3b_full_8p.sh => train_qwen2_5_vl_3b_GRPO_full_8p.sh} (98%) rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_3b_performance_8p.sh => train_qwen2_5_vl_3b_GRPO_performance_8p.sh} (98%) rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_7b_full_16p.sh => train_qwen2_5_vl_7b_GRPO_full_16p.sh} (98%) rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_7b_performance_16p.sh => train_qwen2_5_vl_7b_GRPO_performance_16p.sh} (98%) diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md index a7575f9e56..9c29f6fdff 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md @@ -120,15 +120,21 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 ## 准备数据集 - 使用geo3k数据集,在模型根目录下执行命令,下载并处理数据集,`--local_dir`为可选参数,不设置默认下载位置为`~/data/geo3k`。 + VL模型使用geo3k数据集,在模型根目录下执行命令,下载并处理数据集,`--local_dir`为可选参数,不设置默认下载位置为`~/data/geo3k`。 ```shell python examples/data_preprocess/geo3k.py --local_dir=xxx ``` + LLM模型使用gsm8k数据集,在模型根目录下执行命令,下载并处理数据集,`--local_dir`为可选参数,不设置默认下载位置为`~/data/gsm8k`。 + + ```shell + python examples/data_preprocess/gsm8k.py --local_dir=xxx + ``` + ## 获取预训练模型 - 用户自行下载`Qwen2.5-VL-7B-Instruct`与`Qwen2.5-VL-3B-Instruct`模型。 + 用户自行下载`Qwen2.5-VL-7B-Instruct`、`Qwen2.5-VL-3B-Instruct`、`Qwen2.5-7B-Instruct`和`Qwen2.5-32B-Instruct`模型。 # 开始训练 @@ -141,21 +147,51 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 ``` cd /${模型文件夹名称} ``` + +2. 双机运行环境配置(单机环境请忽略)。 + + 1. 主从节点保证模型和数据集路径完全相同。 + + 2. 主从节点分别执行以下命令获取节点ip对应的网口名称: + ```shell + ifconfig + ``` + + 3. 主从节点分别设置以下环境变量: + ```shell + export GLOO_SOCKET_IFNAME=网口名称 + export NCCL_SOCKET_IFNAME=网口名称 + ``` -2. 运行训练脚本。 + 4. 主节点执行以下命令启动ray集群: + ```shell + ray start --head + ``` + + 5. 从节点执行以下命令加入ray集群: + ```shell + ray start --address='主节点ip:6379' + ``` + + 6. 从节点执行以下命令确认双机已互联: + ```shell + ray status + ``` + +3. 
运行训练脚本。 `Qwen2.5-VL-3B-Instruct`模型支持单机8卡训练。 - 单机8卡训练 ```shell - bash test/train_qwen2_5_vl_3b_full_8p.sh --data_path=xxx --model_path=xxx # 8卡训练 + bash test/train_qwen2_5_vl_3b_GRPO_full_8p.sh --data_path=xxx --model_path=xxx # 8卡训练 ``` - 单机8卡性能 ```shell - bash test/train_qwen2_5_vl_3b_performance_8p.sh --data_path=xxx --model_path=xxx # 8卡性能 + bash test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh --data_path=xxx --model_path=xxx # 8卡性能 ``` `Qwen2.5-VL-7B-Instruct`模型支持单机16卡训练。 @@ -163,13 +199,43 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 - 单机16卡训练 ```shell - bash test/train_qwen2_5_vl_7b_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 + bash test/train_qwen2_5_vl_7b_GRPO_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 ``` - 单机16卡性能 ```shell - bash test/train_qwen2_5_vl_7b_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + bash test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + ``` + + `Qwen2.5-7B-Instruct`模型支持单机16卡训练。 + + - 单机16卡训练 + + ```shell + bash test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 + ``` + + - 单机16卡性能 + + ```shell + bash test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + ``` + + `Qwen2.5-32B-Instruct`模型支持双机32卡训练。 + + - 双机32卡训练 + + ```shell + # 主节点执行 + bash test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh --data_path=xxx --model_path=xxx # 32卡训练 + ``` + + - 双机32卡性能 + + ```shell + # 主节点执行 + bash test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh --data_path=xxx --model_path=xxx # 32卡性能 ``` 训练完成后,训练日志保存在`test/output`路径下,并输出模型训练精度和性能信息。 @@ -180,10 +246,14 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 | MODEL | NAME | throughput | MAX Training TimeSteps | |:-----------------------|:------------------------|:----------:|:----------------------:| -| Qwen2.5-VL-3B-Instruct | 8p-竞品A | 763.34 | 60 | -| Qwen2.5-VL-3B-Instruct | 8P Atlas 200T A2 Box16 | 270.99 | 60 | -| Qwen2.5-VL-7B-Instruct | 8p-竞品A | 555.342 | 60 | -| Qwen2.5-VL-7B-Instruct | 16P Atlas 200T A2 Box16 | 134.832 | 60 | +| Qwen2.5-VL-3B-Instruct | 8p-竞品A | 739.453 | 60 | +| Qwen2.5-VL-3B-Instruct | 8P Atlas 200T A2 Box16 | 349.013 | 60 | +| Qwen2.5-VL-7B-Instruct | 8p-竞品A | 568.452 | 60 | +| Qwen2.5-VL-7B-Instruct | 16P Atlas 200T A2 Box16 | 216.796 | 60 | +| Qwen2.5-7B-Instruct | 8p-竞品A | 323.872 | 35 | +| Qwen2.5-7B-Instruct | 16P Atlas 200T A2 Box16 | 190.617 | 35 | +| Qwen2.5-32B-Instruct | 16p-竞品A | 79.022 | 105 | +| Qwen2.5-32B-Instruct | 32P Atlas 200T A2 Box16 | 54.162 | 105 | # 公网地址说明 diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh new file mode 100644 index 0000000000..2e327fdc19 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_32b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=128 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=8 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_32b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=2 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=15 > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_grpo_full.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_grpo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" 
== x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'32p'_'full' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh new file mode 100644 index 0000000000..13050adae1 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_32b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=128 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + 
actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=8 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_32b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=2 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=1 > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_grpo_perf.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_grpo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'32p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh new file mode 100644 index 0000000000..3dbc768943 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_7b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.optim.lr=5e-8 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=32 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=5 \ + trainer.total_epochs=5 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_grpo_full.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_grpo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END 
{print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'16p'_'full' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh new file mode 100644 index 0000000000..9a4b342be3 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_7b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.optim.lr=5e-8 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=32 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + 
actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=5 \ + trainer.total_epochs=1 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_grpo_perf.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_grpo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'16p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_full_8p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_full_8p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_full_8p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_full_8p.sh index cc649111ec..401c4b0dda 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_full_8p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_full_8p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_3b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_3b_full_8p.sh " + echo"usage:./test/train_qwen2_5_vl_3b_GRPO_full_8p.sh " echo " " echo "parameter explain: --data_path source data of training diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_performance_8p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_performance_8p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh index 14f5a83710..09db8e28ee 100644 --- 
a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_performance_8p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_3b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_3b_performance_8p.sh " + echo"usage:./test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh " echo " " echo "parameter explain: --data_path source data of training diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_full_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_full_16p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_full_16p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_full_16p.sh index 5d38458968..09cb666966 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_full_16p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_full_16p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_7b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_7b_full_16p.sh " + echo"usage:./test/train_qwen2_5_vl_7b_GRPO_full_16p.sh " echo " " echo "parameter explain: --data_path source data of training diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_performance_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_performance_16p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh index 7622bdf1ed..2776939d73 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_performance_16p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_7b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_7b_performance_16p.sh " + echo"usage:./test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh " echo " " echo "parameter explain: --data_path source data of training -- Gitee
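
A minimal sketch of the two-node launch flow for the Qwen2.5-32B-Instruct GRPO run, consolidating the README steps added above. The interface name `enp1s0f0`, the head-node IP `192.168.1.10`, and the data/model paths are hypothetical placeholders; substitute the values reported by `ifconfig` on your machines and your actual download locations.

```shell
# On both nodes: bind gloo / collective-communication traffic to the NIC that carries the node IP
export GLOO_SOCKET_IFNAME=enp1s0f0       # hypothetical interface name, check with ifconfig
export NCCL_SOCKET_IFNAME=enp1s0f0       # hypothetical interface name, check with ifconfig

# On the head node: start the ray cluster
ray start --head

# On the worker node: join the cluster (6379 is the head port used in the README)
ray start --address='192.168.1.10:6379'  # hypothetical head-node IP

# On the worker node: confirm both nodes are registered
ray status

# On the head node only: launch the 32-card GRPO training
bash test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh \
    --data_path=$HOME/data/gsm8k \
    --model_path=/path/to/Qwen2.5-32B-Instruct
```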