From c75232e455c2104c39cd5627f78c5ab086ac6876 Mon Sep 17 00:00:00 2001 From: liutongtong27 Date: Wed, 14 May 2025 15:12:01 +0800 Subject: [PATCH] add training scripts for verl LLM GRPO and update README --- .../built-in/rl/VeRL_for_PyTorch/README.md | 92 +++++++++-- ...rain_qwen2_5_32b_instruct_GRPO_full_32p.sh | 150 +++++++++++++++++ ...en2_5_32b_instruct_GRPO_performance_32p.sh | 150 +++++++++++++++++ ...train_qwen2_5_7b_instruct_GRPO_full_16p.sh | 151 ++++++++++++++++++ ...wen2_5_7b_instruct_GRPO_performance_16p.sh | 151 ++++++++++++++++++ ...sh => train_qwen2_5_vl_3b_GRPO_full_8p.sh} | 2 +- ...rain_qwen2_5_vl_3b_GRPO_performance_8p.sh} | 2 +- ...h => train_qwen2_5_vl_7b_GRPO_full_16p.sh} | 2 +- ...ain_qwen2_5_vl_7b_GRPO_performance_16p.sh} | 2 +- 9 files changed, 687 insertions(+), 15 deletions(-) create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh create mode 100644 PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_3b_full_8p.sh => train_qwen2_5_vl_3b_GRPO_full_8p.sh} (98%) rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_3b_performance_8p.sh => train_qwen2_5_vl_3b_GRPO_performance_8p.sh} (98%) rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_7b_full_16p.sh => train_qwen2_5_vl_7b_GRPO_full_16p.sh} (98%) rename PyTorch/built-in/rl/VeRL_for_PyTorch/test/{train_qwen2_5_vl_7b_performance_16p.sh => train_qwen2_5_vl_7b_GRPO_performance_16p.sh} (98%) diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md index a7575f9e56..9c29f6fdff 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/README.md @@ -120,15 +120,21 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 ## 准备数据集 - 使用geo3k数据集,在模型根目录下执行命令,下载并处理数据集,`--local_dir`为可选参数,不设置默认下载位置为`~/data/geo3k`。 + VL模型使用geo3k数据集,在模型根目录下执行命令,下载并处理数据集,`--local_dir`为可选参数,不设置默认下载位置为`~/data/geo3k`。 ```shell python examples/data_preprocess/geo3k.py --local_dir=xxx ``` + LLM模型使用gsm8k数据集,在模型根目录下执行命令,下载并处理数据集,`--local_dir`为可选参数,不设置默认下载位置为`~/data/gsm8k`。 + + ```shell + python examples/data_preprocess/gsm8k.py --local_dir=xxx + ``` + ## 获取预训练模型 - 用户自行下载`Qwen2.5-VL-7B-Instruct`与`Qwen2.5-VL-3B-Instruct`模型。 + 用户自行下载`Qwen2.5-VL-7B-Instruct`、`Qwen2.5-VL-3B-Instruct`、`Qwen2.5-7B-Instruct`和`Qwen2.5-32B-Instruct`模型。 # 开始训练 @@ -141,21 +147,51 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 ``` cd /${模型文件夹名称} ``` + +2. 双机运行环境配置(单机环境请忽略)。 + + 1. 主从节点保证模型和数据集路径完全相同。 + + 2. 主从节点分别执行以下命令获取节点ip对应的网口名称: + ```shell + ifconfig + ``` + + 3. 主从节点分别设置以下环境变量: + ```shell + export GLOO_SOCKET_IFNAME=网口名称 + export NCCL_SOCKET_IFNAME=网口名称 + ``` -2. 运行训练脚本。 + 4. 主节点执行以下命令启动ray集群: + ```shell + ray start --head + ``` + + 5. 从节点执行以下命令加入ray集群: + ```shell + ray start --address='主节点ip:6379' + ``` + + 6. 从节点执行以下命令确认双机已互联: + ```shell + ray status + ``` + +3. 
运行训练脚本。 `Qwen2.5-VL-3B-Instruct`模型支持单机8卡训练。 - 单机8卡训练 ```shell - bash test/train_qwen2_5_vl_3b_full_8p.sh --data_path=xxx --model_path=xxx # 8卡训练 + bash test/train_qwen2_5_vl_3b_GRPO_full_8p.sh --data_path=xxx --model_path=xxx # 8卡训练 ``` - 单机8卡性能 ```shell - bash test/train_qwen2_5_vl_3b_performance_8p.sh --data_path=xxx --model_path=xxx # 8卡性能 + bash test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh --data_path=xxx --model_path=xxx # 8卡性能 ``` `Qwen2.5-VL-7B-Instruct`模型支持单机16卡训练。 @@ -163,13 +199,43 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 - 单机16卡训练 ```shell - bash test/train_qwen2_5_vl_7b_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 + bash test/train_qwen2_5_vl_7b_GRPO_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 ``` - 单机16卡性能 ```shell - bash test/train_qwen2_5_vl_7b_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + bash test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + ``` + + `Qwen2.5-7B-Instruct`模型支持单机16卡训练。 + + - 单机16卡训练 + + ```shell + bash test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh --data_path=xxx --model_path=xxx # 16卡训练 + ``` + + - 单机16卡性能 + + ```shell + bash test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh --data_path=xxx --model_path=xxx # 16卡性能 + ``` + + `Qwen2.5-32B-Instruct`模型支持双机32卡训练。 + + - 双机32卡训练 + + ```shell + # 主节点执行 + bash test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh --data_path=xxx --model_path=xxx # 32卡训练 + ``` + + - 双机32卡性能 + + ```shell + # 主节点执行 + bash test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh --data_path=xxx --model_path=xxx # 32卡性能 ``` 训练完成后,训练日志保存在`test/output`路径下,并输出模型训练精度和性能信息。 @@ -180,10 +246,14 @@ verl‌是一个集SFT(监督学习)与RL(强化学习)于一体的灵 | MODEL | NAME | throughput | MAX Training TimeSteps | |:-----------------------|:------------------------|:----------:|:----------------------:| -| Qwen2.5-VL-3B-Instruct | 8p-竞品A | 763.34 | 60 | -| Qwen2.5-VL-3B-Instruct | 8P Atlas 200T A2 Box16 | 270.99 | 60 | -| Qwen2.5-VL-7B-Instruct | 8p-竞品A | 555.342 | 60 | -| Qwen2.5-VL-7B-Instruct | 16P Atlas 200T A2 Box16 | 134.832 | 60 | +| Qwen2.5-VL-3B-Instruct | 8p-竞品A | 739.453 | 60 | +| Qwen2.5-VL-3B-Instruct | 8P Atlas 200T A2 Box16 | 349.013 | 60 | +| Qwen2.5-VL-7B-Instruct | 8p-竞品A | 568.452 | 60 | +| Qwen2.5-VL-7B-Instruct | 16P Atlas 200T A2 Box16 | 216.796 | 60 | +| Qwen2.5-7B-Instruct | 8p-竞品A | 323.872 | 35 | +| Qwen2.5-7B-Instruct | 16P Atlas 200T A2 Box16 | 190.617 | 35 | +| Qwen2.5-32B-Instruct | 16p-竞品A | 79.022 | 105 | +| Qwen2.5-32B-Instruct | 32P Atlas 200T A2 Box16 | 54.162 | 105 | # 公网地址说明 diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh new file mode 100644 index 0000000000..2e327fdc19 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_32b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=128 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=8 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_32b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=2 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=15 > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_grpo_full.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_grpo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" 
== x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'32p'_'full' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh new file mode 100644 index 0000000000..13050adae1 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_32b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_32b_instruct_GRPO_performance_32p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=128 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + 
actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=8 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_32b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=2 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=1 > ${test_path_dir}/output/train_verl_qwen2_5_32b_instruct_grpo_perf.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_32b_instruct_grpo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'32p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh new file mode 100644 index 0000000000..3dbc768943 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. 
+ cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_7b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_7b_instruct_GRPO_full_16p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.optim.lr=5e-8 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=32 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=5 \ + trainer.total_epochs=5 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_grpo_full.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_grpo_full.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END 
{print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'16p'_'full' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh new file mode 100644 index 0000000000..9a4b342be3 --- /dev/null +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh @@ -0,0 +1,151 @@ +#!/bin/bash + +###############指定训练脚本执行路径############### +# cd到与test文件夹同层级目录下执行脚本,提高兼容性;test_path_dir为包含test文件夹的路径 +cur_path=`pwd` +cur_path_last_dirname=${cur_path##*/} +if [ x"${cur_path_last_dirname}" == x"test" ];then + test_path_dir=${cur_path} + cd .. + cur_path=`pwd` +else + test_path_dir=${cur_path}/test +fi + +# 数据集路径,保持为空,不需要修改 +data_path="" +model_path="" + +#基础参数,需要模型审视修改 +#网络名称,同目录名称 +Network="Qwen2_5_7b_instruct_for_PyTorch" + +# 帮助信息,不需要修改 +if [[ $1 == --help || $1 == -h ]];then + echo"usage:./test/train_qwen2_5_7b_instruct_GRPO_performance_16p.sh " + echo " " + echo "parameter explain: + --data_path source data of training + --model_path model path for GRPO + -h/--help show help message + " + exit 1 +fi + +#参数校验,不需要修改 +for para in $* +do + if [[ $para == --data_path* ]];then + data_path=`echo ${para#*=}` + elif [[ $para == --model_path* ]];then + model_path=`echo ${para#*=}` + fi +done + +#校验是否传入data_path,不需要修改 +if [[ $data_path == "" ]];then + echo "[Error] para \"data_path\" must be confing" + exit 1 +fi +if [[ $model_path == "" ]];then + echo "[Error] para \"model_path\" must be confing" + exit 1 +fi + +#非平台场景时source 环境变量 +check_etp_flag=`env | grep etp_running_flag` +etp_flag=`echo ${check_etp_flag#*=}` +if [ x"${etp_flag}" != x"true" ];then + source ${test_path_dir}/env_npu.sh +fi + +#训练开始时间,不需要修改 +start_time=$(date +%s) + +#进入训练脚本目录,需要模型审视修改 +cd $cur_path + +if [ -d ${test_path_dir}/output/${ASCEND_DEVICE_ID} ];then + rm -rf ${test_path_dir}/output + mkdir -p ${test_path_dir}/output +else + mkdir -p ${test_path_dir}/output +fi + +ENGINE=vllm + +nohup python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$data_path/train.parquet \ + data.val_files=$data_path/test.parquet \ + data.train_batch_size=1024 \ + data.max_prompt_length=1024 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$model_path \ + actor_rollout_ref.actor.optim.lr=5e-8 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=32 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + 
actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.use_torch_compile=False \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=$ENGINE \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ + actor_rollout_ref.rollout.n=5 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_5_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=5 \ + trainer.total_epochs=1 > ${test_path_dir}/output/train_verl_qwen2_5_7b_instruct_grpo_perf.log 2>&1 & +wait + +#训练结束时间,不需要修改 +end_time=$(date +%s) +e2e_time=$(( $end_time - $start_time )) +#结果打印,不需要修改 +echo "------------------ Final result ------------------" +#输出性能FPS,需要模型审视修改 +FPS=`grep 'perf/throughput:' $test_path_dir/output/train_verl_qwen2_5_7b_instruct_grpo_perf.log | awk -F 'perf/throughput:' '{print$2}' | awk -F ' ' '{print$1}' | head -n 4 | awk '{sum+=$1} END {print"",sum/NR}'` + +#排除功能问题导致计算溢出的异常,增加健壮性 +if [ x"${FPS}" == x"2147483647" ] || [ x"${FPS}" == x"-2147483647" ];then + FPS="" +fi +#打印,不需要修改 +echo "Final Performance images/sec : $FPS" + +#打印,不需要修改 +echo "E2E Training Duration sec : $e2e_time" + +#性能看护结果汇总 +#训练用例信息,不需要修改 +DeviceType=`uname -m` +CaseName=${Network}_'16p'_'perf' + +##获取性能数据,不需要修改 +#吞吐量 +ActualFPS=${FPS} + +#关键信息打印到${CaseName}.log中,不需要修改 +echo "Network = ${Network}" > $test_path_dir/output/${CaseName}.log +echo "DeviceType = ${DeviceType}" >> $test_path_dir/output/${CaseName}.log +echo "CaseName = ${CaseName}" >> $test_path_dir/output/${CaseName}.log +echo "ActualFPS = ${ActualFPS}" >> $test_path_dir/output/${CaseName}.log +echo "E2ETrainingTime = ${e2e_time}" >> $test_path_dir/output/${CaseName}.log diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_full_8p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_full_8p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_full_8p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_full_8p.sh index cc649111ec..401c4b0dda 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_full_8p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_full_8p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_3b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_3b_full_8p.sh " + echo"usage:./test/train_qwen2_5_vl_3b_GRPO_full_8p.sh " echo " " echo "parameter explain: --data_path source data of training diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_performance_8p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_performance_8p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh index 14f5a83710..09db8e28ee 100644 --- 
a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_performance_8p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_3b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_3b_performance_8p.sh " + echo"usage:./test/train_qwen2_5_vl_3b_GRPO_performance_8p.sh " echo " " echo "parameter explain: --data_path source data of training diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_full_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_full_16p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_full_16p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_full_16p.sh index 5d38458968..09cb666966 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_full_16p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_full_16p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_7b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_7b_full_16p.sh " + echo"usage:./test/train_qwen2_5_vl_7b_GRPO_full_16p.sh " echo " " echo "parameter explain: --data_path source data of training diff --git a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_performance_16p.sh b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh similarity index 98% rename from PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_performance_16p.sh rename to PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh index 7622bdf1ed..2776939d73 100644 --- a/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_performance_16p.sh +++ b/PyTorch/built-in/rl/VeRL_for_PyTorch/test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh @@ -22,7 +22,7 @@ Network="Qwen2_5_vl_7b_for_PyTorch" # 帮助信息,不需要修改 if [[ $1 == --help || $1 == -h ]];then - echo"usage:./test/train_qwen2_5_vl_7b_performance_16p.sh " + echo"usage:./test/train_qwen2_5_vl_7b_GRPO_performance_16p.sh " echo " " echo "parameter explain: --data_path source data of training -- Gitee
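
A minimal sketch of the two-node launch flow for the Qwen2.5-32B-Instruct GRPO run, consolidating the README steps added above. The interface name `enp1s0f0`, the head-node IP `192.168.1.10`, and the data/model paths are hypothetical placeholders; substitute the values reported by `ifconfig` on your machines and your actual download locations.

```shell
# On both nodes: bind gloo / collective-communication traffic to the NIC that carries the node IP
export GLOO_SOCKET_IFNAME=enp1s0f0       # hypothetical interface name, check with ifconfig
export NCCL_SOCKET_IFNAME=enp1s0f0       # hypothetical interface name, check with ifconfig

# On the head node: start the ray cluster
ray start --head

# On the worker node: join the cluster (6379 is the head port used in the README)
ray start --address='192.168.1.10:6379'  # hypothetical head-node IP

# On the worker node: confirm both nodes are registered
ray status

# On the head node only: launch the 32-card GRPO training
bash test/train_qwen2_5_32b_instruct_GRPO_full_32p.sh \
    --data_path=$HOME/data/gsm8k \
    --model_path=/path/to/Qwen2.5-32B-Instruct
```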