From d27a6f4c3b3dde7e3f97cbe8da5d512cc6277b01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=91=E7=89=B9=E9=A9=B9?= <zhengteju@h-partners.com>
Date: Mon, 9 Jun 2025 18:48:01 +0800
Subject: [PATCH] =?UTF-8?q?[built-in][PyTorch][OpenRLHF=5Fv0.6.2]=20?=
 =?UTF-8?q?=E4=BF=AE=E6=94=B9readme=E4=B8=ADPRM=E3=80=81RM=E3=80=81KTO?=
 =?UTF-8?q?=E5=BC=80=E5=A7=8B=E8=AE=AD=E7=BB=83=E7=9A=84=E5=91=BD=E4=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

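Update the KTO, RM and PRM launch commands in README.osc.md to pass
--pretrain_path instead of --model_path.

Also remove the trailing whitespace after the line-continuation
backslash on the --max_samples line of test/train_prm_performance_8p.sh.
A backslash followed by spaces does not continue the line, so the
options after --max_samples were cut off from the train_prm command.
---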
 .../rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md     | 12 ++++++------
 .../test/train_prm_performance_8p.sh                 |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md
index bf33a116ef..6dd1923d8b 100644
--- a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md
+++ b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md
@@ -219,9 +219,9 @@ bash test/train_grpo_performance_16p.sh --model_path=./models/xxx --dataset_path
 
 ```shell
 # 8卡训练
-bash test/train_kto_full_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences
+bash test/train_kto_full_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences
 # 8卡性能
-bash test/train_kto_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences
+bash test/train_kto_performance_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences
 ```
 
 #### RM算法
@@ -230,9 +230,9 @@ bash test/train_kto_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixtu
 
 ```shell
 # 8卡训练
-bash test/train_rm_full_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku
+bash test/train_rm_full_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku
 # 8卡性能
-bash test/train_rm_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku
+bash test/train_rm_performance_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku
 ```
 
 #### PRM算法
@@ -241,9 +241,9 @@ bash test/train_rm_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixtur
 
 ```shell
 # 8卡训练
-bash test/train_prm_full_8p.sh --model_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data
+bash test/train_prm_full_8p.sh --pretrain_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data
 # 8卡性能
-bash test/train_prm_performance_8p.sh --model_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data
+bash test/train_prm_performance_8p.sh --pretrain_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data
 ```
 
 #### 训练结果展示
diff --git a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh
index 6ffa7c1dd4..45ea8fa81a 100644
--- a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh
+++ b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh
@@ -68,7 +68,7 @@ openrlhf.cli.train_prm \
    --eval_steps 100 \
    --train_batch_size 64 \
    --micro_train_batch_size 8 \
-   --max_samples 64000 \   
+   --max_samples 64000 \
    --pretrain $pretrain_path \
    --bf16 \
    --max_epochs $max_epochs \
-- 
Gitee