From d27a6f4c3b3dde7e3f97cbe8da5d512cc6277b01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=91=E7=89=B9=E9=A9=B9?= Date: Mon, 9 Jun 2025 18:48:01 +0800 Subject: [PATCH] =?UTF-8?q?[built-in][PyTorch][OpenRLHF=5Fv0.6.2]=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9readme=E4=B8=ADPRM=E3=80=81RM=E3=80=81KTO?= =?UTF-8?q?=E5=BC=80=E5=A7=8B=E8=AE=AD=E7=BB=83=E7=9A=84=E5=91=BD=E4=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md | 12 ++++++------ .../test/train_prm_performance_8p.sh | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md index bf33a116ef..6dd1923d8b 100644 --- a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md +++ b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/README.osc.md @@ -219,9 +219,9 @@ bash test/train_grpo_performance_16p.sh --model_path=./models/xxx --dataset_path ```shell # 8卡训练 -bash test/train_kto_full_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences +bash test/train_kto_full_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences # 8卡性能 -bash test/train_kto_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences +bash test/train_kto_performance_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/ultrafeedback-unpaired-preferences ``` #### RM算法 @@ -230,9 +230,9 @@ bash test/train_kto_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixtu ```shell # 8卡训练 -bash test/train_rm_full_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku +bash test/train_rm_full_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku # 8卡性能 -bash test/train_rm_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku +bash test/train_rm_performance_8p.sh --pretrain_path=./models/Llama-3-8b-sft-mixture --dataset_path=./data/preference_dataset_mixture2_and_safe_pku ``` #### PRM算法 @@ -241,9 +241,9 @@ bash test/train_rm_performance_8p.sh --model_path=./models/Llama-3-8b-sft-mixtur ```shell # 8卡训练 -bash test/train_prm_full_8p.sh --model_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data +bash test/train_prm_full_8p.sh --pretrain_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data # 8卡性能 -bash test/train_prm_performance_8p.sh --model_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data +bash test/train_prm_performance_8p.sh --pretrain_path=./models/Mistral-7B-v0.1 --dataset_path=./data/Math-Shepherd/data ``` #### 训练结果展示 diff --git a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh index 6ffa7c1dd4..45ea8fa81a 100644 --- a/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh +++ b/PyTorch/built-in/rl/OpenRLHF_v0.6.2_for_PyTorch/test/train_prm_performance_8p.sh @@ -68,7 +68,7 @@ openrlhf.cli.train_prm \ --eval_steps 100 \ --train_batch_size 64 \ --micro_train_batch_size 8 \ - --max_samples 64000 \ + --max_samples 64000 \ --pretrain $pretrain_path \ --bf16 \ --max_epochs $max_epochs \ -- Gitee