diff --git a/README.md b/README.md
index 9063ad8f6ffdb1022647f3a645faa00011d5b5bc..87939d77ea49cafb8bee1a15d8a134a2569c2190 100644
--- a/README.md
+++ b/README.md
@@ -169,14 +169,14 @@ source auto_convert_rl.sh
此处提供以下强化模型训练拉起流程作为参考。
-- [**DeepSeek-R1-ZERO-Qwen2.5 7B**](./docs/)
+- [**Qwen2.5-7B GRPO**](./docs/GRPO.md)
若在环境中`PYTHONPATH`等环境变量失效(例如退出容器后再进入等),可执行如下命令重新设置环境变量
```shell
# 在MindSpeed-Core-MS目录下执行
MindSpeed_Core_MS_PATH=$(pwd)
-export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/MindSpeed/:${MindSpeed_Core_MS_PATH}/Megatron-LM/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/transformers/src/:$PYTHONPATH
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH
```
---
diff --git a/auto_convert_rl.sh b/auto_convert_rl.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b2a89bddd002f318b097ef369d9f33d909b53b81
--- /dev/null
+++ b/auto_convert_rl.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+
+#MindSpeed-LLM
+rm -rf MindSpeed-LLM/
+git clone https://gitee.com/ascend/MindSpeed-LLM.git -b master
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MindSpeed-LLM"
+ exit 1
+fi
+cd MindSpeed-LLM
+git checkout 71c5af4d72078d826fd93fec6980004f0de51132
+rm -rf tests
+cd ..
+echo "------------------------------------done MindSpeed-LLM"
+
+#MindSpeed
+rm -rf MindSpeed/
+git clone https://gitee.com/ascend/MindSpeed.git -b core_r0.8.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MindSpeed"
+ exit 1
+fi
+cd MindSpeed
+git checkout 31aaf3d4ca86234b15f4a5d3af20bd6df06e7d45
+rm -rf tests_extend
+cd ..
+echo "...............................................done MindSpeed"
+
+#MindSpeed-RL
+rm -rf MindSpeed-RL/
+git clone https://gitee.com/ascend/MindSpeed-RL.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MindSpeed-RL"
+ exit 1
+fi
+cd MindSpeed-RL
+git checkout 559db0856891e5f8504a0b21d4b26969a82241df
+rm -rf tests
+cd ..
+echo "...............................................done MindSpeed-RL"
+
+#Megatron-LM
+rm -rf Megatron-LM/
+git clone https://gitee.com/mirrors/Megatron-LM.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone Megatron-LM"
+ exit 1
+fi
+cd Megatron-LM
+git checkout core_r0.8.0
+rm -rf tests
+cd ..
+echo "..............................................done Megatron-LM"
+
+#MSAdapter
+rm -rf MSAdapter
+git clone https://openi.pcl.ac.cn/OpenI/MSAdapter.git -b master
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MSAdapter"
+ exit 1
+fi
+cd MSAdapter
+rm -rf tests
+cd ..
+echo "..............................................done MSAdapter"
+
+#vllm
+rm -rf vllm
+git clone https://gitee.com/mirrors/vllm.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone vllm"
+ exit 1
+fi
+cd vllm
+git checkout v0.7.3
+rm -rf tests
+cd ..
+echo "..............................................done vllm"
+
+
+#vllm-ascend
+rm -rf vllm-ascend
+git clone https://gitee.com/mirrors/vllm-ascend.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone vllm-ascend"
+ exit 1
+fi
+cd vllm-ascend
+git checkout 0713836e95fe993feefe334945b5b273e4add1f1
+rm -rf tests
+cd ..
+echo "..............................................done vllm-ascend"
+
+#transformers
+rm -rf transformers/
+git clone https://gitee.com/mirrors/huggingface_transformers.git -b v4.47.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone huggingface_transformers"
+ exit 1
+fi
+mv huggingface_transformers transformers
+cd transformers
+git apply ../tools/rules/transformers.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply transformers"
+
+#accelerate
+rm -rf accelerate/
+git clone https://github.com/huggingface/accelerate.git -b v1.6.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone accelerate"
+ exit 1
+fi
+cd accelerate
+git apply ../tools/rules/accelerate.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply accelerate"
+
+#safetensors
+rm -rf safetensors_dir
+mkdir safetensors_dir
+pip install --no-deps safetensors==0.5.1
+if [ $? -ne 0 ]; then
+ echo "Error: pip install safetensors fail"
+else
+ ST_PATH=$(python -c "import site; print(site.getsitepackages()[0])")
+ cp -r ${ST_PATH}/safetensors ./safetensors_dir
+ cd safetensors_dir/safetensors
+ git init
+ git apply ../../tools/rules/safetensors.diff
+ cd ../../
+ export PYTHONPATH=$(pwd)/safetensors_dir:$PYTHONPATH
+ echo "..............................................done apply safetensors"
+fi
+
+#huggingface_hub
+rm -rf huggingface_hub
+git clone https://github.com/huggingface/huggingface_hub.git -b v0.29.2
+if [ $? -ne 0 ]; then
+ echo "Error: git clone huggingface_hub"
+ exit 1
+fi
+cd huggingface_hub
+git apply ../tools/rules_rl/huggingface_hub.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply huggingface_hub"
+
+echo "..............................................start code_convert"
+MindSpeed_Core_MS_PATH=$PWD
+echo ${MindSpeed_Core_MS_PATH}
+
+python3 tools/transfer.py \
+--megatron_path ${MindSpeed_Core_MS_PATH}/Megatron-LM/megatron/ \
+--mindspeed_path ${MindSpeed_Core_MS_PATH}/MindSpeed/mindspeed/ \
+--mindspeed_llm_path ${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/ \
+--mindspeed_rl_path ${MindSpeed_Core_MS_PATH}/MindSpeed-RL/ \
+--vllm_path ${MindSpeed_Core_MS_PATH}/vllm/ \
+--vllm_ascend_path ${MindSpeed_Core_MS_PATH}/vllm-ascend/ \
+--is_rl
+
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH
+echo $PYTHONPATH
+echo "..............................................done code_convert"
+
diff --git a/docs/GRPO.md b/docs/GRPO.md
index e94d67a59c4b55cdfc8c2fb23aa81d5c9c9ae08a..189b02fbb4bc57643afff378a8cfb1af72b3b284 100644
--- a/docs/GRPO.md
+++ b/docs/GRPO.md
@@ -1,39 +1,114 @@
-# RL使用
+# 基于 MindSpore AI 框架的 GRPO-DeepSeek-R1-Qwen2.5-7B 使用指南
-## 转换脚本
+Group Relative Policy Optimization (GRPO) 是 Deepseek-Math 中提出的训练方法,它移除了 PPO 中对 Critic 模型的依赖,而是通过计算同一 prompt 多次重复采样输出的相对奖励来估计优势函数,这一创新大大减少了显存占用,提高了算法在强化学习任务中的效率。
-test_convert_rl.sh 脚本自动执行以下设置过程:
+在 GRPO 方法中包含了三个关键模型:Actor,Reference,Reward。其中 Actor 和 Reference 模型是通过 SFT 后得到的策略模型,而 Reward 模型则是通过规则奖励来评估。GRPO 的核心训练目标是优化 Actor 模型的策略,使其在执行强化学习任务时能够产生更优的动作序列,更符合任务目标的预期。
-1. 克隆所需仓库:
- - MindSpeed-LLM(commit id:421ef7bcb83fb31844a1efb688cde71705c0526e)
- - MindSpeed(commit id:0dfa0035ec54d9a74b2f6ee2867367df897299df)
- - MindSpeed-RL(分支:2.0.0)
- - Megatron-LM(分支:core_r0.8.0)
- - msadapter(分支:master)
- - vllm(分支:v0.7.3)
- - vllm-ascend(commit id:0713836e95fe993feefe334945b5b273e4add1f1)
- - transformers(分支:v4.47.0)
+本篇工作基于 MindSpore AI 框架使用 Qwen2.5-7B 模型复现 GRPO-DeepSeek-R1 在 Math 领域的工作。
-2. 使用 transfer.py 工具运行代码转换
+## 依赖的三方库版本
-## 使用说明
+- MindSpeed-LLM(commit id:71c5af4d72078d826fd93fec6980004f0de51132)
+- MindSpeed(分支:core_r0.8.0, commit id:31aaf3d4ca86234b15f4a5d3af20bd6df06e7d45)
+- MindSpeed-RL(分支:master, commit id:559db0856891e5f8504a0b21d4b26969a82241df)
+- Megatron-LM(分支:core_r0.8.0)
+- MSAdapter(分支:master)
+- vllm(分支:v0.7.3)
+- vllm-ascend(commit id:0713836e95fe993feefe334945b5b273e4add1f1)
+- transformers(分支:v4.47.0)
+- accelerate(分支:v1.6.0)
+- safetensors(版本:0.5.1)
+- huggingface_hub(分支:v0.29.2)
-1. 克隆 MindSpeed-Core-MS 仓库:
+## 模型选择
- ```shell
- git clone -b feature-0.2 https://gitee.com/ascend/MindSpeed-Core-MS.git
- cd MindSpeed-Core-MS/
- ```
+ [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) 模型指令遵从度高,有一定概率能引导模型输出 `......$\boxed{}` 格式回复,训练曲线符合预期,在评测集上提升较大。
-2. 使转换脚本可执行并运行:
+## 数据预处理
- ```shell
- chmod +x test_convert_rl.sh
- ./test_convert_rl.sh
- ```
+以 DeepScaler 为例:
+
+数据集下载地址:[DeepScaler](https://huggingface.co/datasets/agentica-org/DeepScaleR-Preview-Dataset/tree/main)
+
+数据集下载可以基于网页直接下载,也可以基于命令行下载,比如:
+
+```shell
+# 读取deepscaler数据集
+mkdir dataset
+cd dataset/
+wget https://huggingface.co/datasets/agentica-org/DeepScaleR-Preview-Dataset/resolve/main/deepscaler.json --no-check
+cd ..
+```
+
+数据预处理的 yaml 配置文件放置于 `MindSpeed-RL/configs/datasets` 文件夹下,通过以下命令进行数据集预处理:
+
+```shell
+# 读取configs/datasets/deepscaler.yaml文件
+bash examples/data/preprocess_data.sh deepscaler
+```
+
+数据集处理配置可以根据需求自行配置,以下是数据集处理的 yaml 文件中基础参数的介绍:
+
+- `input`:数据集的路径,需指定具体文件,例如/datasets/deepscaler.json
+- `tokenizer_type`:指定分词器的类型,例如 HuggingFaceTokenizer 使用 Hugging Face 库提供的分词器来对文本进行分词处理;
+- `tokenizer_name_or_path`:指定分词器的名称或路径;
+- `output_prefix`:输出结果的前缀路径,例如 /datasets/data;
+- `workers`:设置处理数据时使用的 worker 数;
+- `prompt_type`: 用于指定对话模板,能够让 base 模型微调后能具备更好的对话能力,prompt-type 的可选项可以在 configs/model/templates.json 文件内查看;
+- `log_interval`:设置日志记录的间隔,每处理多少条数据时记录一次日志,用于监控数据处理的进度和状态;
+- `handler_name`:指定处理数据的处理器名称;
+- `seq_length`:设置数据预处理最大序列长度,超过了会过滤掉;
+
+## 权重转换
+
+根据 GRPO 算法要求,Actor 和 Reference 模型应该使用 SFT 微调后的模型进行初始化,Reward 模型应该使用规则奖励。GRPO 算法模型权重均使用 Megatron-mcore 格式,其他格式的权重需要进行模型权重转换。可参考[权重转换部分](https://gitee.com/ascend/MindSpeed-RL/blob/master/docs/algorithms/grpo.md)
+
+以 Qwen2.5-7B 模型的权重转换脚本为参考,权重转换步骤如下:
+
+### 获取权重文件
+
+hf 权重文件可从 Huggingface 网站获取,请根据模型的使用场景灵活选择,在此以 [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) 为例。
+
+### hf 转 mcore
+
+在训练前,需要将 Hugging Face 权重转换成 Mcore 格式,示例脚本启动命令和配置参数如下:
+
+```bash
+# 脚本中路径请按真实情况配置
+bash examples/ckpt/ckpt_convert_qwen25_hf2mcore.sh
+```
+
+> 注:这里会调用到 MindSpeed-LLM 仓,进行权重转换前需先确认环境变量已配置完毕。
+
+配置参数介绍
+
+- `use-mcore-models`:启用 MCore 模型;
+- `model-type`:指定模型类型,如 GPT;
+- `load-model-type`:指定加载模型的类型,如 hf(Hugging Face);
+- `save-model-type`:指定保存模型的类型,如 mg;
+- `target-tensor-parallel-size`:设置目标张量并行大小;
+- `target-pipeline-parallel-size`:设置目标流水线并行大小;
+- `add-qkv-bias`:是否进行 QKV 偏置;
+- `load-dir`:加载 Hugging Face 权重的路径;
+- `save-dir`:保存转换后权重的路径;
+- `tokenizer-model`:分词器模型文件的路径;
+- `model-type-hf`:指定 Hugging Face 模型类型,如 llama2;
+- `params-dtype`:指定参数的数据类型,如 bf16。
+
+### mcore 转 hf
+
+训练结束后,如果需要将生成的mcore格式权重转换回 Hugging Face 格式,可以参照以下示例脚本命令及脚本参数:
-3. 数据集权重准备
- - 参考MindSpeed-RL仓库下[grpo.md](https://gitee.com/ascend/MindSpeed-RL/blob/master/docs/algorithms/grpo.md)
+```shell
+# 脚本中路径请按真实情况配置
+bash examples/ckpt/ckpt_convert_qwen25_mcore2hf.sh
+```
+
+配置参数与上文一致,但需注意以下事项:
+
+- 权重转换转回 Hugging Face 格式时,tp 和 pp 配置需配置为1;
+- load-model-type 参数配置为 mg,save-model-type 参数配置为 hf ;
+- save-dir 路径需要填入原始 HF 模型路径,新权重会存于 HF 原始权重文件下的 mg2hg 目录下,如/qwen2.5_7b_hf/mg2hg/
## 脚本启动
@@ -41,21 +116,10 @@ test_convert_rl.sh 脚本自动执行以下设置过程:
```bash
source /usr/local/Ascend/nnal/atb/set_env.sh --cxx_abi=0
- source /usr/local/Ascend/ascend-toolkit/latest/env/ascend_env.sh
- ```
-
-2. 设置环境变量
-
- ```bash
- MindSpeed_Core_MS_PATH=$(pwd)
- Ascend_PATH=/usr/loacl/Ascend/ascend-toolkit/latest/
- export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter/mindtorch:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/MindSpeed/: \
- ${MindSpeed_Core_MS_PATH}/Megatron-LM/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/: \
- ${MindSpeed_Core_MS_PATH}/transformers/src/: \
- ${Ascend_PATH}/python/site-packages:${Ascend_PATH}/opp/built-in/op_impl/ai_core/tbe:
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh
```
-3. 在**MindSpeed-RL/configs/XXX.yaml**中,将tokenizer_name_or_path、data_path、load字段内容修改为刚刚准备的分词目录、数据集目录和权重目录,如下:
+2. 在**MindSpeed-RL/configs/*_qwen25_7b.yaml**中,将`tokenizer_name_or_path`、`data_path`、`load`字段内容修改为刚刚准备的分词目录、数据集目录和权重目录,如下:
```shell
# e.g:
@@ -64,18 +128,53 @@ test_convert_rl.sh 脚本自动执行以下设置过程:
load: ./ckpt
```
-## 启动命令
+ > 注:yaml 中需添加参数`megatron_training.ai_framework:mindspore`
-1. qwen25-7b-r1-zero
+ 卡数配置参数介绍
- ```shell
- cd MindSpeed-RL
- python cli/train_grpo.py --config-name grpo_trainer_qwen25_7b | tee logs/grpo_trainer_qwen25_7b.log
- ```
+ - `rl_config.actor_resource.num_npus`:actor 需使用的卡数,如 4;
+ - `rl_config.reference_resource.num_npus`:reference 需使用的卡数,如 2;
+ - `rl_config.reward_resource.num_npus`:reward 需使用的卡数,如 2。
-2. deepseek v3-r1-zero
+3. 任务拉起
- ```shell
- cd MindSpeed-RL
- python cli/train_grpo.py --config-name r1_zero_deepseekv3_671b | tee logs/r1_zero_deepseekv3_671b.log
- ```
\ No newline at end of file
+ 需保证任务拉起时可用的 npu 数量满足`2.`中yaml配置所需的卡数要求
+
+- 若`2.`中 yaml 配置的为**单机**参数,可执行如下命令拉起任务:
+
+ ```shell
+ cd MindSpeed-RL
+ bash examples/grpo/grpo_trainer_qwen25_7b.sh
+ ```
+
+ > 注:脚本中`--config-name`应修改为`2.`中设置的 yaml 文件名称, 如`grpo_trainer_qwen25_7b`
+
+- 若`2.`中 yaml 配置的为**多机**参数,可执行如下命令拉起任务:
+
+ **主节点执行:**
+
+ ```shell
+ bash examples/r1/qwen25/r1_zero_qwen25_7b_master.sh
+ ```
+
+ > 注:脚本中`DEFAULT_YAML`应修改为`2.`中设置的 yaml 名称,根据实际机器设置`NNODES`和`NPUS_PER_NODE`
+
+ 配置参数介绍
+
+ - `DEFAULT_YAML`:指定参数配置的 yaml 名称,如 r1_zero_qwen25_7b.yaml;
+ - `NNODES`:共使用多少节点训练,如 2;
+ - `NPUS_PER_NODE`:每个节点有多少张卡,如 8。
+
+ **从节点执行:**
+
+ ```shell
+ bash examples/r1/qwen25/r1_zero_qwen25_7b_worker.sh
+ ```
+
+ > 注:脚本中`NNODES`和`NPUS_PER_NODE`应与主节点配置一致,`MASTER_ADDR`应为主节点`ip`
+
+ 配置参数介绍
+
+ - `NNODES`:共使用多少节点训练,如 2;
+ - `NPUS_PER_NODE`:每个节点有多少张卡,如 8;
+ - `MASTER_ADDR`:主节点 IP 地址。
diff --git a/scripts/set_path_rl.sh b/scripts/set_path_rl.sh
index 15d6535cc732cf490eb7a989efe79ed010f84425..997fdfd52cce6d43ca636eb62d8512908b63cbb0 100644
--- a/scripts/set_path_rl.sh
+++ b/scripts/set_path_rl.sh
@@ -8,7 +8,7 @@ script_path=$(realpath "${BASH_SOURCE[0]}")
script_dir=$(dirname "$script_path")
MindSpeed_Core_MS_PATH=$(dirname $script_dir)
export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/Megatron-LM/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed-RL/:$PYTHONPATH
-export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/msadapter/mindtorch/:${MindSpeed_Core_MS_PATH}/RL/transformers/src:${MindSpeed_Core_MS_PATH}/RL/vllm/:${MindSpeed_Core_MS_PATH}/RL/vllm-ascend/:$PYTHONPATH
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/msadapter/mindtorch/:${MindSpeed_Core_MS_PATH}/RL/transformers/src:${MindSpeed_Core_MS_PATH}/RL/vllm/:${MindSpeed_Core_MS_PATH}/RL/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:$PYTHONPATH
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/opp/built-in/op_impl/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$PYTHONPATH
echo "..............................................done set PYTHONPATH"
echo $PYTHONPATH
\ No newline at end of file
diff --git a/test_convert_rl.sh b/test_convert_rl.sh
index db53529260b0d4a4bd9ad9b1b8d4f9f6bba4e408..78f6a43401bb6403bc6cc438b70021d787ea29e9 100644
--- a/test_convert_rl.sh
+++ b/test_convert_rl.sh
@@ -105,6 +105,49 @@ rm -rf tests
cd ..
echo "..............................................done apply transformers"
+#accelerate
+rm -rf accelerate/
+git clone https://github.com/huggingface/accelerate.git -b v1.6.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone accelerate"
+ exit 1
+fi
+cd accelerate
+git apply ../tools/rules/accelerate.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply accelerate"
+
+#safetensors
+rm -rf safetensors_dir
+mkdir safetensors_dir
+pip install --no-deps safetensors==0.5.1
+if [ $? -ne 0 ]; then
+ echo "Error: pip install safetensors fail"
+else
+ ST_PATH=$(python -c "import site; print(site.getsitepackages()[0])")
+ cp -r ${ST_PATH}/safetensors ./safetensors_dir
+ cd safetensors_dir/safetensors
+ git init
+ git apply ../../tools/rules/safetensors.diff
+ cd ../../
+ export PYTHONPATH=$(pwd)/safetensors_dir:$PYTHONPATH
+ echo "..............................................done apply safetensors"
+fi
+
+#huggingface_hub
+rm -rf huggingface_hub
+git clone https://github.com/huggingface/huggingface_hub.git -b v0.29.2
+if [ $? -ne 0 ]; then
+ echo "Error: git clone huggingface_hub"
+ exit 1
+fi
+cd huggingface_hub
+git apply ../tools/rules_rl/huggingface_hub.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply huggingface_hub"
+
echo "..............................................start code_convert"
MindSpeed_Core_MS_PATH=$PWD
echo ${MindSpeed_Core_MS_PATH}
@@ -118,5 +161,7 @@ python3 tools/transfer.py \
--vllm_ascend_path ${MindSpeed_Core_MS_PATH}/vllm-ascend/ \
--is_rl
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH
+echo $PYTHONPATH
echo "..............................................done code_convert"
diff --git a/tools/rules_rl/huggingface_hub.diff b/tools/rules_rl/huggingface_hub.diff
new file mode 100644
index 0000000000000000000000000000000000000000..65b15d672566f6de2e530450817f7f56883a553f
--- /dev/null
+++ b/tools/rules_rl/huggingface_hub.diff
@@ -0,0 +1,22 @@
+diff --git a/src/huggingface_hub/serialization/_torch.py b/src/huggingface_hub/serialization/_torch.py
+index ccb9c42b..3afd0613 100644
+--- a/src/huggingface_hub/serialization/_torch.py
++++ b/src/huggingface_hub/serialization/_torch.py
+@@ -765,7 +765,7 @@ def get_torch_storage_size(tensor: "torch.Tensor") -> int:
+ pass
+
+ try:
+- return tensor.untyped_storage().nbytes()
++ return tensor.nbytes
+ except AttributeError:
+ # Fallback for torch==1.10
+ try:
+@@ -816,7 +816,7 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+ except Exception:
+ # Fallback for torch==1.10
+ try:
+- return tensor.storage().data_ptr()
++ return tensor
+ except NotImplementedError:
+ # Fallback for meta storage
+ return 0
diff --git a/tools/rules_rl/line_rules.py b/tools/rules_rl/line_rules.py
index 2c8a4ea642c35f98bc802c913e00aa56076d5ff1..ad7665e8296e62383a2b53023821726387b89150 100644
--- a/tools/rules_rl/line_rules.py
+++ b/tools/rules_rl/line_rules.py
@@ -44,14 +44,22 @@ LINE_RULES = {
def func(self, **kwargs):
- return _get_dst_obj(self, value, **kwargs).weight.data.copy_(kwargs.get('data'))
+ set_tensor = _get_dst_obj(self, value, **kwargs)
-+ set_tensor.weight.data = kwargs.get('data')
++ data = kwargs.get('data')
++ if data.dtype != set_tensor.weight.dtype:
++ data = data.to(dtype = set_tensor.weight.dtype)
++ set_tensor.weight.data = data
+
+
+ return set_tensor.weight.data
return func""",
""" def _func_generator_set_bias(value):
def func(self, **kwargs):
- return _get_dst_obj(self, value, **kwargs).bias.data.copy_(kwargs.get('data'))
+ set_tensor = _get_dst_obj(self, value, **kwargs)
-+ set_tensor.bias.data = kwargs.get('data')
++ data = kwargs.get('data')
++ if data.dtype != set_tensor.weight.dtype:
++ data = data.to(dtype = set_tensor.weight.dtype)
++ set_tensor.bias.data = data
+ return set_tensor.bias.data
return func""",
""" self.module = [AutoModelForCausalLM.from_pretrained(
@@ -74,6 +82,15 @@ LINE_RULES = {
},
"mindspeed-rl": {
+ "cli/convert_ckpt.py": [
+"""if __name__ == '__main__':
++ import mindspore as ms
++ ms.set_context(device_target = "CPU")
++ import torch
++ torch.configs.set_pyboost(False)""",
+"""+ parser.add_argument('--save_lora_to_hf', action="store_true", default=False)
+ known_args, _ = parser.parse_known_args()"""
+ ],
"mindspeed_rl/config_cls/megatron_config.py": [
""" self.swap_attention = False
+ self.ai_framework = \"pytorch\"""",