diff --git a/README.md b/README.md index 9063ad8f6ffdb1022647f3a645faa00011d5b5bc..87939d77ea49cafb8bee1a15d8a134a2569c2190 100644 --- a/README.md +++ b/README.md @@ -169,14 +169,14 @@ source auto_convert_rl.sh 此处提供以下强化模型训练拉起流程作为参考。 -- [**DeepSeek-R1-ZERO-Qwen2.5 7B**](./docs/) +- [**Qwen2.5-7B GRPO**](./docs/GRPO.md) 若在环境中`PYTHONPATH`等环境变量失效(例如退出容器后再进入等),可执行如下命令重新设置环境变量 ```shell # 在MindSpeed-Core-MS目录下执行 MindSpeed_Core_MS_PATH=$(pwd) -export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/MindSpeed/:${MindSpeed_Core_MS_PATH}/Megatron-LM/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/transformers/src/:$PYTHONPATH +export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH ``` --- diff --git a/auto_convert_rl.sh b/auto_convert_rl.sh new file mode 100644 index 0000000000000000000000000000000000000000..b2a89bddd002f318b097ef369d9f33d909b53b81 --- /dev/null +++ b/auto_convert_rl.sh @@ -0,0 +1,167 @@ +#!/bin/bash + +#MindSpeed-LLM +rm -rf MindSpeed-LLM/ +git clone https://gitee.com/ascend/MindSpeed-LLM.git -b master +if [ $? -ne 0 ]; then + echo "Error: git clone MindSpeed-LLM" + exit 1 +fi +cd MindSpeed-LLM +git checkout 71c5af4d72078d826fd93fec6980004f0de51132 +rm -rf tests +cd .. +echo "------------------------------------done MindSpeed-LLM" + +#MindSpeed +rm -rf MindSpeed/ +git clone https://gitee.com/ascend/MindSpeed.git -b core_r0.8.0 +if [ $? 
-ne 0 ]; then + echo "Error: git clone MindSpeed" + exit 1 +fi +cd MindSpeed +git checkout 31aaf3d4ca86234b15f4a5d3af20bd6df06e7d45 +rm -rf tests_extend +cd .. +echo "...............................................done MindSpeed" + +#MindSpeed-RL +rm -rf MindSpeed-RL/ +git clone https://gitee.com/ascend/MindSpeed-RL.git +if [ $? -ne 0 ]; then + echo "Error: git clone MindSpeed-RL" + exit 1 +fi +cd MindSpeed-RL +git checkout 559db0856891e5f8504a0b21d4b26969a82241df +rm -rf tests +cd .. +echo "...............................................done MindSpeed-RL" + +#Megatron-LM +rm -rf Megatron-LM/ +git clone https://gitee.com/mirrors/Megatron-LM.git +if [ $? -ne 0 ]; then + echo "Error: git clone Megatron-LM" + exit 1 +fi +cd Megatron-LM +git checkout core_r0.8.0 +rm -rf tests +cd .. +echo "..............................................done Megatron-LM" + +#MSAdapter +rm -rf MSAdapter +git clone https://openi.pcl.ac.cn/OpenI/MSAdapter.git -b master +if [ $? -ne 0 ]; then + echo "Error: git clone MSAdapter" + exit 1 +fi +cd MSAdapter +rm -rf tests +cd .. +echo "..............................................done MSAdapter" + +#vllm +rm -rf vllm +git clone https://gitee.com/mirrors/vllm.git +cd vllm +git checkout v0.7.3 +rm -rf tests +if [ $? -ne 0 ]; then + echo "Error: git clone vllm" + exit 1 +fi +cd .. +echo "..............................................done vllm" + + +#vllm-ascend +rm -rf vllm-ascend +git clone https://gitee.com/mirrors/vllm-ascend.git +cd vllm-ascend +git checkout 0713836e95fe993feefe334945b5b273e4add1f1 +rm -rf tests +if [ $? -ne 0 ]; then + echo "Error: git clone vllm-ascend" + exit 1 +fi +cd .. +echo "..............................................done vllm-ascend" + +#transformers +rm -rf transformers/ +git clone https://gitee.com/mirrors/huggingface_transformers.git -b v4.47.0 +if [ $? 
-ne 0 ]; then + echo "Error: git clone huggingface_transformers" + exit 1 +fi +mv huggingface_transformers transformers +cd transformers +git apply ../tools/rules/transformers.diff +rm -rf tests +cd .. +echo "..............................................done apply transformers" + +#accelerate +rm -rf accelerate/ +git clone https://github.com/huggingface/accelerate.git -b v1.6.0 +if [ $? -ne 0 ]; then + echo "Error: git clone accelerate" + exit 1 +fi +cd accelerate +git apply ../tools/rules/accelerate.diff +rm -rf tests +cd .. +echo "..............................................done apply accelerate" + +#safetensors +rm -rf safetensors_dir +mkdir safetensors_dir +pip install --no-deps safetensors==0.5.1 +if [ $? -ne 0 ]; then + echo "Error: pip install safetensors fail" +else + ST_PATH=$(python -c "import site; print(site.getsitepackages()[0])") + cp -r ${ST_PATH}/safetensors ./safetensors_dir + cd safetensors_dir/safetensors + git init + git apply ../../tools/rules/safetensors.diff + cd ../../ + export PYTHONPATH=$(pwd)/safetensors_dir:$PYTHONPATH + echo "..............................................done apply safetensors" +fi + +#huggingface_hub +rm -rf huggingface_hub +git clone https://github.com/huggingface/huggingface_hub.git -b v0.29.2 +if [ $? -ne 0 ]; then + echo "Error: git clone huggingface_hub" + exit 1 +fi +cd huggingface_hub +git apply ../tools/rules_rl/huggingface_hub.diff +rm -rf tests +cd .. 
+echo "..............................................done apply huggingface_hub" + +echo "..............................................start code_convert" +MindSpeed_Core_MS_PATH=$PWD +echo ${MindSpeed_Core_MS_PATH} + +python3 tools/transfer.py \ +--megatron_path ${MindSpeed_Core_MS_PATH}/Megatron-LM/megatron/ \ +--mindspeed_path ${MindSpeed_Core_MS_PATH}/MindSpeed/mindspeed/ \ +--mindspeed_llm_path ${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/ \ +--mindspeed_rl_path ${MindSpeed_Core_MS_PATH}/MindSpeed-RL/ \ +--vllm_path ${MindSpeed_Core_MS_PATH}/vllm/ \ +--vllm_ascend_path ${MindSpeed_Core_MS_PATH}/vllm-ascend/ \ +--is_rl + +export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH +echo $PYTHONPATH +echo "..............................................done code_convert" + diff --git a/docs/GRPO.md b/docs/GRPO.md index e94d67a59c4b55cdfc8c2fb23aa81d5c9c9ae08a..189b02fbb4bc57643afff378a8cfb1af72b3b284 100644 --- a/docs/GRPO.md +++ b/docs/GRPO.md @@ -1,39 +1,114 @@ -# RL使用 +# 基于 MindSpore AI 框架的 GRPO-DeepSeek-R1-Qwen2.5-7B 使用指南 -## 转换脚本 +Group Relative Policy Optimization (GRPO) 是 Deepseek-Math 中提出的训练方法,它移除了 PPO 中对 Critic 模型的依赖,而是通过计算同一 prompt 多次重复采样输出的相对奖励来估计优势函数,这一创新大大减少了显存占用,提高了算法在强化学习任务中的效率。 -test_convert_rl.sh 脚本自动执行以下设置过程: +在 GRPO 方法中包含了三个关键模型:Actor,Reference,Reward。其中 Actor 和 Reference 模型是通过 SFT 后得到的策略模型,而 Reward 模型则是通过规则奖励来评估。GRPO 的核心训练目标是优化 Actor 模型的策略,使其在执行强化学习任务时能够产生更优的动作序列,更符合任务目标的预期。 -1. 
克隆所需仓库: - - MindSpeed-LLM(commit id:421ef7bcb83fb31844a1efb688cde71705c0526e) - - MindSpeed(commit id:0dfa0035ec54d9a74b2f6ee2867367df897299df) - - MindSpeed-RL(分支:2.0.0) - - Megatron-LM(分支:core_r0.8.0) - - msadapter(分支:master) - - vllm(分支:v0.7.3) - - vllm-ascend(commit id:0713836e95fe993feefe334945b5b273e4add1f1) - - transformers(分支:v4.47.0) +本篇工作基于 MindSpore AI 框架使用 Qwen2.5-7B 模型复现 GRPO-DeepSeek-R1 在 Math 领域的工作。 -2. 使用 transfer.py 工具运行代码转换 +## 依赖的三方库版本 -## 使用说明 +- MindSpeed-LLM(commit id:71c5af4d72078d826fd93fec6980004f0de51132) +- MindSpeed(分支:core_r0.8.0, commit id:31aaf3d4ca86234b15f4a5d3af20bd6df06e7d45) +- MindSpeed-RL(分支:master, commit id:559db0856891e5f8504a0b21d4b26969a82241df) +- Megatron-LM(分支:core_r0.8.0) +- MSAdapter(分支:master) +- vllm(分支:v0.7.3) +- vllm-ascend(commit id:0713836e95fe993feefe334945b5b273e4add1f1) +- transformers(分支:v4.47.0) +- accelerate(分支:v1.6.0) +- safetensors(版本:0.5.1) +- huggingface_hub(分支:v0.29.2) -1. 克隆 MindSpeed-Core-MS 仓库: +## 模型选择 - ```shell - git clone -b feature-0.2 https://gitee.com/ascend/MindSpeed-Core-MS.git - cd MindSpeed-Core-MS/ - ``` + [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) 模型指令遵从度高,有一定概率能引导模型输出 `......$\boxed{}` 格式回复,训练曲线符合预期,在评测集上提升较大。 -2. 使转换脚本可执行并运行: +## 数据预处理 - ```shell - chmod +x test_convert_rl.sh - ./test_convert_rl.sh - ``` +以 DeepScaler 为例: + +数据集下载地址:[DeepScaler](https://huggingface.co/datasets/agentica-org/DeepScaleR-Preview-Dataset/tree/main) + +数据集下载可以基于网页直接下载,也可以基于命令行下载,比如: + +```shell +# 读取deepscaler数据集 +mkdir dataset +cd dataset/ +wget https://huggingface.co/datasets/agentica-org/DeepScaleR-Preview-Dataset/resolve/main/deepscaler.json --no-check +cd .. 
+``` + +数据预处理的 yaml 配置文件放置于 `MindSpeed-RL/configs/datasets` 文件夹下,通过以下命令进行数据集预处理: + +```shell +# 读取configs/datasets/deepscaler.yaml文件 +bash examples/data/preprocess_data.sh deepscaler +``` + +数据集处理配置可以根据需求自行配置,以下是数据集处理的 yaml 文件中基础参数的介绍: + +- `input`:数据集的路径,需指定具体文件,例如/datasets/deepscaler.json +- `tokenizer_type`:指定分词器的类型,例如 HuggingFaceTokenizer 使用 Hugging Face 库提供的分词器来对文本进行分词处理; +- `tokenizer_name_or_path`:指定分词器的名称或路径; +- `output_prefix`:输出结果的前缀路径,例如 /datasets/data; +- `workers`:设置处理数据时使用的 worker 数; +- `prompt_type`: 用于指定对话模板,能够让 base 模型微调后能具备更好的对话能力,prompt-type 的可选项可以在 configs/model/templates.json 文件内查看; +- `log_interval`:设置日志记录的间隔,每处理多少条数据时记录一次日志,用于监控数据处理的进度和状态; +- `handler_name`:指定处理数据的处理器名称; +- `seq_length`:设置数据预处理最大序列长度,超过了会过滤掉; + +## 权重转换 + +根据 GRPO 算法要求,Actor 和 Reference 模型应该使用 SFT 微调后的模型进行初始化,Reward 模型应该使用规则奖励。GRPO 算法模型权重均使用 Megatron-mcore 格式,其他格式的权重需要进行模型权重转换。可参考[权重转换部分](https://gitee.com/ascend/MindSpeed-RL/blob/master/docs/algorithms/grpo.md) + +以 Qwen2.5-7B 模型的权重转换脚本为参考,权重转换步骤如下: + +### 获取权重文件 + +hf 权重文件可从 Huggingface 网站获取,请根据模型的使用场景灵活选择,在此以 [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) 为例。 + +### hf 转 mcore + +在训练前,需要将 Hugging Face 权重转换成Mcore格式,示例脚本启动命令和配置参数如下: + +```bash +# 脚本中路径请按真实情况配置 +bash examples/ckpt/ckpt_convert_qwen25_hf2mcore.sh +``` + +> 注:这里会调用到 MindSpeed-LLM 仓,进行权重转换前需先确认环境变量已配备完毕。 + +配置参数介绍 + +- `use-mcore-models`:启用 MCore 模型; +- `model-type`:指定模型类型,如 GPT; +- `load-model-type`:指定加载模型的类型,如 hf(Hugging Face); +- `save-model-type`:指定保存模型的类型,如 mg; +- `target-tensor-parallel-size`:设置目标张量并行大小; +- `target-pipeline-parallel-size`:设置目标流水线并行大小; +- `add-qkv-bias`:是否进行 QKV 偏置; +- `load-dir`:加载 Hugging Face 权重的路径; +- `save-dir`:保存转换后权重的路径; +- `tokenizer-model`:分词器模型文件的路径; +- `model-type-hf`:指定 Hugging Face 模型类型,如 llama2; +- `params-dtype`:指定参数的数据类型,如 bf16。 + +### mcore 转 hf + +训练结束后,如果需要将生成的mcore格式权重转换回 Hugging Face 格式,可以参照以下示例脚本命令及脚本参数: -3. 
数据集权重准备 - - 参考MindSpeed-RL仓库下[grpo.md](https://gitee.com/ascend/MindSpeed-RL/blob/master/docs/algorithms/grpo.md) +```shell +# 脚本中路径请按真实情况配置 +bash examples/ckpt/ckpt_convert_qwen25_mcore2hf.sh +``` + +配置参数与上文一致,但需注意以下事项: + +- 权重转换转回 Hugging Face 格式时,tp 和 pp 配置需配置为1; +- load-model-type 参数配置为 mg,save-model-type 参数配置为 hf ; +- save-dir 路径需要填入原始 HF 模型路径,新权重会存于 HF 原始权重文件下的 mg2hg 目录下,如/qwen2.5_7b_hf/mg2hg/ ## 脚本启动 @@ -41,21 +116,10 @@ test_convert_rl.sh 脚本自动执行以下设置过程: ```bash source /usr/local/Ascend/nnal/atb/set_env.sh --cxx_abi=0 - source /usr/local/Ascend/ascend-toolkit/latest/env/ascend_env.sh - ``` - -2. 设置环境变量 - - ```bash - MindSpeed_Core_MS_PATH=$(pwd) - Ascend_PATH=/usr/loacl/Ascend/ascend-toolkit/latest/ - export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter/mindtorch:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/MindSpeed/: \ - ${MindSpeed_Core_MS_PATH}/Megatron-LM/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/: \ - ${MindSpeed_Core_MS_PATH}/transformers/src/: \ - ${Ascend_PATH}/python/site-packages:${Ascend_PATH}/opp/built-in/op_impl/ai_core/tbe: + source /usr/local/Ascend/ascend-toolkit/set_env.sh ``` -3. 在**MindSpeed-RL/configs/XXX.yaml**中,将tokenizer_name_or_path、data_path、load字段内容修改为刚刚准备的分词目录、数据集目录和权重目录,如下: +2. 在**MindSpeed-RL/configs/*_qwen25_7b.yaml**中,将`tokenizer_name_or_path`、`data_path`、`load`字段内容修改为刚刚准备的分词目录、数据集目录和权重目录,如下: ```shell # e.g: @@ -64,18 +128,53 @@ test_convert_rl.sh 脚本自动执行以下设置过程: load: ./ckpt ``` -## 启动命令 + > 注:yaml 中需添加参数`megatron_training.ai_framework:mindspore` -1. qwen25-7b-r1-zero + 卡数配置参数介绍 - ```shell - cd MindSpeed-RL - python cli/train_grpo.py --config-name grpo_trainer_qwen25_7b | tee logs/grpo_trainer_qwen25_7b.log - ``` + - `rl_config.actor_resource.num_npus`:actor 需使用的卡数,如 4; + - `rl_config.reference_resource.num_npus`:reference 需使用的卡数,如 2; + - `rl_config.reward_resource.num_npus`:reward 需使用的卡数,如 2。 -2. deepseek v3-r1-zero +3. 
任务拉起 - ```shell - cd MindSpeed-RL - python cli/train_grpo.py --config-name r1_zero_deepseekv3_671b | tee logs/r1_zero_deepseekv3_671b.log - ``` \ No newline at end of file + 需保证任务拉起时可用的 npu 数量满足`2.`中yaml配置所需的卡数要求 + +- 若`2.`中 yaml 配置的为**单机**参数,可执行如下命令拉起任务: + + ```shell + cd MindSpeed-RL + bash examples/grpo/grpo_trainer_qwen25_7b.sh + ``` + + > 注:脚本中`--config-name`应修改为`2.`中设置的 yaml 文件名称, 如`grpo_trainer_qwen25_7b` + +- 若`2.`中 yaml 配置的为**多机**参数,可执行如下命令拉起任务: + + **主节点执行:** + + ```shell + bash examples/r1/qwen25/r1_zero_qwen25_7b_master.sh + ``` + + > 注:脚本中`DEFAULT_YAML`应修改为`2.`中设置的 yaml 名称,根据实际机器设置`NNODES`和`NPUS_PER_NODE` + + 配置参数介绍 + + - `DEFAULT_YAML`:指定参数配置的 yaml 名称,如 r1_zero_qwen25_7b.yaml; + - `NNODES`:共使用多少节点训练,如 2; + - `NPUS_PER_NODE`:每个节点有多少张卡,如 8。 + + **从节点执行:** + + ```shell + bash examples/r1/qwen25/r1_zero_qwen25_7b_worker.sh + ``` + + > 注:脚本中`NNODES`和`NPUS_PER_NODE`应于主节点配置一致,`MASTER_ADDR`应为主节点`ip` + + 配置参数介绍 + + - `NNODES`:共使用多少节点训练,如 2; + - `NPUS_PER_NODE`:每个节点有多少张卡,如 8; + - `MASTER_ADDR`:主节点 IP 地址。 diff --git a/scripts/set_path_rl.sh b/scripts/set_path_rl.sh index 15d6535cc732cf490eb7a989efe79ed010f84425..997fdfd52cce6d43ca636eb62d8512908b63cbb0 100644 --- a/scripts/set_path_rl.sh +++ b/scripts/set_path_rl.sh @@ -8,7 +8,7 @@ script_path=$(realpath "${BASH_SOURCE[0]}") script_dir=$(dirname "$script_path") MindSpeed_Core_MS_PATH=$(dirname $script_dir) export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/Megatron-LM/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed-RL/:$PYTHONPATH -export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/msadapter/mindtorch/:${MindSpeed_Core_MS_PATH}/RL/transformers/src:${MindSpeed_Core_MS_PATH}/RL/vllm/:${MindSpeed_Core_MS_PATH}/RL/vllm-ascend/:$PYTHONPATH +export 
PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/msadapter/mindtorch/:${MindSpeed_Core_MS_PATH}/RL/transformers/src:${MindSpeed_Core_MS_PATH}/RL/vllm/:${MindSpeed_Core_MS_PATH}/RL/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:$PYTHONPATH export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/opp/built-in/op_impl/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$PYTHONPATH echo "..............................................done set PYTHONPATH" echo $PYTHONPATH \ No newline at end of file diff --git a/test_convert_rl.sh b/test_convert_rl.sh index db53529260b0d4a4bd9ad9b1b8d4f9f6bba4e408..78f6a43401bb6403bc6cc438b70021d787ea29e9 100644 --- a/test_convert_rl.sh +++ b/test_convert_rl.sh @@ -105,6 +105,49 @@ rm -rf tests cd .. echo "..............................................done apply transformers" +#accelerate +rm -rf accelerate/ +git clone https://github.com/huggingface/accelerate.git -b v1.6.0 +if [ $? -ne 0 ]; then + echo "Error: git clone accelerate" + exit 1 +fi +cd accelerate +git apply ../tools/rules/accelerate.diff +rm -rf tests +cd .. +echo "..............................................done apply accelerate" + +#safetensors +rm -rf safetensors_dir +mkdir safetensors_dir +pip install --no-deps safetensors==0.5.1 +if [ $? -ne 0 ]; then + echo "Error: pip install safetensors fail" +else + ST_PATH=$(python -c "import site; print(site.getsitepackages()[0])") + cp -r ${ST_PATH}/safetensors ./safetensors_dir + cd safetensors_dir/safetensors + git init + git apply ../../tools/rules/safetensors.diff + cd ../../ + export PYTHONPATH=$(pwd)/safetensors_dir:$PYTHONPATH + echo "..............................................done apply safetensors" +fi + +#huggingface_hub +rm -rf huggingface_hub +git clone https://github.com/huggingface/huggingface_hub.git -b v0.29.2 +if [ $? 
-ne 0 ]; then + echo "Error: git clone huggingface_hub" + exit 1 +fi +cd huggingface_hub +git apply ../tools/rules_rl/huggingface_hub.diff +rm -rf tests +cd .. +echo "..............................................done apply huggingface_hub" + echo "..............................................start code_convert" MindSpeed_Core_MS_PATH=$PWD echo ${MindSpeed_Core_MS_PATH} @@ -118,5 +161,7 @@ python3 tools/transfer.py \ --vllm_ascend_path ${MindSpeed_Core_MS_PATH}/vllm-ascend/ \ --is_rl +export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH +echo $PYTHONPATH echo "..............................................done code_convert" diff --git a/tools/rules_rl/huggingface_hub.diff b/tools/rules_rl/huggingface_hub.diff new file mode 100644 index 0000000000000000000000000000000000000000..65b15d672566f6de2e530450817f7f56883a553f --- /dev/null +++ b/tools/rules_rl/huggingface_hub.diff @@ -0,0 +1,22 @@ +diff --git a/src/huggingface_hub/serialization/_torch.py b/src/huggingface_hub/serialization/_torch.py +index ccb9c42b..3afd0613 100644 +--- a/src/huggingface_hub/serialization/_torch.py ++++ b/src/huggingface_hub/serialization/_torch.py +@@ -765,7 +765,7 @@ def get_torch_storage_size(tensor: "torch.Tensor") -> int: + pass + + try: +- return tensor.untyped_storage().nbytes() ++ return tensor.nbytes + except AttributeError: + # Fallback for torch==1.10 + try: +@@ -816,7 +816,7 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]: + except Exception: + # Fallback for torch==1.10 + try: +- return tensor.storage().data_ptr() ++ return tensor + 
except NotImplementedError: + # Fallback for meta storage + return 0 diff --git a/tools/rules_rl/line_rules.py b/tools/rules_rl/line_rules.py index 2c8a4ea642c35f98bc802c913e00aa56076d5ff1..ad7665e8296e62383a2b53023821726387b89150 100644 --- a/tools/rules_rl/line_rules.py +++ b/tools/rules_rl/line_rules.py @@ -44,14 +44,22 @@ LINE_RULES = { def func(self, **kwargs): - return _get_dst_obj(self, value, **kwargs).weight.data.copy_(kwargs.get('data')) + set_tensor = _get_dst_obj(self, value, **kwargs) -+ set_tensor.weight.data = kwargs.get('data') ++ data = kwargs.get('data') ++ if data.dtype != set_tensor.weight.dtype: ++ data = data.to(dtype = set_tensor.weight.dtype) ++ set_tensor.weight.data = data + + + return set_tensor.weight.data return func""", """ def _func_generator_set_bias(value): def func(self, **kwargs): - return _get_dst_obj(self, value, **kwargs).bias.data.copy_(kwargs.get('data')) + set_tensor = _get_dst_obj(self, value, **kwargs) -+ set_tensor.bias.data = kwargs.get('data') ++ data = kwargs.get('data') ++ if data.dtype != set_tensor.weight.dtype: ++ data = data.to(dtype = set_tensor.weight.dtype) ++ set_tensor.bias.data = data + return set_tensor.bias.data return func""", """ self.module = [AutoModelForCausalLM.from_pretrained( @@ -74,6 +82,15 @@ LINE_RULES = { }, "mindspeed-rl": { + "cli/convert_ckpt.py": [ +"""if __name__ == '__main__': ++ import mindspore as ms ++ ms.set_context(device_target = "CPU") ++ import torch ++ torch.configs.set_pyboost(False)""", +"""+ parser.add_argument('--save_lora_to_hf', action="store_true", default=False) + known_args, _ = parser.parse_known_args()""" + ], "mindspeed_rl/config_cls/megatron_config.py": [ """ self.swap_attention = False + self.ai_framework = \"pytorch\"""",