diff --git a/README.md b/README.md
index 9063ad8f6ffdb1022647f3a645faa00011d5b5bc..87939d77ea49cafb8bee1a15d8a134a2569c2190 100644
--- a/README.md
+++ b/README.md
@@ -169,14 +169,14 @@ source auto_convert_rl.sh
此处提供以下强化模型训练拉起流程作为参考。
-- [**DeepSeek-R1-ZERO-Qwen2.5 7B**](./docs/)
+- [**Qwen2.5-7B GRPO**](./docs/GRPO.md)
若在环境中`PYTHONPATH`等环境变量失效(例如退出容器后再进入等),可执行如下命令重新设置环境变量
```shell
# 在MindSpeed-Core-MS目录下执行
MindSpeed_Core_MS_PATH=$(pwd)
-export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/MindSpeed/:${MindSpeed_Core_MS_PATH}/Megatron-LM/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/transformers/src/:$PYTHONPATH
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH
```
---
diff --git a/auto_convert_rl.sh b/auto_convert_rl.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b2a89bddd002f318b097ef369d9f33d909b53b81
--- /dev/null
+++ b/auto_convert_rl.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+
+#MindSpeed-LLM
+rm -rf MindSpeed-LLM/
+git clone https://gitee.com/ascend/MindSpeed-LLM.git -b master
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MindSpeed-LLM"
+ exit 1
+fi
+cd MindSpeed-LLM
+git checkout 71c5af4d72078d826fd93fec6980004f0de51132
+rm -rf tests
+cd ..
+echo "------------------------------------done MindSpeed-LLM"
+
+#MindSpeed
+rm -rf MindSpeed/
+git clone https://gitee.com/ascend/MindSpeed.git -b core_r0.8.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MindSpeed"
+ exit 1
+fi
+cd MindSpeed
+git checkout 31aaf3d4ca86234b15f4a5d3af20bd6df06e7d45
+rm -rf tests_extend
+cd ..
+echo "...............................................done MindSpeed"
+
+#MindSpeed-RL
+rm -rf MindSpeed-RL/
+git clone https://gitee.com/ascend/MindSpeed-RL.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MindSpeed-RL"
+ exit 1
+fi
+cd MindSpeed-RL
+git checkout 559db0856891e5f8504a0b21d4b26969a82241df
+rm -rf tests
+cd ..
+echo "...............................................done MindSpeed-RL"
+
+#Megatron-LM
+rm -rf Megatron-LM/
+git clone https://gitee.com/mirrors/Megatron-LM.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone Megatron-LM"
+ exit 1
+fi
+cd Megatron-LM
+git checkout core_r0.8.0
+rm -rf tests
+cd ..
+echo "..............................................done Megatron-LM"
+
+#MSAdapter
+rm -rf MSAdapter
+git clone https://openi.pcl.ac.cn/OpenI/MSAdapter.git -b master
+if [ $? -ne 0 ]; then
+ echo "Error: git clone MSAdapter"
+ exit 1
+fi
+cd MSAdapter
+rm -rf tests
+cd ..
+echo "..............................................done MSAdapter"
+
+#vllm
+rm -rf vllm
+git clone https://gitee.com/mirrors/vllm.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone vllm"
+ exit 1
+fi
+cd vllm
+git checkout v0.7.3
+rm -rf tests
+cd ..
+echo "..............................................done vllm"
+
+
+#vllm-ascend
+rm -rf vllm-ascend
+git clone https://gitee.com/mirrors/vllm-ascend.git
+if [ $? -ne 0 ]; then
+ echo "Error: git clone vllm-ascend"
+ exit 1
+fi
+cd vllm-ascend
+git checkout 0713836e95fe993feefe334945b5b273e4add1f1
+rm -rf tests
+cd ..
+echo "..............................................done vllm-ascend"
+
+#transformers
+rm -rf transformers/
+git clone https://gitee.com/mirrors/huggingface_transformers.git -b v4.47.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone huggingface_transformers"
+ exit 1
+fi
+mv huggingface_transformers transformers
+cd transformers
+git apply ../tools/rules/transformers.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply transformers"
+
+#accelerate
+rm -rf accelerate/
+git clone https://github.com/huggingface/accelerate.git -b v1.6.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone accelerate"
+ exit 1
+fi
+cd accelerate
+git apply ../tools/rules/accelerate.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply accelerate"
+
+#safetensors
+rm -rf safetensors_dir
+mkdir safetensors_dir
+pip install --no-deps safetensors==0.5.1
+if [ $? -ne 0 ]; then
+ echo "Error: pip install safetensors fail"
+else
+ ST_PATH=$(python -c "import site; print(site.getsitepackages()[0])")
+ cp -r ${ST_PATH}/safetensors ./safetensors_dir
+ cd safetensors_dir/safetensors
+ git init
+ git apply ../../tools/rules/safetensors.diff
+ cd ../../
+ export PYTHONPATH=$(pwd)/safetensors_dir:$PYTHONPATH
+ echo "..............................................done apply safetensors"
+fi
+
+#huggingface_hub
+rm -rf huggingface_hub
+git clone https://github.com/huggingface/huggingface_hub.git -b v0.29.2
+if [ $? -ne 0 ]; then
+ echo "Error: git clone huggingface_hub"
+ exit 1
+fi
+cd huggingface_hub
+git apply ../tools/rules_rl/huggingface_hub.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply huggingface_hub"
+
+echo "..............................................start code_convert"
+MindSpeed_Core_MS_PATH=$PWD
+echo ${MindSpeed_Core_MS_PATH}
+
+python3 tools/transfer.py \
+--megatron_path ${MindSpeed_Core_MS_PATH}/Megatron-LM/megatron/ \
+--mindspeed_path ${MindSpeed_Core_MS_PATH}/MindSpeed/mindspeed/ \
+--mindspeed_llm_path ${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/ \
+--mindspeed_rl_path ${MindSpeed_Core_MS_PATH}/MindSpeed-RL/ \
+--vllm_path ${MindSpeed_Core_MS_PATH}/vllm/ \
+--vllm_ascend_path ${MindSpeed_Core_MS_PATH}/vllm-ascend/ \
+--is_rl
+
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/MSAdapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH
+echo $PYTHONPATH
+echo "..............................................done code_convert"
+
diff --git a/docs/GRPO.md b/docs/GRPO.md
index e94d67a59c4b55cdfc8c2fb23aa81d5c9c9ae08a..189b02fbb4bc57643afff378a8cfb1af72b3b284 100644
--- a/docs/GRPO.md
+++ b/docs/GRPO.md
@@ -1,39 +1,114 @@
-# RL使用
+# 基于 MindSpore AI 框架的 GRPO-DeepSeek-R1-Qwen2.5-7B 使用指南
-## 转换脚本
+Group Relative Policy Optimization (GRPO) 是 Deepseek-Math 中提出的训练方法,它移除了 PPO 中对 Critic 模型的依赖,而是通过计算同一 prompt 多次重复采样输出的相对奖励来估计优势函数,这一创新大大减少了显存占用,提高了算法在强化学习任务中的效率。
-test_convert_rl.sh 脚本自动执行以下设置过程:
+在 GRPO 方法中包含了三个关键模型:Actor,Reference,Reward。其中 Actor 和 Reference 模型是通过 SFT 后得到的策略模型,而 Reward 模型则是通过规则奖励来评估。GRPO 的核心训练目标是优化 Actor 模型的策略,使其在执行强化学习任务时能够产生更优的动作序列,更符合任务目标的预期。
-1. 克隆所需仓库:
- - MindSpeed-LLM(commit id:421ef7bcb83fb31844a1efb688cde71705c0526e)
- - MindSpeed(commit id:0dfa0035ec54d9a74b2f6ee2867367df897299df)
- - MindSpeed-RL(分支:2.0.0)
- - Megatron-LM(分支:core_r0.8.0)
- - msadapter(分支:master)
- - vllm(分支:v0.7.3)
- - vllm-ascend(commit id:0713836e95fe993feefe334945b5b273e4add1f1)
- - transformers(分支:v4.47.0)
+本篇工作基于 MindSpore AI 框架使用 Qwen2.5-7B 模型复现 GRPO-DeepSeek-R1 在 Math 领域的工作。
-2. 使用 transfer.py 工具运行代码转换
+## 依赖的三方库版本
-## 使用说明
+- MindSpeed-LLM(commit id:71c5af4d72078d826fd93fec6980004f0de51132)
+- MindSpeed(分支:core_r0.8.0, commit id:31aaf3d4ca86234b15f4a5d3af20bd6df06e7d45)
+- MindSpeed-RL(分支:master, commit id:559db0856891e5f8504a0b21d4b26969a82241df)
+- Megatron-LM(分支:core_r0.8.0)
+- MSAdapter(分支:master)
+- vllm(分支:v0.7.3)
+- vllm-ascend(commit id:0713836e95fe993feefe334945b5b273e4add1f1)
+- transformers(分支:v4.47.0)
+- accelerate(分支:v1.6.0)
+- safetensors(版本:0.5.1)
+- huggingface_hub(分支:v0.29.2)
-1. 克隆 MindSpeed-Core-MS 仓库:
+## 模型选择
- ```shell
- git clone -b feature-0.2 https://gitee.com/ascend/MindSpeed-Core-MS.git
- cd MindSpeed-Core-MS/
- ```
+ [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) 模型指令遵从度高,有一定概率能引导模型输出 `......$\boxed{}` 格式回复,训练曲线符合预期,在评测集上提升较大。
-2. 使转换脚本可执行并运行:
+## 数据预处理
- ```shell
- chmod +x test_convert_rl.sh
- ./test_convert_rl.sh
- ```
+以 DeepScaler 为例:
+
+数据集下载地址:[DeepScaler](https://huggingface.co/datasets/agentica-org/DeepScaleR-Preview-Dataset/tree/main)
+
+数据集下载可以基于网页直接下载,也可以基于命令行下载,比如:
+
+```shell
+# 读取deepscaler数据集
+mkdir dataset
+cd dataset/
+wget https://huggingface.co/datasets/agentica-org/DeepScaleR-Preview-Dataset/resolve/main/deepscaler.json --no-check
+cd ..
+```
+
+数据预处理的 yaml 配置文件放置于 `MindSpeed-RL/configs/datasets` 文件夹下,通过以下命令进行数据集预处理:
+
+```shell
+# 读取configs/datasets/deepscaler.yaml文件
+bash examples/data/preprocess_data.sh deepscaler
+```
+
+数据集处理配置可以根据需求自行配置,以下是数据集处理的 yaml 文件中基础参数的介绍:
+
+- `input`:数据集的路径,需指定具体文件,例如/datasets/deepscaler.json
+- `tokenizer_type`:指定分词器的类型,例如 HuggingFaceTokenizer 使用 Hugging Face 库提供的分词器来对文本进行分词处理;
+- `tokenizer_name_or_path`:指定分词器的名称或路径;
+- `output_prefix`:输出结果的前缀路径,例如 /datasets/data;
+- `workers`:设置处理数据时使用的 worker 数;
+- `prompt_type`: 用于指定对话模板,能够让 base 模型微调后能具备更好的对话能力,prompt-type 的可选项可以在 configs/model/templates.json 文件内查看;
+- `log_interval`:设置日志记录的间隔,每处理多少条数据时记录一次日志,用于监控数据处理的进度和状态;
+- `handler_name`:指定处理数据的处理器名称;
+- `seq_length`:设置数据预处理最大序列长度,超过了会过滤掉;
+
+## 权重转换
+
+根据 GRPO 算法要求,Actor 和 Reference 模型应该使用 SFT 微调后的模型进行初始化,Reward 模型应该使用规则奖励。GRPO 算法模型权重均使用 Megatron-mcore 格式,其他格式的权重需要进行模型权重转换。可参考[权重转换部分](https://gitee.com/ascend/MindSpeed-RL/blob/master/docs/algorithms/grpo.md)
+
+以 Qwen2.5-7B 模型的权重转换脚本为参考,权重转换步骤如下:
+
+### 获取权重文件
+
+hf 权重文件可从 Huggingface 网站获取,请根据模型的使用场景灵活选择,在此以 [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B) 为例。
+
+### hf 转 mcore
+
+在训练前,需要将 Hugging Face 权重转换成 Mcore 格式,示例脚本启动命令和配置参数如下:
+
+```bash
+# 脚本中路径请按真实情况配置
+bash examples/ckpt/ckpt_convert_qwen25_hf2mcore.sh
+```
+
+> 注:这里会调用到 MindSpeed-LLM 仓,进行权重转换前需先确认环境变量已配置完毕。
+
+配置参数介绍
+
+- `use-mcore-models`:启用 MCore 模型;
+- `model-type`:指定模型类型,如 GPT;
+- `load-model-type`:指定加载模型的类型,如 hf(Hugging Face);
+- `save-model-type`:指定保存模型的类型,如 mg;
+- `target-tensor-parallel-size`:设置目标张量并行大小;
+- `target-pipeline-parallel-size`:设置目标流水线并行大小;
+- `add-qkv-bias`:是否进行 QKV 偏置;
+- `load-dir`:加载 Hugging Face 权重的路径;
+- `save-dir`:保存转换后权重的路径;
+- `tokenizer-model`:分词器模型文件的路径;
+- `model-type-hf`:指定 Hugging Face 模型类型,如 llama2;
+- `params-dtype`:指定参数的数据类型,如 bf16。
+
+### mcore 转 hf
+
+训练结束后,如果需要将生成的mcore格式权重转换回 Hugging Face 格式,可以参照以下示例脚本命令及脚本参数:
-3. 数据集权重准备
- - 参考MindSpeed-RL仓库下[grpo.md](https://gitee.com/ascend/MindSpeed-RL/blob/master/docs/algorithms/grpo.md)
+```shell
+# 脚本中路径请按真实情况配置
+bash examples/ckpt/ckpt_convert_qwen25_mcore2hf.sh
+```
+
+配置参数与上文一致,但需注意以下事项:
+
+- 权重转换转回 Hugging Face 格式时,tp 和 pp 配置需配置为1;
+- load-model-type 参数配置为 mg,save-model-type 参数配置为 hf ;
+- save-dir 路径需要填入原始 HF 模型路径,新权重会存于 HF 原始权重文件下的 mg2hg 目录下,如/qwen2.5_7b_hf/mg2hg/
## 脚本启动
@@ -41,21 +116,10 @@ test_convert_rl.sh 脚本自动执行以下设置过程:
```bash
source /usr/local/Ascend/nnal/atb/set_env.sh --cxx_abi=0
- source /usr/local/Ascend/ascend-toolkit/latest/env/ascend_env.sh
- ```
-
-2. 设置环境变量
-
- ```bash
- MindSpeed_Core_MS_PATH=$(pwd)
- Ascend_PATH=/usr/loacl/Ascend/ascend-toolkit/latest/
- export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter/mindtorch:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/MindSpeed/: \
- ${MindSpeed_Core_MS_PATH}/Megatron-LM/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/: \
- ${MindSpeed_Core_MS_PATH}/transformers/src/: \
- ${Ascend_PATH}/python/site-packages:${Ascend_PATH}/opp/built-in/op_impl/ai_core/tbe:
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh
```
-3. 在**MindSpeed-RL/configs/XXX.yaml**中,将tokenizer_name_or_path、data_path、load字段内容修改为刚刚准备的分词目录、数据集目录和权重目录,如下:
+2. 在**MindSpeed-RL/configs/*_qwen25_7b.yaml**中,将`tokenizer_name_or_path`、`data_path`、`load`字段内容修改为刚刚准备的分词目录、数据集目录和权重目录,如下:
```shell
# e.g:
@@ -64,18 +128,53 @@ test_convert_rl.sh 脚本自动执行以下设置过程:
load: ./ckpt
```
-## 启动命令
+ > 注:yaml 中需添加参数`megatron_training.ai_framework:mindspore`
-1. qwen25-7b-r1-zero
+ 卡数配置参数介绍
- ```shell
- cd MindSpeed-RL
- python cli/train_grpo.py --config-name grpo_trainer_qwen25_7b | tee logs/grpo_trainer_qwen25_7b.log
- ```
+ - `rl_config.actor_resource.num_npus`:actor 需使用的卡数,如 4;
+ - `rl_config.reference_resource.num_npus`:reference 需使用的卡数,如 2;
+ - `rl_config.reward_resource.num_npus`:reward 需使用的卡数,如 2。
-2. deepseek v3-r1-zero
+3. 任务拉起
- ```shell
- cd MindSpeed-RL
- python cli/train_grpo.py --config-name r1_zero_deepseekv3_671b | tee logs/r1_zero_deepseekv3_671b.log
- ```
\ No newline at end of file
+ 需保证任务拉起时可用的 npu 数量满足`2.`中yaml配置所需的卡数要求
+
+- 若`2.`中 yaml 配置的为**单机**参数,可执行如下命令拉起任务:
+
+ ```shell
+ cd MindSpeed-RL
+ bash examples/grpo/grpo_trainer_qwen25_7b.sh
+ ```
+
+ > 注:脚本中`--config-name`应修改为`2.`中设置的 yaml 文件名称, 如`grpo_trainer_qwen25_7b`
+
+- 若`2.`中 yaml 配置的为**多机**参数,可执行如下命令拉起任务:
+
+ **主节点执行:**
+
+ ```shell
+ bash examples/r1/qwen25/r1_zero_qwen25_7b_master.sh
+ ```
+
+ > 注:脚本中`DEFAULT_YAML`应修改为`2.`中设置的 yaml 名称,根据实际机器设置`NNODES`和`NPUS_PER_NODE`
+
+ 配置参数介绍
+
+ - `DEFAULT_YAML`:指定参数配置的 yaml 名称,如 r1_zero_qwen25_7b.yaml;
+ - `NNODES`:共使用多少节点训练,如 2;
+ - `NPUS_PER_NODE`:每个节点有多少张卡,如 8。
+
+ **从节点执行:**
+
+ ```shell
+ bash examples/r1/qwen25/r1_zero_qwen25_7b_worker.sh
+ ```
+
+ > 注:脚本中`NNODES`和`NPUS_PER_NODE`应与主节点配置一致,`MASTER_ADDR`应为主节点`ip`
+
+ 配置参数介绍
+
+ - `NNODES`:共使用多少节点训练,如 2;
+ - `NPUS_PER_NODE`:每个节点有多少张卡,如 8;
+ - `MASTER_ADDR`:主节点 IP 地址。
diff --git a/scripts/set_path_rl.sh b/scripts/set_path_rl.sh
index 15d6535cc732cf490eb7a989efe79ed010f84425..997fdfd52cce6d43ca636eb62d8512908b63cbb0 100644
--- a/scripts/set_path_rl.sh
+++ b/scripts/set_path_rl.sh
@@ -8,7 +8,7 @@ script_path=$(realpath "${BASH_SOURCE[0]}")
script_dir=$(dirname "$script_path")
MindSpeed_Core_MS_PATH=$(dirname $script_dir)
export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/Megatron-LM/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed-LLM/:${MindSpeed_Core_MS_PATH}/RL/MindSpeed-RL/:$PYTHONPATH
-export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/msadapter/mindtorch/:${MindSpeed_Core_MS_PATH}/RL/transformers/src:${MindSpeed_Core_MS_PATH}/RL/vllm/:${MindSpeed_Core_MS_PATH}/RL/vllm-ascend/:$PYTHONPATH
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/RL/msadapter/mindtorch/:${MindSpeed_Core_MS_PATH}/RL/transformers/src:${MindSpeed_Core_MS_PATH}/RL/vllm/:${MindSpeed_Core_MS_PATH}/RL/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:$PYTHONPATH
export PYTHONPATH=/usr/local/Ascend/ascend-toolkit/latest/opp/built-in/op_impl/ai_core/tbe:/usr/local/Ascend/ascend-toolkit/latest/python/site-packages:$PYTHONPATH
echo "..............................................done set PYTHONPATH"
echo $PYTHONPATH
\ No newline at end of file
diff --git a/test_convert_rl.sh b/test_convert_rl.sh
index db53529260b0d4a4bd9ad9b1b8d4f9f6bba4e408..78f6a43401bb6403bc6cc438b70021d787ea29e9 100644
--- a/test_convert_rl.sh
+++ b/test_convert_rl.sh
@@ -105,6 +105,49 @@ rm -rf tests
cd ..
echo "..............................................done apply transformers"
+#accelerate
+rm -rf accelerate/
+git clone https://github.com/huggingface/accelerate.git -b v1.6.0
+if [ $? -ne 0 ]; then
+ echo "Error: git clone accelerate"
+ exit 1
+fi
+cd accelerate
+git apply ../tools/rules/accelerate.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply accelerate"
+
+#safetensors
+rm -rf safetensors_dir
+mkdir safetensors_dir
+pip install --no-deps safetensors==0.5.1
+if [ $? -ne 0 ]; then
+ echo "Error: pip install safetensors fail"
+else
+ ST_PATH=$(python -c "import site; print(site.getsitepackages()[0])")
+ cp -r ${ST_PATH}/safetensors ./safetensors_dir
+ cd safetensors_dir/safetensors
+ git init
+ git apply ../../tools/rules/safetensors.diff
+ cd ../../
+ export PYTHONPATH=$(pwd)/safetensors_dir:$PYTHONPATH
+ echo "..............................................done apply safetensors"
+fi
+
+#huggingface_hub
+rm -rf huggingface_hub
+git clone https://github.com/huggingface/huggingface_hub.git -b v0.29.2
+if [ $? -ne 0 ]; then
+ echo "Error: git clone huggingface_hub"
+ exit 1
+fi
+cd huggingface_hub
+git apply ../tools/rules_rl/huggingface_hub.diff
+rm -rf tests
+cd ..
+echo "..............................................done apply huggingface_hub"
+
echo "..............................................start code_convert"
MindSpeed_Core_MS_PATH=$PWD
echo ${MindSpeed_Core_MS_PATH}
@@ -118,5 +161,7 @@ python3 tools/transfer.py \
--vllm_ascend_path ${MindSpeed_Core_MS_PATH}/vllm-ascend/ \
--is_rl
+export PYTHONPATH=${MindSpeed_Core_MS_PATH}/msadapter/mindtorch:${MindSpeed_Core_MS_PATH}/Megatron-LM:${MindSpeed_Core_MS_PATH}/MindSpeed:${MindSpeed_Core_MS_PATH}/MindSpeed-LLM:${MindSpeed_Core_MS_PATH}/transformers/src/:${MindSpeed_Core_MS_PATH}/vllm/:${MindSpeed_Core_MS_PATH}/vllm-ascend/:${MindSpeed_Core_MS_PATH}/accelerate/src/:${MindSpeed_Core_MS_PATH}/safetensors_dir/:${MindSpeed_Core_MS_PATH}/huggingface_hub/src/:${MindSpeed_Core_MS_PATH}/MindSpeed-RL/:$PYTHONPATH
+echo $PYTHONPATH
echo "..............................................done code_convert"
diff --git a/tools/rules_rl/huggingface_hub.diff b/tools/rules_rl/huggingface_hub.diff
new file mode 100644
index 0000000000000000000000000000000000000000..65b15d672566f6de2e530450817f7f56883a553f
--- /dev/null
+++ b/tools/rules_rl/huggingface_hub.diff
@@ -0,0 +1,22 @@
+diff --git a/src/huggingface_hub/serialization/_torch.py b/src/huggingface_hub/serialization/_torch.py
+index ccb9c42b..3afd0613 100644
+--- a/src/huggingface_hub/serialization/_torch.py
++++ b/src/huggingface_hub/serialization/_torch.py
+@@ -765,7 +765,7 @@ def get_torch_storage_size(tensor: "torch.Tensor") -> int:
+ pass
+
+ try:
+- return tensor.untyped_storage().nbytes()
++ return tensor.nbytes
+ except AttributeError:
+ # Fallback for torch==1.10
+ try:
+@@ -816,7 +816,7 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+ except Exception:
+ # Fallback for torch==1.10
+ try:
+- return tensor.storage().data_ptr()
++ return tensor
+ except NotImplementedError:
+ # Fallback for meta storage
+ return 0
diff --git a/tools/rules_rl/line_rules.py b/tools/rules_rl/line_rules.py
index 2c8a4ea642c35f98bc802c913e00aa56076d5ff1..ad7665e8296e62383a2b53023821726387b89150 100644
--- a/tools/rules_rl/line_rules.py
+++ b/tools/rules_rl/line_rules.py
@@ -44,14 +44,22 @@ LINE_RULES = {
def func(self, **kwargs):
- return _get_dst_obj(self, value, **kwargs).weight.data.copy_(kwargs.get('data'))
+ set_tensor = _get_dst_obj(self, value, **kwargs)
-+ set_tensor.weight.data = kwargs.get('data')
++ data = kwargs.get('data')
++ if data.dtype != set_tensor.weight.dtype:
++ data = data.to(dtype = set_tensor.weight.dtype)
++ set_tensor.weight.data = data
+
+
+ return set_tensor.weight.data
return func""",
""" def _func_generator_set_bias(value):
def func(self, **kwargs):
- return _get_dst_obj(self, value, **kwargs).bias.data.copy_(kwargs.get('data'))
+ set_tensor = _get_dst_obj(self, value, **kwargs)
-+ set_tensor.bias.data = kwargs.get('data')
++ data = kwargs.get('data')
++ if data.dtype != set_tensor.weight.dtype:
++ data = data.to(dtype = set_tensor.weight.dtype)
++ set_tensor.bias.data = data
+ return set_tensor.bias.data
return func""",
""" self.module = [AutoModelForCausalLM.from_pretrained(
@@ -74,6 +82,15 @@ LINE_RULES = {
},
"mindspeed-rl": {
+ "cli/convert_ckpt.py": [
+"""if __name__ == '__main__':
++ import mindspore as ms
++ ms.set_context(device_target = "CPU")
++ import torch
++ torch.configs.set_pyboost(False)""",
+"""+ parser.add_argument('--save_lora_to_hf', action="store_true", default=False)
+ known_args, _ = parser.parse_known_args()"""
+ ],
"mindspeed_rl/config_cls/megatron_config.py": [
""" self.swap_attention = False
+ self.ai_framework = \"pytorch\"""",