diff --git a/README.md b/README.md index 1b19f9d9767fcf5d75a83de25ee0856e79e41dc6..8086494c06a269371a239c43ef0a10ed3317636e 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,9 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 | [ChatGLM2-6B SFT](nlp/llm/chatglm2-6b-sft/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm2-6b | 3.4.0 | | [ChatGLM3-6B](nlp/llm/chatglm3-6b/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm3-6b | 4.1.1 | | [DeepSeekMoE 7B](nlp/llm/deepseek_moe_7b/pytorch) | PyTorch | ColossalAI | deepseek-moe-16b-base | 4.1.1 | +| [DeepSeek-LLM-7B](nlp/llm/deepseek-llm-7b/verl) | PyTorch | verl | deepseek-llm-7b-chat | 4.4.0 | | [GLM-4](nlp/llm/glm-4/pytorch) | PyTorch | Torchrun | glm-4-9b-chat | 4.2.0 | +| [Gemma-2-2B-IT](nlp/llm/gemma-2-2b-it/verl) | PyTorch | verl | gemma-2-2b-it | 4.4.0 | | [Llama-7B](nlp/llm/llama-7b/pytorch) | PyTorch | ColossalAI | llama-7b-hf | 3.1.0 | | [Llama2-7B](nlp/llm/llama2-7b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.1.0 | | [Llama2-7B RMF](nlp/llm/llama2-7b_reward_sft/pytorch) | PyTorch | DeepSpeed | Dahoas/rm-static | 3.1.1 | @@ -49,10 +51,13 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 | [QWen-7B](nlp/llm/qwen-7b/pytorch) | PyTorch | Firefly | qwen-7b | 3.4.0 | | [QWen1.5-7B](nlp/llm/qwen1.5-7b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | | [QWen1.5-14B](nlp/llm/qwen1.5-14b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | +| [Qwen2-7B](nlp/llm/qwen2-7b/verl) | PyTorch | verl | qwen2-7b | 4.4.0 | | [Qwen2.5-7B SFT](nlp/llm/qwen2.5-7b/pytorch) | PyTorch | LLaMA-Factory | qwen2.5-7b | 4.1.1 | | [Qwen2.5-1.5B verl](nlp/llm/qwen2.5-1.5b/verl) | PyTorch | verl | qwen2.5-1.5b | 4.2.0 | | [Qwen2.5-7B verl](nlp/llm/qwen2.5-7b/verl) | PyTorch | verl | qwen2.5-7b | 4.2.0 | | [Qwen2.5-3B](nlp/llm/qwen2.5-3b/pytorch) | PyTorch | ColossalAI | qwen2.5-3b | 4.3.0 | +| [Qwen2.5-VL-7B](nlp/llm/qwen2.5-vl-7b/verl) | PyTorch | verl | qwen2.5-vl-7b | 4.4.0 | +| [Qwen3-8B](nlp/llm/qwen3-8b/verl) | PyTorch | verl | qwen3-8b 
| 4.4.0 | | [Yi-6B](nlp/llm/yi-6b/pytorch) | PyTorch | DeepSpeed | Yi-6B | 4.2.0 | | [Yi-1.5-6B](nlp/llm/yi-1.5-6b/pytorch) | PyTorch | DeepSpeed | Yi-1.5-6B | 4.2.0 | | [Yi-VL-6B](nlp/llm/yi-vl-6b/pytorch) | PyTorch | LLaMA-Factory | Yi-VL-6B-hf | 4.2.0 | diff --git a/README_en.md b/README_en.md index e73d799b6816ba496ec07624a42b2b3718970469..b31e48663f67f9e00b6404b42e18cd3ea514e035 100644 --- a/README_en.md +++ b/README_en.md @@ -30,7 +30,9 @@ individuals, healthcare, education, communication, energy, and more. | [ChatGLM2-6B SFT](nlp/llm/chatglm2-6b-sft/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm2-6b | 3.4.0 | | [ChatGLM3-6B](nlp/llm/chatglm3-6b/pytorch) | PyTorch | DeepSpeed | ADGEN & chatglm3-6b | 4.1.1 | | [DeepSeekMoE 7B](nlp/llm/deepseek_moe_7b/pytorch) | PyTorch | ColossalAI | deepseek-moe-16b-base | 4.1.1 | +| [DeepSeek-LLM-7B](nlp/llm/deepseek-llm-7b/verl) | PyTorch | verl | deepseek-llm-7b-chat | 4.4.0 | | [GLM-4](nlp/llm/glm-4/pytorch) | PyTorch | Torchrun | glm-4-9b-chat | 4.2.0 | +| [Gemma-2-2B-IT](nlp/llm/gemma-2-2b-it/verl) | PyTorch | verl | gemma-2-2b-it | 4.4.0 | | [Llama-7B](nlp/llm/llama-7b/pytorch) | PyTorch | ColossalAI | llama-7b-hf | 3.1.0 | | [Llama2-7B](nlp/llm/llama2-7b/pytorch) | PyTorch | Megatron-DeepSpeed | Bookcorpus | 3.1.0 | | [Llama2-7B RMF](nlp/llm/llama2-7b_reward_sft/pytorch) | PyTorch | DeepSpeed | Dahoas/rm-static | 3.1.1 | @@ -51,10 +53,13 @@ individuals, healthcare, education, communication, energy, and more. 
| [QWen-7B](nlp/llm/qwen-7b/pytorch) | PyTorch | Firefly | qwen-7b | 3.4.0 | | [QWen1.5-7B](nlp/llm/qwen1.5-7b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | | [QWen1.5-14B](nlp/llm/qwen1.5-14b/pytorch) | PyTorch | Firefly | school_math | 4.1.1 | +| [Qwen2-7B](nlp/llm/qwen2-7b/verl) | PyTorch | verl | qwen2-7b | 4.4.0 | | [Qwen2.5-7B SFT](nlp/llm/qwen2.5-7b/pytorch) | PyTorch | LLaMA-Factory | qwen2.5-7b | 4.1.1 | | [Qwen2.5-1.5B verl](nlp/llm/qwen2.5-1.5b/verl) | PyTorch | verl | qwen2.5-1.5b | 4.2.0 | | [Qwen2.5-7B verl](nlp/llm/qwen2.5-7b/verl) | PyTorch | verl | qwen2.5-7b | 4.2.0 | | [Qwen2.5-3B](nlp/llm/qwen2.5-3b/pytorch) | PyTorch | ColossalAI | qwen2.5-3b | 4.3.0 | +| [Qwen2.5-VL-7B](nlp/llm/qwen2.5-vl-7b/verl) | PyTorch | verl | qwen2.5-vl-7b | 4.4.0 | +| [Qwen3-8B](nlp/llm/qwen3-8b/verl) | PyTorch | verl | qwen3-8b | 4.4.0 | | [Yi-6B](nlp/llm/yi-6b/pytorch) | PyTorch | DeepSpeed | Yi-6B | 4.2.0 | | [Yi-1.5-6B](nlp/llm/yi-1.5-6b/pytorch) | PyTorch | DeepSpeed | Yi-1.5-6B | 4.2.0 | | [Yi-VL-6B](nlp/llm/yi-vl-6b/pytorch) | PyTorch | LLaMA-Factory | Yi-VL-6B-hf | 4.2.0 | diff --git a/nlp/llm/deepseek-llm-7b/verl/README.md b/nlp/llm/deepseek-llm-7b/verl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..756f20361c2827dc58552b49e3783c033ee9fc0c --- /dev/null +++ b/nlp/llm/deepseek-llm-7b/verl/README.md @@ -0,0 +1,49 @@ +# deepseek-llm-7b-chat ppo (verl) + +## Model Description + +Introducing DeepSeek LLM, an advanced language model comprising 7 billion parameters. It has been trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese. In order to foster research, we have made DeepSeek LLM 7B/67B Base and DeepSeek LLM 7B/67B Chat open source for the research community. + +deepseek-llm-7b-chat is a 7B parameter model initialized from deepseek-llm-7b-base and fine-tuned on extra instruction data. 
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| BI-V150 | 4.4.0 | 25.09 | + +## Environment Preparation + +### Install Dependencies +```bash +git clone https://github.com/volcengine/verl.git -b v0.5.0 +cd verl +cp -rf toolbox/verl/v0.5.0/patches/* ./ +pip3 install -r requirements.txt +python3 setup.py install +``` + +### Prepare Resources + +```bash +python3 examples/data_preprocess/gsm8k.py +mkdir -p /home/datasets/verl/ +mv ~/data/gsm8k /home/datasets/verl/gsm8k + +mkdir -p /home/model_zoos/verl/ +# download deepseek-ai/deepseek-llm-7b-chat and put to /home/model_zoos/verl/deepseek-llm-7b-chat +``` + +## Model Training + +### train on gsm8k +```bash +cd nlp/llm/deepseek-llm-7b/verl +bash run_deepseek7b_llm_gsm8k.sh +``` + +## Model Results + +## References + +- [verl](https://github.com/volcengine/verl/tree/v0.5.0) diff --git a/nlp/llm/deepseek-llm-7b/verl/run_deepseek7b_llm_gsm8k.sh b/nlp/llm/deepseek-llm-7b/verl/run_deepseek7b_llm_gsm8k.sh new file mode 100644 index 0000000000000000000000000000000000000000..781c877f763a0df601505bc29e58b40bdb4bf74f --- /dev/null +++ b/nlp/llm/deepseek-llm-7b/verl/run_deepseek7b_llm_gsm8k.sh @@ -0,0 +1,51 @@ +set -x + +HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=gae \ + data.train_files=$PATH_DATASETS/gsm8k/train.parquet \ + data.val_files=$PATH_DATASETS/gsm8k/test.parquet \ + data.train_batch_size=16 \ + data.max_prompt_length=512 \ + data.max_response_length=512 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$PATH_MODEL/deepseek-llm-7b-chat \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + 
actor_rollout_ref.actor.ppo_mini_batch_size=16 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.actor.use_kl_loss=False \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=8 \ + critic.optim.lr=1e-5 \ + critic.model.use_remove_padding=True \ + critic.model.path=$PATH_MODEL/deepseek-llm-7b-chat \ + critic.model.enable_gradient_checkpointing=True \ + critic.ppo_micro_batch_size_per_gpu=8 \ + critic.model.fsdp_config.param_offload=False \ + critic.model.fsdp_config.optimizer_offload=False \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger='["console"]' \ + trainer.project_name='verl_example_gsm8k' \ + trainer.experiment_name='deepseek_llm_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=20 \ + trainer.test_freq=1 \ + trainer.use_legacy_worker_impl=auto \ + trainer.total_epochs=15 $@ \ No newline at end of file diff --git a/nlp/llm/gemma-2-2b-it/verl/README.md b/nlp/llm/gemma-2-2b-it/verl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4786d9cababab5b048dba8fa822feb98750d339c --- /dev/null +++ b/nlp/llm/gemma-2-2b-it/verl/README.md @@ -0,0 +1,47 @@ +# gemma-2-2b-it ppo (verl) + +## Model Description + +Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. 
They are text-to-text, decoder-only large language models, available in English, with open weights for both pre-trained variants and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| BI-V150 | 4.4.0 | 25.09 | + +## Environment Preparation + +### Install Dependencies +```bash +git clone https://github.com/volcengine/verl.git -b v0.5.0 +cd verl +cp -rf toolbox/verl/v0.5.0/patches/* ./ +pip3 install -r requirements.txt +python3 setup.py install +``` + +### Prepare Resources + +```bash +python3 examples/data_preprocess/gsm8k.py +mkdir -p /home/datasets/verl/ +mv ~/data/gsm8k /home/datasets/verl/gsm8k + +mkdir -p /home/model_zoos/verl/ +# download google/gemma-2-2b-it and put to /home/model_zoos/verl/gemma-2-2b-it +``` + +## Model Training + +### train on gsm8k +```bash +cd nlp/llm/gemma-2-2b-it/verl +bash run_gemma_gsm8k.sh +``` + +## Model Results + +## References + +- [verl](https://github.com/volcengine/verl/tree/v0.5.0) diff --git a/nlp/llm/gemma-2-2b-it/verl/run_gemma_gsm8k.sh b/nlp/llm/gemma-2-2b-it/verl/run_gemma_gsm8k.sh new file mode 100644 index 0000000000000000000000000000000000000000..209fd51c48c8b600809cc7db2ab7e2d2ba04818b --- /dev/null +++ b/nlp/llm/gemma-2-2b-it/verl/run_gemma_gsm8k.sh @@ -0,0 +1,47 @@ +set -x + +HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +python3 -m verl.trainer.main_ppo \ + 
algorithm.adv_estimator=gae \ + data.train_files=$PATH_DATASETS/gsm8k/train.parquet \ + data.val_files=$PATH_DATASETS/gsm8k/test.parquet \ + data.train_batch_size=4 \ + data.max_prompt_length=1024 \ + data.max_response_length=512 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$PATH_MODEL/gemma-2-2b-it \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=False \ + actor_rollout_ref.actor.ppo_mini_batch_size=4 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.fsdp_config.param_offload=False \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ + actor_rollout_ref.actor.use_kl_loss=False \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=2 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=8 \ + critic.optim.lr=1e-5 \ + critic.model.use_remove_padding=False \ + critic.model.path=$PATH_MODEL/gemma-2-2b-it \ + critic.model.enable_gradient_checkpointing=False \ + critic.ppo_micro_batch_size_per_gpu=2 \ + critic.model.fsdp_config.param_offload=False \ + critic.model.fsdp_config.optimizer_offload=False \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger='["console","wandb"]' \ + trainer.project_name='verl_example' \ + trainer.experiment_name='gemma2b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=20 \ + trainer.test_freq=10 \ + trainer.total_epochs=15 $@ \ No newline at end of file diff --git a/nlp/llm/qwen2-7b/verl/README.md b/nlp/llm/qwen2-7b/verl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f82e8f78dd692636b9d7b2d6268e1bdbbe62aba7 --- /dev/null +++ b/nlp/llm/qwen2-7b/verl/README.md @@ -0,0 +1,47 @@ +# Qwen2-7B grpo (verl) + +## 
Model Description + +Qwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. Compared with the state-of-the-art open-source language models, including the previously released Qwen1.5, Qwen2 has generally surpassed most open-source models and demonstrated competitiveness against proprietary models across a series of benchmarks targeting language understanding, language generation, multilingual capability, coding, mathematics, reasoning, etc. Qwen2-7B-Instruct supports a context length of up to 131,072 tokens, enabling the processing of extensive inputs. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| BI-V150 | 4.4.0 | 25.09 | + +## Environment Preparation + +### Install Dependencies +```bash +git clone https://github.com/volcengine/verl.git -b v0.5.0 +cd verl +cp -rf toolbox/verl/v0.5.0/patches/* ./ +pip3 install -r requirements.txt +python3 setup.py install +``` + +### Prepare Resources + +```bash +python3 examples/data_preprocess/gsm8k.py +mkdir -p /home/datasets/verl/ +mv ~/data/gsm8k /home/datasets/verl/gsm8k + +mkdir -p /home/model_zoos/verl/ +# download Qwen2-7B-Instruct and put to /home/model_zoos/verl/Qwen2-7B-Instruct +``` + +## Model Training + +### train on gsm8k +```bash +cd nlp/llm/qwen2-7b/verl +bash run_qwen2_7b_gsm8k.sh +``` + +## Model Results + +## References + +- [verl](https://github.com/volcengine/verl/tree/v0.5.0) diff --git a/nlp/llm/qwen2-7b/verl/run_qwen2_7b_gsm8k.sh b/nlp/llm/qwen2-7b/verl/run_qwen2_7b_gsm8k.sh new file mode 100644 index 0000000000000000000000000000000000000000..7c1a56dfa3130b03137e4ce823b607b03441a642 --- /dev/null +++ b/nlp/llm/qwen2-7b/verl/run_qwen2_7b_gsm8k.sh @@ -0,0 +1,49 @@ +set -x + + 
+HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$PATH_DATASETS/gsm8k/train.parquet \ + data.val_files=$PATH_DATASETS/gsm8k/test.parquet \ + data.train_batch_size=16 \ + data.max_prompt_length=512 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$PATH_MODEL/Qwen2-7B-Instruct \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=16 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=40 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=8 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=40 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=100 \ + trainer.total_epochs=1 $@ diff --git a/nlp/llm/qwen2.5-1.5b/verl/README.md b/nlp/llm/qwen2.5-1.5b/verl/README.md index 
a1b8735aefb03868b028bfb905518e0c4945a263..554f172a8b100e86d930c1e777f3c3d57787d3bd 100644 --- a/nlp/llm/qwen2.5-1.5b/verl/README.md +++ b/nlp/llm/qwen2.5-1.5b/verl/README.md @@ -29,7 +29,7 @@ python3 data_preprocess/kk.py --local_dir data/kk --data_path path_to_kk/train/p ### Install Dependencies ```bash -cd deepsparkhub/toolbox/verl +cd deepsparkhub/toolbox/verl/0dc8e8596 pip3 install -r requirements.txt python3 setup.py install ``` diff --git a/nlp/llm/qwen2.5-7b/verl/README.md b/nlp/llm/qwen2.5-7b/verl/README.md index b6fc72cab333b7d86c40138d98bdce2a837053cb..d5b78986aa5cb47c4cd73f71ad1a6a3b4b79a3f2 100644 --- a/nlp/llm/qwen2.5-7b/verl/README.md +++ b/nlp/llm/qwen2.5-7b/verl/README.md @@ -29,7 +29,7 @@ python3 data_preprocess/kk.py --local_dir data/kk --data_path path_to_kk/train/p ### Install Dependencies ```bash -cd deepsparkhub/toolbox/verl +cd deepsparkhub/toolbox/verl/0dc8e8596 pip3 install -r requirements.txt python3 setup.py install ``` diff --git a/nlp/llm/qwen2.5-vl-7b/verl/README.md b/nlp/llm/qwen2.5-vl-7b/verl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3fb0ed51e4d5f945c0d22c6ba473a5adcc0f583e --- /dev/null +++ b/nlp/llm/qwen2.5-vl-7b/verl/README.md @@ -0,0 +1,50 @@ +# Qwen2.5-VL-7B grpo (verl) + +## Model Description + +Qwen2.5-VL is not only proficient in recognizing common objects such as flowers, birds, fish, and insects, but it is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images. +It directly plays as a visual agent that can reason and dynamically direct tools, making it capable of computer use and phone use. It can comprehend videos of over 1 hour, and it has a new ability of capturing events by pinpointing the relevant video segments. It can accurately localize objects in an image by generating bounding boxes or points, and it can provide stable JSON outputs for coordinates and attributes. For data like scans of invoices, forms, tables, etc. 
Qwen2.5-VL supports structured outputs of their contents, benefiting usages in finance, commerce, etc. + +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| BI-V150 | 4.4.0 | 25.09 | + +## Environment Preparation + +### Install Dependencies +```bash +git clone https://github.com/volcengine/verl.git -b v0.5.0 +cd verl +cp -rf toolbox/verl/v0.5.0/patches/* ./ +pip3 install -r requirements.txt +python3 setup.py install + +pip install qwen_vl_utils transformers==4.52.0 +``` + +### Prepare Resources + +```bash +python3 examples/data_preprocess/geo3k.py +mkdir -p /home/datasets/verl/ +mv ~/data/geo3k /home/datasets/verl/geo3k + +mkdir -p /home/model_zoos/verl/ +# download Qwen2.5-VL-7B-Instruct and put to /home/model_zoos/verl/Qwen2.5-VL-7B-Instruct +``` + +## Model Training + +### train on geo3k +```bash +cd nlp/llm/qwen2.5-vl-7b/verl +bash run_qwen2_5_vl-7b_geo3k.sh +``` + +## Model Results + +## References + +- [verl](https://github.com/volcengine/verl/tree/v0.5.0) diff --git a/nlp/llm/qwen2.5-vl-7b/verl/run_qwen2_5_vl-7b_geo3k.sh b/nlp/llm/qwen2.5-vl-7b/verl/run_qwen2_5_vl-7b_geo3k.sh new file mode 100644 index 0000000000000000000000000000000000000000..c55bd0c3450976806b8b388440defada0d8ac71a --- /dev/null +++ b/nlp/llm/qwen2.5-vl-7b/verl/run_qwen2_5_vl-7b_geo3k.sh @@ -0,0 +1,52 @@ +set -x + +HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$PATH_DATASETS/geo3k/train.parquet \ + data.val_files=$PATH_DATASETS/geo3k/test.parquet \ + data.train_batch_size=16 \ + data.max_prompt_length=1024 \ + data.max_response_length=2048 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + data.image_key=images \ + 
actor_rollout_ref.model.path=$PATH_MODEL/Qwen2.5-VL-7B-Instruct \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=16 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.01 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=20 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.rollout.enforce_eager=True \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.n=8 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=20 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger='["console"]' \ + trainer.project_name='verl_grpo_example_geo3k' \ + trainer.experiment_name='qwen2_5_vl_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=1 $@ \ No newline at end of file diff --git a/nlp/llm/qwen2.5-vl-7b/verl/run_qwen2_5_vl_7b_geo3k.sh b/nlp/llm/qwen2.5-vl-7b/verl/run_qwen2_5_vl_7b_geo3k.sh new file mode 100644 index 0000000000000000000000000000000000000000..c55bd0c3450976806b8b388440defada0d8ac71a --- /dev/null +++ b/nlp/llm/qwen2.5-vl-7b/verl/run_qwen2_5_vl_7b_geo3k.sh @@ -0,0 +1,52 @@ +set -x + +HOME=$(cd "$(dirname "$0")"; pwd) 
+echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$PATH_DATASETS/geo3k/train.parquet \ + data.val_files=$PATH_DATASETS/geo3k/test.parquet \ + data.train_batch_size=16 \ + data.max_prompt_length=1024 \ + data.max_response_length=2048 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + data.image_key=images \ + actor_rollout_ref.model.path=$PATH_MODEL/Qwen2.5-VL-7B-Instruct \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=16 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.01 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=20 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.rollout.enforce_eager=True \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.n=8 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=20 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger='["console"]' \ + trainer.project_name='verl_grpo_example_geo3k' \ + trainer.experiment_name='qwen2_5_vl_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + 
trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=1 $@ \ No newline at end of file diff --git a/nlp/llm/qwen3-8b/verl/README.md b/nlp/llm/qwen3-8b/verl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eca405e67fd23f2d7f5ce676d5148cad73cfdea1 --- /dev/null +++ b/nlp/llm/qwen3-8b/verl/README.md @@ -0,0 +1,55 @@ +# Qwen3-8B grpo (verl) + +## Model Description + +Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features: + +- Uniquely support of seamless switching between thinking mode (for complex logical reasoning, math, and coding) and non-thinking mode (for efficient, general-purpose dialogue) within single model, ensuring optimal performance across various scenarios. +- Significantly enhancement in its reasoning capabilities, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning. +- Superior human preference alignment, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience. +- Expertise in agent capabilities, enabling precise integration with external tools in both thinking and unthinking modes and achieving leading performance among open-source models in complex agent-based tasks. +- Support of 100+ languages and dialects with strong capabilities for multilingual instruction following and translation. 
+ +## Supported Environments + +| GPU | [IXUCA SDK](https://gitee.com/deep-spark/deepspark#%E5%A4%A9%E6%95%B0%E6%99%BA%E7%AE%97%E8%BD%AF%E4%BB%B6%E6%A0%88-ixuca) | Release | +| :----: | :----: | :----: | +| BI-V150 | 4.4.0 | 25.09 | + +## Environment Preparation + +### Install Dependencies +```bash +git clone https://github.com/volcengine/verl.git -b v0.5.0 +cd verl +cp -rf toolbox/verl/v0.5.0/patches/* ./ +pip3 install -r requirements.txt +python3 setup.py install + +pip install qwen_vl_utils transformers==4.52.0 +``` + +### Prepare Resources + +```bash +python3 examples/data_preprocess/gsm8k.py +mkdir -p /home/datasets/verl/ +mv ~/data/gsm8k /home/datasets/verl/gsm8k + +mkdir -p /home/model_zoos/verl/ +# download Qwen3-8B and put to /home/model_zoos/verl/Qwen3-8B +``` + +## Model Training + +### train on gsm8k +```bash +cd nlp/llm/qwen3-8b/verl +bash run_qwen3-8b_gsm8k.sh +``` + +## Model Results + +## References + +- [verl](https://github.com/volcengine/verl/tree/v0.5.0) diff --git a/nlp/llm/qwen3-8b/verl/run_qwen3-8b_gsm8k.sh b/nlp/llm/qwen3-8b/verl/run_qwen3-8b_gsm8k.sh new file mode 100644 index 0000000000000000000000000000000000000000..d5dabb4d9ec4c5e5faa4372963574639a184ed2f --- /dev/null +++ b/nlp/llm/qwen3-8b/verl/run_qwen3-8b_gsm8k.sh @@ -0,0 +1,48 @@ +set -x + +HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$PATH_DATASETS/gsm8k/train.parquet \ + data.val_files=$PATH_DATASETS/gsm8k/test.parquet \ + data.train_batch_size=4 \ + data.max_prompt_length=512 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + actor_rollout_ref.model.path=$PATH_MODEL/Qwen3-8B \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=4 \ + 
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \ + actor_rollout_ref.rollout.n=8 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger='["console"]' \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen3_8b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=10 \ + trainer.total_epochs=1 $@ \ No newline at end of file diff --git a/tests/model_info.json b/tests/model_info.json index 205942a0f19216b4d0e284823174c763cedd6743..87f01851f7d60254bfa04ee050b006be947453a4 100644 --- a/tests/model_info.json +++ b/tests/model_info.json @@ -7391,6 +7391,126 @@ "github_branch": "", "github_path": "", "priority": "P4" + }, + { + "model_name": "qwen2-7b", + "framework": "pytorch", + "release_version": "25.09", + "release_sdk": "4.4.0", + "release_gpgpu": "BI-V150", + "latest_sdk": "", + "latest_gpgpu": "", + "category": "nlp/llm", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "deepsparkhub/nlp/llm/qwen2-7b/verl/", + "readme_file": "deepsparkhub/nlp/llm/qwen2-7b/verl/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + 
"develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "priority": "P4" + }, + { + "model_name": "qwen2.5-vl-7b", + "framework": "pytorch", + "release_version": "25.09", + "release_sdk": "4.4.0", + "release_gpgpu": "BI-V150", + "latest_sdk": "", + "latest_gpgpu": "", + "category": "nlp/llm", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "deepsparkhub/nlp/llm/qwen2.5-vl-7b/verl/", + "readme_file": "deepsparkhub/nlp/llm/qwen2.5-vl-7b/verl/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "priority": "P4" + }, + { + "model_name": "qwen3-8b", + "framework": "pytorch", + "release_version": "25.09", + "release_sdk": "4.4.0", + "release_gpgpu": "BI-V150", + "latest_sdk": "", + "latest_gpgpu": "", + "category": "nlp/llm", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "deepsparkhub/nlp/llm/qwen3-8b/verl/", + "readme_file": "deepsparkhub/nlp/llm/qwen3-8b/verl/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "priority": "P4" + }, + { + "model_name": "deepseek-llm-7b", + "framework": "pytorch", + "release_version": "25.09", + "release_sdk": "4.4.0", + "release_gpgpu": "BI-V150", + "latest_sdk": "", + "latest_gpgpu": "", + "category": "nlp/llm", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "deepsparkhub/nlp/llm/deepseek-llm-7b/verl/", + "readme_file": "deepsparkhub/nlp/llm/deepseek-llm-7b/verl/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "priority": "P4" + }, + { + "model_name": "gemma-2-2b-it", + "framework": "pytorch", + "release_version": "25.09", + 
"release_sdk": "4.4.0", + "release_gpgpu": "BI-V150", + "latest_sdk": "", + "latest_gpgpu": "", + "category": "nlp/llm", + "toolbox": "", + "mdims": "", + "dataset": "", + "license": "", + "model_path": "deepsparkhub/nlp/llm/gemma-2-2b-it/verl/", + "readme_file": "deepsparkhub/nlp/llm/gemma-2-2b-it/verl/README.md", + "bitbucket_repo": "", + "bitbucket_branch": "", + "bitbucket_path": "", + "develop_owner": "", + "github_repo": "", + "github_branch": "", + "github_path": "", + "priority": "P4" } ] } \ No newline at end of file diff --git a/toolbox/verl/LICENSE b/toolbox/verl/0dc8e8596/LICENSE similarity index 100% rename from toolbox/verl/LICENSE rename to toolbox/verl/0dc8e8596/LICENSE diff --git a/toolbox/verl/Notice.txt b/toolbox/verl/0dc8e8596/Notice.txt similarity index 100% rename from toolbox/verl/Notice.txt rename to toolbox/verl/0dc8e8596/Notice.txt diff --git a/toolbox/verl/README.md b/toolbox/verl/0dc8e8596/README.md similarity index 100% rename from toolbox/verl/README.md rename to toolbox/verl/0dc8e8596/README.md diff --git a/toolbox/verl/pyproject.toml b/toolbox/verl/0dc8e8596/pyproject.toml similarity index 100% rename from toolbox/verl/pyproject.toml rename to toolbox/verl/0dc8e8596/pyproject.toml diff --git a/toolbox/verl/requirements.txt b/toolbox/verl/0dc8e8596/requirements.txt similarity index 100% rename from toolbox/verl/requirements.txt rename to toolbox/verl/0dc8e8596/requirements.txt diff --git a/toolbox/verl/setup.py b/toolbox/verl/0dc8e8596/setup.py similarity index 100% rename from toolbox/verl/setup.py rename to toolbox/verl/0dc8e8596/setup.py diff --git a/toolbox/verl/verl/__init__.py b/toolbox/verl/0dc8e8596/verl/__init__.py similarity index 100% rename from toolbox/verl/verl/__init__.py rename to toolbox/verl/0dc8e8596/verl/__init__.py diff --git a/toolbox/verl/verl/models/README.md b/toolbox/verl/0dc8e8596/verl/models/README.md similarity index 100% rename from toolbox/verl/verl/models/README.md rename to 
toolbox/verl/0dc8e8596/verl/models/README.md diff --git a/toolbox/verl/verl/models/__init__.py b/toolbox/verl/0dc8e8596/verl/models/__init__.py similarity index 100% rename from toolbox/verl/verl/models/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/__init__.py diff --git a/toolbox/verl/verl/models/llama/__init__.py b/toolbox/verl/0dc8e8596/verl/models/llama/__init__.py similarity index 100% rename from toolbox/verl/verl/models/llama/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/llama/__init__.py diff --git a/toolbox/verl/verl/models/llama/megatron/__init__.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/__init__.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/__init__.py diff --git a/toolbox/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/checkpoint_utils/__init__.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/checkpoint_utils/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/checkpoint_utils/__init__.py diff --git a/toolbox/verl/verl/models/llama/megatron/checkpoint_utils/llama_loader.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/checkpoint_utils/llama_loader.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/checkpoint_utils/llama_loader.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/checkpoint_utils/llama_loader.py diff --git a/toolbox/verl/verl/models/llama/megatron/checkpoint_utils/llama_saver.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/checkpoint_utils/llama_saver.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/checkpoint_utils/llama_saver.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/checkpoint_utils/llama_saver.py diff --git a/toolbox/verl/verl/models/llama/megatron/layers/__init__.py 
b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/__init__.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/layers/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/__init__.py diff --git a/toolbox/verl/verl/models/llama/megatron/layers/parallel_attention.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_attention.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/layers/parallel_attention.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_attention.py diff --git a/toolbox/verl/verl/models/llama/megatron/layers/parallel_decoder.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_decoder.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/layers/parallel_decoder.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_decoder.py diff --git a/toolbox/verl/verl/models/llama/megatron/layers/parallel_linear.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_linear.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/layers/parallel_linear.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_linear.py diff --git a/toolbox/verl/verl/models/llama/megatron/layers/parallel_mlp.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_mlp.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/layers/parallel_mlp.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_mlp.py diff --git a/toolbox/verl/verl/models/llama/megatron/layers/parallel_rmsnorm.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_rmsnorm.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/layers/parallel_rmsnorm.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/layers/parallel_rmsnorm.py diff --git 
a/toolbox/verl/verl/models/llama/megatron/modeling_llama_megatron.py b/toolbox/verl/0dc8e8596/verl/models/llama/megatron/modeling_llama_megatron.py similarity index 100% rename from toolbox/verl/verl/models/llama/megatron/modeling_llama_megatron.py rename to toolbox/verl/0dc8e8596/verl/models/llama/megatron/modeling_llama_megatron.py diff --git a/toolbox/verl/verl/models/qwen2/__init__.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/__init__.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/__init__.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/__init__.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/__init__.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/__init__.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/checkpoint_utils/__init__.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/checkpoint_utils/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/checkpoint_utils/__init__.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/checkpoint_utils/qwen2_loader.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/checkpoint_utils/qwen2_saver.py diff --git 
a/toolbox/verl/verl/models/qwen2/megatron/layers/__init__.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/__init__.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/layers/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/__init__.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/layers/parallel_attention.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_attention.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/layers/parallel_attention.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_attention.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/layers/parallel_decoder.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_decoder.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/layers/parallel_decoder.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_decoder.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/layers/parallel_linear.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_linear.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/layers/parallel_linear.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_linear.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/layers/parallel_mlp.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_mlp.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/layers/parallel_mlp.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_mlp.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py rename to 
toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/layers/parallel_rmsnorm.py diff --git a/toolbox/verl/verl/models/qwen2/megatron/modeling_qwen2_megatron.py b/toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/modeling_qwen2_megatron.py similarity index 100% rename from toolbox/verl/verl/models/qwen2/megatron/modeling_qwen2_megatron.py rename to toolbox/verl/0dc8e8596/verl/models/qwen2/megatron/modeling_qwen2_megatron.py diff --git a/toolbox/verl/verl/models/registry.py b/toolbox/verl/0dc8e8596/verl/models/registry.py similarity index 100% rename from toolbox/verl/verl/models/registry.py rename to toolbox/verl/0dc8e8596/verl/models/registry.py diff --git a/toolbox/verl/verl/models/transformers/__init__.py b/toolbox/verl/0dc8e8596/verl/models/transformers/__init__.py similarity index 100% rename from toolbox/verl/verl/models/transformers/__init__.py rename to toolbox/verl/0dc8e8596/verl/models/transformers/__init__.py diff --git a/toolbox/verl/verl/models/transformers/llama.py b/toolbox/verl/0dc8e8596/verl/models/transformers/llama.py similarity index 100% rename from toolbox/verl/verl/models/transformers/llama.py rename to toolbox/verl/0dc8e8596/verl/models/transformers/llama.py diff --git a/toolbox/verl/verl/models/transformers/monkey_patch.py b/toolbox/verl/0dc8e8596/verl/models/transformers/monkey_patch.py similarity index 100% rename from toolbox/verl/verl/models/transformers/monkey_patch.py rename to toolbox/verl/0dc8e8596/verl/models/transformers/monkey_patch.py diff --git a/toolbox/verl/verl/models/transformers/qwen2.py b/toolbox/verl/0dc8e8596/verl/models/transformers/qwen2.py similarity index 100% rename from toolbox/verl/verl/models/transformers/qwen2.py rename to toolbox/verl/0dc8e8596/verl/models/transformers/qwen2.py diff --git a/toolbox/verl/verl/models/weight_loader_registry.py b/toolbox/verl/0dc8e8596/verl/models/weight_loader_registry.py similarity index 100% rename from toolbox/verl/verl/models/weight_loader_registry.py rename to 
toolbox/verl/0dc8e8596/verl/models/weight_loader_registry.py diff --git a/toolbox/verl/verl/protocol.py b/toolbox/verl/0dc8e8596/verl/protocol.py similarity index 100% rename from toolbox/verl/verl/protocol.py rename to toolbox/verl/0dc8e8596/verl/protocol.py diff --git a/toolbox/verl/verl/single_controller/__init__.py b/toolbox/verl/0dc8e8596/verl/single_controller/__init__.py similarity index 100% rename from toolbox/verl/verl/single_controller/__init__.py rename to toolbox/verl/0dc8e8596/verl/single_controller/__init__.py diff --git a/toolbox/verl/verl/single_controller/base/__init__.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/__init__.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/__init__.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/__init__.py diff --git a/toolbox/verl/verl/single_controller/base/decorator.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/decorator.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/decorator.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/decorator.py diff --git a/toolbox/verl/verl/single_controller/base/megatron/__init__.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/megatron/__init__.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/megatron/__init__.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/megatron/__init__.py diff --git a/toolbox/verl/verl/single_controller/base/megatron/worker.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/megatron/worker.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/megatron/worker.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/megatron/worker.py diff --git a/toolbox/verl/verl/single_controller/base/megatron/worker_group.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/megatron/worker_group.py similarity index 100% rename from 
toolbox/verl/verl/single_controller/base/megatron/worker_group.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/megatron/worker_group.py diff --git a/toolbox/verl/verl/single_controller/base/register_center/__init__.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/register_center/__init__.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/register_center/__init__.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/register_center/__init__.py diff --git a/toolbox/verl/verl/single_controller/base/register_center/ray.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/register_center/ray.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/register_center/ray.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/register_center/ray.py diff --git a/toolbox/verl/verl/single_controller/base/worker.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/worker.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/worker.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/worker.py diff --git a/toolbox/verl/verl/single_controller/base/worker_group.py b/toolbox/verl/0dc8e8596/verl/single_controller/base/worker_group.py similarity index 100% rename from toolbox/verl/verl/single_controller/base/worker_group.py rename to toolbox/verl/0dc8e8596/verl/single_controller/base/worker_group.py diff --git a/toolbox/verl/verl/single_controller/ray/__init__.py b/toolbox/verl/0dc8e8596/verl/single_controller/ray/__init__.py similarity index 100% rename from toolbox/verl/verl/single_controller/ray/__init__.py rename to toolbox/verl/0dc8e8596/verl/single_controller/ray/__init__.py diff --git a/toolbox/verl/verl/single_controller/ray/base.py b/toolbox/verl/0dc8e8596/verl/single_controller/ray/base.py similarity index 100% rename from toolbox/verl/verl/single_controller/ray/base.py rename to toolbox/verl/0dc8e8596/verl/single_controller/ray/base.py diff 
--git a/toolbox/verl/verl/single_controller/ray/megatron.py b/toolbox/verl/0dc8e8596/verl/single_controller/ray/megatron.py similarity index 100% rename from toolbox/verl/verl/single_controller/ray/megatron.py rename to toolbox/verl/0dc8e8596/verl/single_controller/ray/megatron.py diff --git a/toolbox/verl/verl/third_party/__init__.py b/toolbox/verl/0dc8e8596/verl/third_party/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/__init__.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_spmd/__init__.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_spmd/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_spmd/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_spmd/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_spmd/dtensor_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_spmd/dtensor_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_spmd/dtensor_weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_spmd/dtensor_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py similarity index 100% rename from 
toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/arg_utils.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/config.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/config.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/config.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/config.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/llm.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/llm.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/llm.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/llm.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/llm_engine_sp.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/model_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/model_runner.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py similarity index 100% rename from 
toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/parallel_state.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/tokenizer.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/worker.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/worker.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_3_1/worker.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_3_1/worker.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/arg_utils.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/config.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/config.py similarity index 100% rename from 
toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/config.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/config.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/dtensor_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/hf_weight_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/llm.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/llm.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/llm.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/llm.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/llm_engine_sp.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/megatron_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py 
b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/model_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/model_runner.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/parallel_state.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/spmd_gpu_executor.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/tokenizer.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/worker.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/worker.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_4_2/worker.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_4_2/worker.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py 
b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/arg_utils.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/config.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/config.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/config.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/config.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/dtensor_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/hf_weight_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/llm.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/llm.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/llm.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/llm.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py 
b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/llm_engine_sp.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/megatron_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/model_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/model_runner.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py rename to 
toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/spmd_gpu_executor.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/tokenizer.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/worker.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/worker.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_5_4/worker.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_5_4/worker.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/__init__.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/__init__.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/__init__.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/arg_utils.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/config.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/config.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/config.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/config.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py rename to 
toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/dtensor_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/hf_weight_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/llm.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/llm.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/llm.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/llm.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/llm_engine_sp.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/megatron_weight_loaders.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/model_loader.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py similarity index 100% rename from 
toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/model_runner.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/parallel_state.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/spmd_gpu_executor.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/tokenizer.py diff --git a/toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/worker.py b/toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/worker.py similarity index 100% rename from toolbox/verl/verl/third_party/vllm/vllm_v_0_6_3/worker.py rename to toolbox/verl/0dc8e8596/verl/third_party/vllm/vllm_v_0_6_3/worker.py diff --git a/toolbox/verl/verl/trainer/__init__.py b/toolbox/verl/0dc8e8596/verl/trainer/__init__.py similarity index 100% rename from toolbox/verl/verl/trainer/__init__.py rename to toolbox/verl/0dc8e8596/verl/trainer/__init__.py diff --git a/toolbox/verl/verl/trainer/config/evaluation.yaml b/toolbox/verl/0dc8e8596/verl/trainer/config/evaluation.yaml similarity index 100% rename from toolbox/verl/verl/trainer/config/evaluation.yaml rename to toolbox/verl/0dc8e8596/verl/trainer/config/evaluation.yaml diff --git 
a/toolbox/verl/verl/trainer/config/generation.yaml b/toolbox/verl/0dc8e8596/verl/trainer/config/generation.yaml similarity index 100% rename from toolbox/verl/verl/trainer/config/generation.yaml rename to toolbox/verl/0dc8e8596/verl/trainer/config/generation.yaml diff --git a/toolbox/verl/verl/trainer/config/ppo_megatron_trainer.yaml b/toolbox/verl/0dc8e8596/verl/trainer/config/ppo_megatron_trainer.yaml similarity index 100% rename from toolbox/verl/verl/trainer/config/ppo_megatron_trainer.yaml rename to toolbox/verl/0dc8e8596/verl/trainer/config/ppo_megatron_trainer.yaml diff --git a/toolbox/verl/verl/trainer/config/ppo_trainer.yaml b/toolbox/verl/0dc8e8596/verl/trainer/config/ppo_trainer.yaml similarity index 100% rename from toolbox/verl/verl/trainer/config/ppo_trainer.yaml rename to toolbox/verl/0dc8e8596/verl/trainer/config/ppo_trainer.yaml diff --git a/toolbox/verl/verl/trainer/config/sft_trainer.yaml b/toolbox/verl/0dc8e8596/verl/trainer/config/sft_trainer.yaml similarity index 100% rename from toolbox/verl/verl/trainer/config/sft_trainer.yaml rename to toolbox/verl/0dc8e8596/verl/trainer/config/sft_trainer.yaml diff --git a/toolbox/verl/verl/trainer/fsdp_sft_trainer.py b/toolbox/verl/0dc8e8596/verl/trainer/fsdp_sft_trainer.py similarity index 100% rename from toolbox/verl/verl/trainer/fsdp_sft_trainer.py rename to toolbox/verl/0dc8e8596/verl/trainer/fsdp_sft_trainer.py diff --git a/toolbox/verl/verl/trainer/main_eval.py b/toolbox/verl/0dc8e8596/verl/trainer/main_eval.py similarity index 100% rename from toolbox/verl/verl/trainer/main_eval.py rename to toolbox/verl/0dc8e8596/verl/trainer/main_eval.py diff --git a/toolbox/verl/verl/trainer/main_generation.py b/toolbox/verl/0dc8e8596/verl/trainer/main_generation.py similarity index 100% rename from toolbox/verl/verl/trainer/main_generation.py rename to toolbox/verl/0dc8e8596/verl/trainer/main_generation.py diff --git a/toolbox/verl/verl/trainer/main_ppo.py b/toolbox/verl/0dc8e8596/verl/trainer/main_ppo.py 
similarity index 100% rename from toolbox/verl/verl/trainer/main_ppo.py rename to toolbox/verl/0dc8e8596/verl/trainer/main_ppo.py diff --git a/toolbox/verl/verl/trainer/ppo/__init__.py b/toolbox/verl/0dc8e8596/verl/trainer/ppo/__init__.py similarity index 100% rename from toolbox/verl/verl/trainer/ppo/__init__.py rename to toolbox/verl/0dc8e8596/verl/trainer/ppo/__init__.py diff --git a/toolbox/verl/verl/trainer/ppo/core_algos.py b/toolbox/verl/0dc8e8596/verl/trainer/ppo/core_algos.py similarity index 100% rename from toolbox/verl/verl/trainer/ppo/core_algos.py rename to toolbox/verl/0dc8e8596/verl/trainer/ppo/core_algos.py diff --git a/toolbox/verl/verl/trainer/ppo/ray_trainer.py b/toolbox/verl/0dc8e8596/verl/trainer/ppo/ray_trainer.py similarity index 100% rename from toolbox/verl/verl/trainer/ppo/ray_trainer.py rename to toolbox/verl/0dc8e8596/verl/trainer/ppo/ray_trainer.py diff --git a/toolbox/verl/verl/trainer/runtime_env.yaml b/toolbox/verl/0dc8e8596/verl/trainer/runtime_env.yaml similarity index 100% rename from toolbox/verl/verl/trainer/runtime_env.yaml rename to toolbox/verl/0dc8e8596/verl/trainer/runtime_env.yaml diff --git a/toolbox/verl/verl/utils/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/__init__.py diff --git a/toolbox/verl/verl/utils/checkpoint/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/checkpoint/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/checkpoint/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/checkpoint/__init__.py diff --git a/toolbox/verl/verl/utils/checkpoint/checkpoint_manager.py b/toolbox/verl/0dc8e8596/verl/utils/checkpoint/checkpoint_manager.py similarity index 100% rename from toolbox/verl/verl/utils/checkpoint/checkpoint_manager.py rename to toolbox/verl/0dc8e8596/verl/utils/checkpoint/checkpoint_manager.py diff --git 
a/toolbox/verl/verl/utils/checkpoint/fsdp_checkpoint_manager.py b/toolbox/verl/0dc8e8596/verl/utils/checkpoint/fsdp_checkpoint_manager.py similarity index 100% rename from toolbox/verl/verl/utils/checkpoint/fsdp_checkpoint_manager.py rename to toolbox/verl/0dc8e8596/verl/utils/checkpoint/fsdp_checkpoint_manager.py diff --git a/toolbox/verl/verl/utils/config.py b/toolbox/verl/0dc8e8596/verl/utils/config.py similarity index 100% rename from toolbox/verl/verl/utils/config.py rename to toolbox/verl/0dc8e8596/verl/utils/config.py diff --git a/toolbox/verl/verl/utils/dataset/README.md b/toolbox/verl/0dc8e8596/verl/utils/dataset/README.md similarity index 100% rename from toolbox/verl/verl/utils/dataset/README.md rename to toolbox/verl/0dc8e8596/verl/utils/dataset/README.md diff --git a/toolbox/verl/verl/utils/dataset/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/dataset/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/dataset/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/dataset/__init__.py diff --git a/toolbox/verl/verl/utils/dataset/rl_dataset.py b/toolbox/verl/0dc8e8596/verl/utils/dataset/rl_dataset.py similarity index 100% rename from toolbox/verl/verl/utils/dataset/rl_dataset.py rename to toolbox/verl/0dc8e8596/verl/utils/dataset/rl_dataset.py diff --git a/toolbox/verl/verl/utils/dataset/rm_dataset.py b/toolbox/verl/0dc8e8596/verl/utils/dataset/rm_dataset.py similarity index 100% rename from toolbox/verl/verl/utils/dataset/rm_dataset.py rename to toolbox/verl/0dc8e8596/verl/utils/dataset/rm_dataset.py diff --git a/toolbox/verl/verl/utils/dataset/sft_dataset.py b/toolbox/verl/0dc8e8596/verl/utils/dataset/sft_dataset.py similarity index 100% rename from toolbox/verl/verl/utils/dataset/sft_dataset.py rename to toolbox/verl/0dc8e8596/verl/utils/dataset/sft_dataset.py diff --git a/toolbox/verl/verl/utils/debug/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/debug/__init__.py similarity index 100% rename from 
toolbox/verl/verl/utils/debug/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/debug/__init__.py diff --git a/toolbox/verl/verl/utils/debug/performance.py b/toolbox/verl/0dc8e8596/verl/utils/debug/performance.py similarity index 100% rename from toolbox/verl/verl/utils/debug/performance.py rename to toolbox/verl/0dc8e8596/verl/utils/debug/performance.py diff --git a/toolbox/verl/verl/utils/debug/trajectory_tracker.py b/toolbox/verl/0dc8e8596/verl/utils/debug/trajectory_tracker.py similarity index 100% rename from toolbox/verl/verl/utils/debug/trajectory_tracker.py rename to toolbox/verl/0dc8e8596/verl/utils/debug/trajectory_tracker.py diff --git a/toolbox/verl/verl/utils/distributed.py b/toolbox/verl/0dc8e8596/verl/utils/distributed.py similarity index 100% rename from toolbox/verl/verl/utils/distributed.py rename to toolbox/verl/0dc8e8596/verl/utils/distributed.py diff --git a/toolbox/verl/verl/utils/flops_counter.py b/toolbox/verl/0dc8e8596/verl/utils/flops_counter.py similarity index 100% rename from toolbox/verl/verl/utils/flops_counter.py rename to toolbox/verl/0dc8e8596/verl/utils/flops_counter.py diff --git a/toolbox/verl/verl/utils/fs.py b/toolbox/verl/0dc8e8596/verl/utils/fs.py similarity index 100% rename from toolbox/verl/verl/utils/fs.py rename to toolbox/verl/0dc8e8596/verl/utils/fs.py diff --git a/toolbox/verl/verl/utils/fsdp_utils.py b/toolbox/verl/0dc8e8596/verl/utils/fsdp_utils.py similarity index 100% rename from toolbox/verl/verl/utils/fsdp_utils.py rename to toolbox/verl/0dc8e8596/verl/utils/fsdp_utils.py diff --git a/toolbox/verl/verl/utils/hdfs_io.py b/toolbox/verl/0dc8e8596/verl/utils/hdfs_io.py similarity index 100% rename from toolbox/verl/verl/utils/hdfs_io.py rename to toolbox/verl/0dc8e8596/verl/utils/hdfs_io.py diff --git a/toolbox/verl/verl/utils/import_utils.py b/toolbox/verl/0dc8e8596/verl/utils/import_utils.py similarity index 100% rename from toolbox/verl/verl/utils/import_utils.py rename to 
toolbox/verl/0dc8e8596/verl/utils/import_utils.py diff --git a/toolbox/verl/verl/utils/logger/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/logger/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/logger/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/logger/__init__.py diff --git a/toolbox/verl/verl/utils/logger/aggregate_logger.py b/toolbox/verl/0dc8e8596/verl/utils/logger/aggregate_logger.py similarity index 100% rename from toolbox/verl/verl/utils/logger/aggregate_logger.py rename to toolbox/verl/0dc8e8596/verl/utils/logger/aggregate_logger.py diff --git a/toolbox/verl/verl/utils/logging_utils.py b/toolbox/verl/0dc8e8596/verl/utils/logging_utils.py similarity index 100% rename from toolbox/verl/verl/utils/logging_utils.py rename to toolbox/verl/0dc8e8596/verl/utils/logging_utils.py diff --git a/toolbox/verl/verl/utils/megatron/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/megatron/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/__init__.py diff --git a/toolbox/verl/verl/utils/megatron/memory.py b/toolbox/verl/0dc8e8596/verl/utils/megatron/memory.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/memory.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/memory.py diff --git a/toolbox/verl/verl/utils/megatron/optimizer.py b/toolbox/verl/0dc8e8596/verl/utils/megatron/optimizer.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/optimizer.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/optimizer.py diff --git a/toolbox/verl/verl/utils/megatron/optimizer_config.py b/toolbox/verl/0dc8e8596/verl/utils/megatron/optimizer_config.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/optimizer_config.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/optimizer_config.py diff --git a/toolbox/verl/verl/utils/megatron/pipeline_parallel.py 
b/toolbox/verl/0dc8e8596/verl/utils/megatron/pipeline_parallel.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/pipeline_parallel.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/pipeline_parallel.py diff --git a/toolbox/verl/verl/utils/megatron/sequence_parallel.py b/toolbox/verl/0dc8e8596/verl/utils/megatron/sequence_parallel.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/sequence_parallel.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/sequence_parallel.py diff --git a/toolbox/verl/verl/utils/megatron/tensor_parallel.py b/toolbox/verl/0dc8e8596/verl/utils/megatron/tensor_parallel.py similarity index 100% rename from toolbox/verl/verl/utils/megatron/tensor_parallel.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron/tensor_parallel.py diff --git a/toolbox/verl/verl/utils/megatron_utils.py b/toolbox/verl/0dc8e8596/verl/utils/megatron_utils.py similarity index 100% rename from toolbox/verl/verl/utils/megatron_utils.py rename to toolbox/verl/0dc8e8596/verl/utils/megatron_utils.py diff --git a/toolbox/verl/verl/utils/memory_buffer.py b/toolbox/verl/0dc8e8596/verl/utils/memory_buffer.py similarity index 100% rename from toolbox/verl/verl/utils/memory_buffer.py rename to toolbox/verl/0dc8e8596/verl/utils/memory_buffer.py diff --git a/toolbox/verl/verl/utils/model.py b/toolbox/verl/0dc8e8596/verl/utils/model.py similarity index 100% rename from toolbox/verl/verl/utils/model.py rename to toolbox/verl/0dc8e8596/verl/utils/model.py diff --git a/toolbox/verl/verl/utils/py_functional.py b/toolbox/verl/0dc8e8596/verl/utils/py_functional.py similarity index 100% rename from toolbox/verl/verl/utils/py_functional.py rename to toolbox/verl/0dc8e8596/verl/utils/py_functional.py diff --git a/toolbox/verl/verl/utils/ray_utils.py b/toolbox/verl/0dc8e8596/verl/utils/ray_utils.py similarity index 100% rename from toolbox/verl/verl/utils/ray_utils.py rename to toolbox/verl/0dc8e8596/verl/utils/ray_utils.py diff --git 
a/toolbox/verl/verl/utils/rendezvous/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/rendezvous/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/rendezvous/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/rendezvous/__init__.py diff --git a/toolbox/verl/verl/utils/rendezvous/ray_backend.py b/toolbox/verl/0dc8e8596/verl/utils/rendezvous/ray_backend.py similarity index 100% rename from toolbox/verl/verl/utils/rendezvous/ray_backend.py rename to toolbox/verl/0dc8e8596/verl/utils/rendezvous/ray_backend.py diff --git a/toolbox/verl/verl/utils/reward_score/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/__init__.py diff --git a/toolbox/verl/verl/utils/reward_score/gsm8k.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/gsm8k.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/gsm8k.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/gsm8k.py diff --git a/toolbox/verl/verl/utils/reward_score/kk.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/kk.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/kk.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/kk.py diff --git a/toolbox/verl/verl/utils/reward_score/math.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/math.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/math.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/math.py diff --git a/toolbox/verl/verl/utils/reward_score/prime_code/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_code/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/prime_code/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_code/__init__.py diff --git a/toolbox/verl/verl/utils/reward_score/prime_code/testing_util.py 
b/toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_code/testing_util.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/prime_code/testing_util.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_code/testing_util.py diff --git a/toolbox/verl/verl/utils/reward_score/prime_code/utils.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_code/utils.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/prime_code/utils.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_code/utils.py diff --git a/toolbox/verl/verl/utils/reward_score/prime_math/__init__.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_math/__init__.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/prime_math/__init__.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_math/__init__.py diff --git a/toolbox/verl/verl/utils/reward_score/prime_math/grader.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_math/grader.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/prime_math/grader.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_math/grader.py diff --git a/toolbox/verl/verl/utils/reward_score/prime_math/math_normalize.py b/toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_math/math_normalize.py similarity index 100% rename from toolbox/verl/verl/utils/reward_score/prime_math/math_normalize.py rename to toolbox/verl/0dc8e8596/verl/utils/reward_score/prime_math/math_normalize.py diff --git a/toolbox/verl/verl/utils/seqlen_balancing.py b/toolbox/verl/0dc8e8596/verl/utils/seqlen_balancing.py similarity index 100% rename from toolbox/verl/verl/utils/seqlen_balancing.py rename to toolbox/verl/0dc8e8596/verl/utils/seqlen_balancing.py diff --git a/toolbox/verl/verl/utils/tokenizer.py b/toolbox/verl/0dc8e8596/verl/utils/tokenizer.py similarity index 100% rename from toolbox/verl/verl/utils/tokenizer.py rename to 
toolbox/verl/0dc8e8596/verl/utils/tokenizer.py diff --git a/toolbox/verl/verl/utils/torch_dtypes.py b/toolbox/verl/0dc8e8596/verl/utils/torch_dtypes.py similarity index 100% rename from toolbox/verl/verl/utils/torch_dtypes.py rename to toolbox/verl/0dc8e8596/verl/utils/torch_dtypes.py diff --git a/toolbox/verl/verl/utils/torch_functional.py b/toolbox/verl/0dc8e8596/verl/utils/torch_functional.py similarity index 100% rename from toolbox/verl/verl/utils/torch_functional.py rename to toolbox/verl/0dc8e8596/verl/utils/torch_functional.py diff --git a/toolbox/verl/verl/utils/tracking.py b/toolbox/verl/0dc8e8596/verl/utils/tracking.py similarity index 100% rename from toolbox/verl/verl/utils/tracking.py rename to toolbox/verl/0dc8e8596/verl/utils/tracking.py diff --git a/toolbox/verl/verl/utils/ulysses.py b/toolbox/verl/0dc8e8596/verl/utils/ulysses.py similarity index 100% rename from toolbox/verl/verl/utils/ulysses.py rename to toolbox/verl/0dc8e8596/verl/utils/ulysses.py diff --git a/toolbox/verl/verl/version/version b/toolbox/verl/0dc8e8596/verl/version/version similarity index 100% rename from toolbox/verl/verl/version/version rename to toolbox/verl/0dc8e8596/verl/version/version diff --git a/toolbox/verl/verl/workers/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/__init__.py diff --git a/toolbox/verl/verl/workers/actor/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/actor/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/actor/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/actor/__init__.py diff --git a/toolbox/verl/verl/workers/actor/base.py b/toolbox/verl/0dc8e8596/verl/workers/actor/base.py similarity index 100% rename from toolbox/verl/verl/workers/actor/base.py rename to toolbox/verl/0dc8e8596/verl/workers/actor/base.py diff --git a/toolbox/verl/verl/workers/actor/dp_actor.py 
b/toolbox/verl/0dc8e8596/verl/workers/actor/dp_actor.py similarity index 100% rename from toolbox/verl/verl/workers/actor/dp_actor.py rename to toolbox/verl/0dc8e8596/verl/workers/actor/dp_actor.py diff --git a/toolbox/verl/verl/workers/actor/megatron_actor.py b/toolbox/verl/0dc8e8596/verl/workers/actor/megatron_actor.py similarity index 100% rename from toolbox/verl/verl/workers/actor/megatron_actor.py rename to toolbox/verl/0dc8e8596/verl/workers/actor/megatron_actor.py diff --git a/toolbox/verl/verl/workers/critic/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/critic/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/critic/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/critic/__init__.py diff --git a/toolbox/verl/verl/workers/critic/base.py b/toolbox/verl/0dc8e8596/verl/workers/critic/base.py similarity index 100% rename from toolbox/verl/verl/workers/critic/base.py rename to toolbox/verl/0dc8e8596/verl/workers/critic/base.py diff --git a/toolbox/verl/verl/workers/critic/dp_critic.py b/toolbox/verl/0dc8e8596/verl/workers/critic/dp_critic.py similarity index 100% rename from toolbox/verl/verl/workers/critic/dp_critic.py rename to toolbox/verl/0dc8e8596/verl/workers/critic/dp_critic.py diff --git a/toolbox/verl/verl/workers/critic/megatron_critic.py b/toolbox/verl/0dc8e8596/verl/workers/critic/megatron_critic.py similarity index 100% rename from toolbox/verl/verl/workers/critic/megatron_critic.py rename to toolbox/verl/0dc8e8596/verl/workers/critic/megatron_critic.py diff --git a/toolbox/verl/verl/workers/fsdp_workers.py b/toolbox/verl/0dc8e8596/verl/workers/fsdp_workers.py similarity index 100% rename from toolbox/verl/verl/workers/fsdp_workers.py rename to toolbox/verl/0dc8e8596/verl/workers/fsdp_workers.py diff --git a/toolbox/verl/verl/workers/megatron_workers.py b/toolbox/verl/0dc8e8596/verl/workers/megatron_workers.py similarity index 100% rename from toolbox/verl/verl/workers/megatron_workers.py rename to 
toolbox/verl/0dc8e8596/verl/workers/megatron_workers.py diff --git a/toolbox/verl/verl/workers/reward_manager/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/reward_manager/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/reward_manager/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_manager/__init__.py diff --git a/toolbox/verl/verl/workers/reward_manager/naive.py b/toolbox/verl/0dc8e8596/verl/workers/reward_manager/naive.py similarity index 100% rename from toolbox/verl/verl/workers/reward_manager/naive.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_manager/naive.py diff --git a/toolbox/verl/verl/workers/reward_manager/prime.py b/toolbox/verl/0dc8e8596/verl/workers/reward_manager/prime.py similarity index 100% rename from toolbox/verl/verl/workers/reward_manager/prime.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_manager/prime.py diff --git a/toolbox/verl/verl/workers/reward_model/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/reward_model/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/reward_model/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_model/__init__.py diff --git a/toolbox/verl/verl/workers/reward_model/base.py b/toolbox/verl/0dc8e8596/verl/workers/reward_model/base.py similarity index 100% rename from toolbox/verl/verl/workers/reward_model/base.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_model/base.py diff --git a/toolbox/verl/verl/workers/reward_model/megatron/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/reward_model/megatron/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/reward_model/megatron/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_model/megatron/__init__.py diff --git a/toolbox/verl/verl/workers/reward_model/megatron/reward_model.py b/toolbox/verl/0dc8e8596/verl/workers/reward_model/megatron/reward_model.py similarity index 100% rename from 
toolbox/verl/verl/workers/reward_model/megatron/reward_model.py rename to toolbox/verl/0dc8e8596/verl/workers/reward_model/megatron/reward_model.py diff --git a/toolbox/verl/verl/workers/rollout/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/__init__.py diff --git a/toolbox/verl/verl/workers/rollout/base.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/base.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/base.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/base.py diff --git a/toolbox/verl/verl/workers/rollout/hf_rollout.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/hf_rollout.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/hf_rollout.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/hf_rollout.py diff --git a/toolbox/verl/verl/workers/rollout/naive/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/naive/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/naive/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/naive/__init__.py diff --git a/toolbox/verl/verl/workers/rollout/naive/naive_rollout.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/naive/naive_rollout.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/naive/naive_rollout.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/naive/naive_rollout.py diff --git a/toolbox/verl/verl/workers/rollout/tokenizer.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/tokenizer.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/tokenizer.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/tokenizer.py diff --git a/toolbox/verl/verl/workers/rollout/vllm_rollout/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/vllm_rollout/__init__.py similarity index 100% rename from 
toolbox/verl/verl/workers/rollout/vllm_rollout/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/vllm_rollout/__init__.py diff --git a/toolbox/verl/verl/workers/rollout/vllm_rollout/vllm_rollout.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/vllm_rollout/vllm_rollout.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/vllm_rollout/vllm_rollout.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/vllm_rollout/vllm_rollout.py diff --git a/toolbox/verl/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py b/toolbox/verl/0dc8e8596/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py similarity index 100% rename from toolbox/verl/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py rename to toolbox/verl/0dc8e8596/verl/workers/rollout/vllm_rollout/vllm_rollout_spmd.py diff --git a/toolbox/verl/verl/workers/sharding_manager/__init__.py b/toolbox/verl/0dc8e8596/verl/workers/sharding_manager/__init__.py similarity index 100% rename from toolbox/verl/verl/workers/sharding_manager/__init__.py rename to toolbox/verl/0dc8e8596/verl/workers/sharding_manager/__init__.py diff --git a/toolbox/verl/verl/workers/sharding_manager/base.py b/toolbox/verl/0dc8e8596/verl/workers/sharding_manager/base.py similarity index 100% rename from toolbox/verl/verl/workers/sharding_manager/base.py rename to toolbox/verl/0dc8e8596/verl/workers/sharding_manager/base.py diff --git a/toolbox/verl/verl/workers/sharding_manager/fsdp_ulysses.py b/toolbox/verl/0dc8e8596/verl/workers/sharding_manager/fsdp_ulysses.py similarity index 100% rename from toolbox/verl/verl/workers/sharding_manager/fsdp_ulysses.py rename to toolbox/verl/0dc8e8596/verl/workers/sharding_manager/fsdp_ulysses.py diff --git a/toolbox/verl/verl/workers/sharding_manager/fsdp_vllm.py b/toolbox/verl/0dc8e8596/verl/workers/sharding_manager/fsdp_vllm.py similarity index 100% rename from toolbox/verl/verl/workers/sharding_manager/fsdp_vllm.py rename to 
toolbox/verl/0dc8e8596/verl/workers/sharding_manager/fsdp_vllm.py diff --git a/toolbox/verl/verl/workers/sharding_manager/megatron_vllm.py b/toolbox/verl/0dc8e8596/verl/workers/sharding_manager/megatron_vllm.py similarity index 100% rename from toolbox/verl/verl/workers/sharding_manager/megatron_vllm.py rename to toolbox/verl/0dc8e8596/verl/workers/sharding_manager/megatron_vllm.py diff --git a/toolbox/verl/v0.5.0/patches/build_verl.sh b/toolbox/verl/v0.5.0/patches/build_verl.sh new file mode 100644 index 0000000000000000000000000000000000000000..63677059e59c456fd97c2bbd00f82d2bc0fcb1f9 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/build_verl.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +COREX_VERSION=${COREX_VERSION:-latest} +MAX_JOBS=${MAX_JOBS:-$(nproc --all)} +PYTHON_PATH=$(which python3) +${PYTHON_PATH} -c "import torch;print(torch.__version__)" || { + echo "ERROR: building vision requries torch has been installed." + exit 1 +} +PY_VERSION=`${PYTHON_PATH} -V 2>&1|awk '{print $2}'|awk -F '.' 
'{print $2}'` +OS_ID=$(awk -F= '/^ID=/{print $2}' /etc/os-release | tr -d '"') + +pip3 install -r requirements.txt + +# ${PYTHON_PATH} -m pip install -r requirements_dev.txt || exit + +if [[ "${COREX_VERSION}" == "latest" || -z "${COREX_VERSION}" ]]; then + COREX_VERSION=`date --utc +%Y%m%d%H%M%S` +fi +export VERL_LOCAL_VERSION_IDENTIFIER="corex.${COREX_VERSION}" + +export MAX_JOBS=${MAX_JOBS} + +# ${PYTHON_PATH} setup.py build 2>&1 | tee compile.log; [[ ${PIPESTATUS[0]} == 0 ]] || exit + +${PYTHON_PATH} setup.py bdist_wheel -d build_pip || exit + +# Return 0 status if all finished +exit 0 \ No newline at end of file diff --git a/toolbox/verl/v0.5.0/patches/clean_verl.sh b/toolbox/verl/v0.5.0/patches/clean_verl.sh new file mode 100644 index 0000000000000000000000000000000000000000..4805313d5c07b8689b5f2e6106c5031d5961fb7c --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/clean_verl.sh @@ -0,0 +1,8 @@ +#!/bin/bash +pip3 uninstall verl -y || true + +rm -rf build +rm -rf build_pip +rm -rf verl.egg-info +# Return 0 status if all finished +exit 0 \ No newline at end of file diff --git a/toolbox/verl/v0.5.0/patches/examples/grpo_trainer/run_qwen2-7b.sh b/toolbox/verl/v0.5.0/patches/examples/grpo_trainer/run_qwen2-7b.sh new file mode 100644 index 0000000000000000000000000000000000000000..0714b90f47e8ea1d385cdca3de8a8fe4c68d59b8 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/examples/grpo_trainer/run_qwen2-7b.sh @@ -0,0 +1,49 @@ +set -x + + +HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo \ + data.train_files=$PATH_DATASETS/gsm8k/train.parquet \ + data.val_files=$PATH_DATASETS/gsm8k/test.parquet \ + data.train_batch_size=16 \ + data.max_prompt_length=512 \ + data.max_response_length=1024 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + 
actor_rollout_ref.model.path=$PATH_MODEL/Qwen2-7B-Instruct \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=16 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.001 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=40 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.free_cache_engine=False \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ + actor_rollout_ref.rollout.n=8 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=40 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger=['console'] \ + trainer.project_name='verl_grpo_example_gsm8k' \ + trainer.experiment_name='qwen2_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=100 \ + trainer.total_epochs=1 $@ diff --git a/toolbox/verl/v0.5.0/patches/examples/grpo_trainer/run_qwen2_5_vl-7b.sh b/toolbox/verl/v0.5.0/patches/examples/grpo_trainer/run_qwen2_5_vl-7b.sh new file mode 100644 index 0000000000000000000000000000000000000000..10937ffde744d60f4d3e4b3802293a2ff2873a2b --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/examples/grpo_trainer/run_qwen2_5_vl-7b.sh @@ -0,0 +1,52 @@ +set -x + +HOME=$(cd "$(dirname "$0")"; pwd) +echo "HOME:$HOME" +PATH_DATASETS=/home/datasets/verl +PATH_MODEL=/home/model_zoos/verl + +export VLLM_USE_V1=0 + +python3 -m verl.trainer.main_ppo \ + algorithm.adv_estimator=grpo 
\ + data.train_files=$PATH_DATASETS/geo3k/train.parquet \ + data.val_files=$PATH_DATASETS/geo3k/test.parquet \ + data.train_batch_size=16 \ + data.max_prompt_length=1024 \ + data.max_response_length=2048 \ + data.filter_overlong_prompts=True \ + data.truncation='error' \ + data.image_key=images \ + actor_rollout_ref.model.path=$PATH_MODEL/Qwen2.5-VL-7B-Instruct \ + actor_rollout_ref.actor.optim.lr=1e-6 \ + actor_rollout_ref.model.use_remove_padding=True \ + actor_rollout_ref.actor.ppo_mini_batch_size=16 \ + actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8 \ + actor_rollout_ref.actor.use_kl_loss=True \ + actor_rollout_ref.actor.kl_loss_coef=0.01 \ + actor_rollout_ref.actor.kl_loss_type=low_var_kl \ + actor_rollout_ref.actor.entropy_coeff=0 \ + actor_rollout_ref.model.enable_gradient_checkpointing=True \ + actor_rollout_ref.actor.fsdp_config.param_offload=True \ + actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \ + actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=20 \ + actor_rollout_ref.rollout.tensor_model_parallel_size=4 \ + actor_rollout_ref.rollout.name=vllm \ + actor_rollout_ref.rollout.engine_kwargs.vllm.disable_mm_preprocessor_cache=True \ + actor_rollout_ref.rollout.gpu_memory_utilization=0.3 \ + actor_rollout_ref.rollout.enable_chunked_prefill=False \ + actor_rollout_ref.rollout.enforce_eager=True \ + actor_rollout_ref.rollout.free_cache_engine=True \ + actor_rollout_ref.rollout.n=8 \ + actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=20 \ + actor_rollout_ref.ref.fsdp_config.param_offload=True \ + algorithm.use_kl_in_reward=False \ + trainer.critic_warmup=0 \ + trainer.logger='["console","wandb"]' \ + trainer.project_name='verl_grpo_example_geo3k' \ + trainer.experiment_name='qwen2_5_vl_7b_function_rm' \ + trainer.n_gpus_per_node=16 \ + trainer.nnodes=1 \ + trainer.save_freq=-1 \ + trainer.test_freq=100 \ + trainer.total_epochs=1 $@ diff --git a/toolbox/verl/v0.5.0/patches/install_verl.sh 
b/toolbox/verl/v0.5.0/patches/install_verl.sh new file mode 100644 index 0000000000000000000000000000000000000000..4c8f6dbb265b8c83a6fce0f7df3d402c93f506a1 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/install_verl.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +TARGET_DIR=${TARGET_DIR:-} + +PYTHON_PATH=$(which python3) +PYTHON_DIST_PATH=${TARGET_DIR}/lib/python3/dist-packages + +PKG_DIR="build_pip" +PKG_NAME="verl" + +if [[ ! -d ${PKG_DIR} ]]; then + echo "ERROR: Package directory ${PKG_DIR} doesn't exist" + exit 1 +fi + +latest_pkg="$(ls -t ${PKG_DIR} | grep ${PKG_NAME} | head -1)" +if [[ "${latest_pkg}" == "" ]]; then + echo "ERROR: Cannot find latest ${PKG_NAME} package" + exit 1 +else + echo "INFO: Found latest package ${latest_pkg} in directory ${PKG_DIR}" +fi + +${PYTHON_PATH} -m pip uninstall ${PKG_NAME} -y +${PYTHON_PATH} -m pip install ${PKG_DIR}/${latest_pkg} || exit + +# Return 0 status if all finished +exit 0 \ No newline at end of file diff --git a/toolbox/verl/v0.5.0/patches/pyproject.toml b/toolbox/verl/v0.5.0/patches/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..78273d8ee5fadd59574a23364ff6e3d2dfd7c397 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/pyproject.toml @@ -0,0 +1,91 @@ +# ------------------------------- +# build-system +# ------------------------------- +[build-system] +requires = [ + "setuptools>=61.0", + "wheel" +] +build-backend = "setuptools.build_meta" + +# ------------------------------- +# project (PEP 621 metadata) +# ------------------------------- +[project] +name = "verl" +# We'll mark the version as "dynamic" because it's read from the file "verl/version/version" +# (PEP 621 calls this "dynamic version"). +# The actual version is specified in the [tool.setuptools.dynamic] section below. 
+dynamic = ["version", "dependencies", "optional-dependencies", "urls"] + +description = "verl: Volcano Engine Reinforcement Learning for LLM" +license = {text = "Apache-2.0"} # Changed from file to text format +readme = {file = "README.md", content-type = "text/markdown"} +requires-python = ">=3.10" + +# ------------------------------- +# tool.ruff - Linting configuration +# ------------------------------- +[tool.ruff] +# Note: While the formatter will attempt to format lines such that they remain within the line-length, +# it isn't a hard upper bound, and formatted lines may exceed the line-length. +line-length = 120 +exclude = ["tests/workers/rollout/test_sglang_async_rollout_sf_tools.py", "scripts/legacy_model_merger.py"] + +[tool.ruff.lint] +isort = {known-first-party = ["verl"]} +# c.f. https://github.com/vllm-project/vllm/blob/ce8d6b75fc0586045df75ee1568a5b5f9957251b/pyproject.toml +select = [ + # pycodestyle + "E", + # Pyflakes + "F", + # pyupgrade + "UP", + # flake8-bugbear + "B", + # isort + "I", + "G", +] +ignore = [ + # star imports + "F405", "F403", + # lambda expression assignment + "E731", + # Loop control variable not used within loop body + "B007", + # f-string format + "UP032", + # `.log()` statement uses f-string + "G004", + # X | None for type annotations + "UP045", + # deprecated import + "UP035", +] + +# ------------------------------- +# tool.setuptools - Additional config +# ------------------------------- +[tool.setuptools] +# True means `setuptools` will attempt to include all relevant files in package_data automatically. +# This corresponds to `include_package_data=True` in setup.py. +include-package-data = true + +# We read the version from a file in 'verl/version/version' +[tool.setuptools.dynamic] +version = {file = "verl/version/version"} + +# If you need to mimic `package_dir={'': '.'}`: +[tool.setuptools.package-dir] +"" = "." 
+ +# If you need to include specific non-Python data (like YAML files or version file): +# This is the rough equivalent of package_data={'': ['version/*'], 'verl': ['trainer/config/*.yaml']} +[tool.setuptools.package-data] +verl = [ + "version/*", + "trainer/config/*.yaml", + "trainer/config/*/*.yaml", +] diff --git a/toolbox/verl/v0.5.0/patches/requirements.txt b/toolbox/verl/v0.5.0/patches/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..72bbe7e48ca267b2f63c68c660dd9af86fd9ffc7 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/requirements.txt @@ -0,0 +1,27 @@ +# requirements.txt records the full set of dependencies for development +accelerate +codetiming +datasets +dill +#flash-attn +hydra-core +#liger-kernel +numpy==1.26.4 +pandas +peft +pyarrow>=19.0.0 +pybind11 +pylatexenc +pre-commit +ray[default] +tensordict==0.6.2 +torchdata +#transformers==4.52.0 +# vllm==0.9.1 +wandb +packaging>=20.0 +uvicorn +fastapi +latex2sympy2_extended +math_verify +mathruler \ No newline at end of file diff --git a/toolbox/verl/v0.5.0/patches/setup.py b/toolbox/verl/v0.5.0/patches/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..1671d84882d40bebba7d601c8818950ba7f6553f --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/setup.py @@ -0,0 +1,99 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# setup.py is the fallback installation script when pyproject.toml does not work +import os +from pathlib import Path + +from setuptools import find_packages, setup + +version_folder = os.path.dirname(os.path.join(os.path.abspath(__file__))) + +with open(os.path.join(version_folder, "verl/version/version")) as f: + __version__ = f.read().strip() + +install_requires = [ + "accelerate", + "codetiming", + "datasets", + "dill", + "hydra-core", + "numpy<2.0.0", + "pandas", + "peft", + "pyarrow>=19.0.0", + "pybind11", + "pylatexenc", + "ray[default]>=2.41.0", + "torchdata", + "tensordict>=0.8.0,<=0.9.1,!=0.9.0", + "transformers", + "wandb", + "packaging>=20.0", +] + +TEST_REQUIRES = ["pytest", "pre-commit", "py-spy", "pytest-asyncio"] +PRIME_REQUIRES = ["pyext"] +GEO_REQUIRES = ["mathruler", "torchvision", "qwen_vl_utils"] +GPU_REQUIRES = ["liger-kernel", "flash-attn"] +MATH_REQUIRES = ["math-verify"] # Add math-verify as an optional dependency +VLLM_REQUIRES = ["tensordict>=0.8.0,<=0.9.1,!=0.9.0", "vllm>=0.7.3,<=0.8.5"] +SGLANG_REQUIRES = [ + "tensordict>=0.8.0,<=0.9.1,!=0.9.0", + "sglang[srt,openai]==0.4.6.post5", + "torch-memory-saver>=0.0.5", + "torch==2.6.0", +] +TRL_REQUIRES = ["trl<=0.9.6"] +MCORE_REQUIRES = ["mbridge"] + +extras_require = { + "test": TEST_REQUIRES, + "prime": PRIME_REQUIRES, + "geo": GEO_REQUIRES, + "gpu": GPU_REQUIRES, + "math": MATH_REQUIRES, + "vllm": VLLM_REQUIRES, + "sglang": SGLANG_REQUIRES, + "trl": TRL_REQUIRES, + "mcore": MCORE_REQUIRES, +} + + +this_directory = Path(__file__).parent +long_description = (this_directory / "README.md").read_text() + +if "VERL_LOCAL_VERSION_IDENTIFIER" in os.environ: + __version__ += "+" + str(os.environ['VERL_LOCAL_VERSION_IDENTIFIER']) + +setup( + name="verl", + version=__version__, + package_dir={"": "."}, + packages=find_packages(where="."), + # url="https://github.com/volcengine/verl", + # license="Apache 2.0", + # author="Bytedance - Seed - MLSys", + # author_email="zhangchi.usc1992@bytedance.com, 
gmsheng@connect.hku.hk", + # description="verl: Volcano Engine Reinforcement Learning for LLM", + # install_requires=install_requires, + extras_require=extras_require, + package_data={ + "": ["version/*"], + "verl": ["trainer/config/**/*", "trainer/config/*"], + }, + include_package_data=True, + long_description=long_description, + long_description_content_type="text/markdown", +) diff --git a/toolbox/verl/v0.5.0/patches/verl/third_party/vllm/__init__.py b/toolbox/verl/v0.5.0/patches/verl/third_party/vllm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d0fdeb208cbe50976bdc34be3f7564b2ada29dfb --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/verl/third_party/vllm/__init__.py @@ -0,0 +1,59 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from importlib.metadata import PackageNotFoundError, version + +from packaging import version as vs + +from verl.utils.import_utils import is_sglang_available + + +def get_version(pkg): + try: + ver = version(pkg) + if "+" in ver: + return ver.split("+")[0] + else: + return version(pkg) + except PackageNotFoundError: + return None + + +package_name = "vllm" +package_version = get_version(package_name) +vllm_version = None + +if package_version is None: + if not is_sglang_available(): + raise ValueError( + f"vllm version {package_version} not supported and SGLang also not Found. 
Currently supported " + f"vllm versions are 0.7.0+" + ) +elif vs.parse(package_version) >= vs.parse("0.7.0"): + vllm_version = package_version + from vllm import LLM + from vllm.distributed import parallel_state +else: + if vs.parse(package_version) in [vs.parse("0.5.4"), vs.parse("0.6.3")]: + raise ValueError( + f"vLLM version {package_version} support has been removed. vLLM 0.5.4 and 0.6.3 are no longer " + f"supported. Please use vLLM 0.7.0 or later." + ) + if not is_sglang_available(): + raise ValueError( + f"vllm version {package_version} not supported and SGLang also not Found. Currently supported " + f"vllm versions are 0.7.0+" + ) + +__all__ = ["LLM", "parallel_state"] diff --git a/toolbox/verl/v0.5.0/patches/verl/trainer/main_ppo.py b/toolbox/verl/v0.5.0/patches/verl/trainer/main_ppo.py new file mode 100644 index 0000000000000000000000000000000000000000..75ddaa6211bf2f959e894927a00d71e65992a7f2 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/verl/trainer/main_ppo.py @@ -0,0 +1,338 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Note that we don't combine the main with ray_trainer as ray_trainer is used by other main. 
+""" + +import os +import socket + +import hydra +import ray +from omegaconf import OmegaConf + +from verl.experimental.dataset.sampler import AbstractSampler +from verl.trainer.constants_ppo import get_ppo_ray_runtime_env +from verl.trainer.ppo.ray_trainer import RayPPOTrainer +from verl.trainer.ppo.reward import load_reward_manager +from verl.utils.device import is_cuda_available +from verl.utils.import_utils import load_extern_type + + +@hydra.main(config_path="config", config_name="ppo_trainer", version_base=None) +def main(config): + """Main entry point for PPO training with Hydra configuration management. + + Args: + config_dict: Hydra configuration dictionary containing training parameters. + """ + run_ppo(config) + + +# Define a function to run the PPO-like training process +def run_ppo(config) -> None: + """Initialize Ray cluster and run distributed PPO training process. + + Args: + config: Training configuration object containing all necessary parameters + for distributed PPO training including Ray initialization settings, + model paths, and training hyperparameters. 
+ """ + # Check if Ray is not initialized + if not ray.is_initialized(): + # Initialize Ray with a local cluster configuration + # Set environment variables in the runtime environment to control tokenizer parallelism, + # NCCL debug level, VLLM logging level, and allow runtime LoRA updating + # `num_cpus` specifies the number of CPU cores Ray can use, obtained from the configuration + ray.init( + runtime_env=get_ppo_ray_runtime_env(), + num_cpus=config.ray_init.num_cpus, + num_gpus=config.trainer.n_gpus_per_node, + ) + + # Create a remote instance of the TaskRunner class, and + # Execute the `run` method of the TaskRunner instance remotely and wait for it to complete + if ( + is_cuda_available + and config.trainer.get("profile_steps") is not None + and len(config.trainer.get("profile_steps", [])) > 0 + ): + nsight_options = OmegaConf.to_container(config.trainer.controller_nsight_options) + runner = TaskRunner.options(runtime_env={"nsight": nsight_options}).remote() + else: + runner = TaskRunner.remote() + ray.get(runner.run.remote(config)) + + # [Optional] get the path of the timeline trace file from the configuration, default to None + # This file is used for performance analysis + timeline_json_file = config.ray_init.get("timeline_json_file", None) + if timeline_json_file: + ray.timeline(filename=timeline_json_file) + + +@ray.remote(num_cpus=1) # please make sure main_task is not scheduled on head +class TaskRunner: + """Ray remote class for executing distributed PPO training tasks. + + This class encapsulates the main training logic and runs as a Ray remote actor + to enable distributed execution across multiple nodes and GPUs. + """ + + def run(self, config): + """Execute the main PPO training workflow. + + This method sets up the distributed training environment, initializes + workers, datasets, and reward functions, then starts the training process. 
+ + Args: + config: Training configuration object containing all parameters needed + for setting up and running the PPO training process. + """ + # Print the initial configuration. `resolve=True` will evaluate symbolic values. + from pprint import pprint + + from omegaconf import OmegaConf + + from verl.utils.fs import copy_to_local + + print(f"TaskRunner hostname: {socket.gethostname()}, PID: {os.getpid()}") + pprint(OmegaConf.to_container(config, resolve=True)) + OmegaConf.resolve(config) + + # Download the checkpoint from HDFS to the local machine. + # `use_shm` determines whether to use shared memory, which could lead to faster model loading if turned on + local_path = copy_to_local( + config.actor_rollout_ref.model.path, use_shm=config.actor_rollout_ref.model.get("use_shm", False) + ) + + # Instantiate the tokenizer and processor. + from verl.utils import hf_processor, hf_tokenizer + + trust_remote_code = config.data.get("trust_remote_code", False) + tokenizer = hf_tokenizer(local_path, trust_remote_code=trust_remote_code) + # Used for multimodal LLM, could be None + processor = hf_processor(local_path, trust_remote_code=trust_remote_code, use_fast=True) + + # Define worker classes based on the actor strategy. + if config.actor_rollout_ref.actor.strategy in {"fsdp", "fsdp2"}: + assert config.critic.strategy in {"fsdp", "fsdp2"} + from verl.single_controller.ray import RayWorkerGroup + from verl.workers.fsdp_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker + + use_legacy_worker_impl = config.trainer.get("use_legacy_worker_impl", "auto") + if use_legacy_worker_impl in ["auto", "enable"]: + # import warnings + # warnings.warn(f"Legacy worker impl is going to be deprecated, will be removed in the future. 
\ + # Please set trainer.use_legacy_worker_impl = false to switch to the new worker implementation.") + from verl.workers.fsdp_workers import CriticWorker + elif use_legacy_worker_impl == "disable": + from verl.workers.roles import CriticWorker + + print("Using new worker implementation") + else: + raise ValueError(f"Invalid use_legacy_worker_impl: {use_legacy_worker_impl}") + + actor_rollout_cls = ( + AsyncActorRolloutRefWorker + if config.actor_rollout_ref.rollout.mode == "async" + else ActorRolloutRefWorker + ) + ray_worker_group_cls = RayWorkerGroup + + elif config.actor_rollout_ref.actor.strategy == "megatron": + assert config.actor_rollout_ref.actor.strategy == config.critic.strategy + from verl.single_controller.ray.megatron import NVMegatronRayWorkerGroup + from verl.workers.megatron_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker, CriticWorker + + actor_rollout_cls = ( + AsyncActorRolloutRefWorker + if config.actor_rollout_ref.rollout.mode == "async" + else ActorRolloutRefWorker + ) + ray_worker_group_cls = NVMegatronRayWorkerGroup + + else: + raise NotImplementedError + + from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role + + # Map roles to their corresponding remote worker classes. + role_worker_mapping = { + Role.ActorRollout: ray.remote(actor_rollout_cls), + Role.Critic: ray.remote(CriticWorker), + } + + # Define the resource pool specification. + # Map roles to the resource pool. 
+ global_pool_id = "global_pool" + resource_pool_spec = { + global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes, + } + mapping = { + Role.ActorRollout: global_pool_id, + Role.Critic: global_pool_id, + } + + # We should adopt a multi-source reward function here: + # - for rule-based rm, we directly call a reward score + # - for model-based rm, we call a model + # - for code related prompt, we send to a sandbox if there are test cases + # finally, we combine all the rewards together + # The reward type depends on the tag of the data + if config.reward_model.enable: + if config.reward_model.strategy in {"fsdp", "fsdp2"}: + from verl.workers.fsdp_workers import RewardModelWorker + elif config.reward_model.strategy == "megatron": + from verl.workers.megatron_workers import RewardModelWorker + else: + raise NotImplementedError + role_worker_mapping[Role.RewardModel] = ray.remote(RewardModelWorker) + mapping[Role.RewardModel] = global_pool_id + + # Add a reference policy worker if KL loss or KL reward is used. + if config.algorithm.use_kl_in_reward or config.actor_rollout_ref.actor.use_kl_loss: + role_worker_mapping[Role.RefPolicy] = ray.remote(ActorRolloutRefWorker) + mapping[Role.RefPolicy] = global_pool_id + + # Load the reward manager for training and validation. + reward_fn = load_reward_manager( + config, tokenizer, num_examine=0, **config.reward_model.get("reward_kwargs", {}) + ) + val_reward_fn = load_reward_manager( + config, tokenizer, num_examine=1, **config.reward_model.get("reward_kwargs", {}) + ) + resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=mapping) + + from verl.utils.dataset.rl_dataset import collate_fn + + # Create training and validation datasets. 
+ train_dataset = create_rl_dataset(config.data.train_files, config.data, tokenizer, processor, is_train=True) + val_dataset = create_rl_dataset(config.data.val_files, config.data, tokenizer, processor, is_train=False) + train_sampler = create_rl_sampler(config.data, train_dataset) + + # Initialize the PPO trainer. + trainer = RayPPOTrainer( + config=config, + tokenizer=tokenizer, + processor=processor, + role_worker_mapping=role_worker_mapping, + resource_pool_manager=resource_pool_manager, + ray_worker_group_cls=ray_worker_group_cls, + reward_fn=reward_fn, + val_reward_fn=val_reward_fn, + train_dataset=train_dataset, + val_dataset=val_dataset, + collate_fn=collate_fn, + train_sampler=train_sampler, + ) + # Initialize the workers of the trainer. + trainer.init_workers() + # Start the training process. + trainer.fit() + + +def create_rl_dataset(data_paths, data_config, tokenizer, processor, is_train=True): + """Create a dataset. + + Arguments: + data_paths: List of paths to data files. + data_config: The data config. + tokenizer (Tokenizer): The tokenizer. + processor (Processor): The processor. + + Returns: + dataset (Dataset): The dataset. 
+ """ + from torch.utils.data import Dataset + + from verl.utils.dataset.rl_dataset import RLHFDataset + + # Check if a custom dataset class is specified in the data configuration + # and if the path to the custom class is provided + if "custom_cls" in data_config and data_config.custom_cls.get("path", None) is not None: + # Dynamically load the custom dataset class + dataset_cls = load_extern_type(data_config.custom_cls.path, data_config.custom_cls.name) + # Verify that the custom dataset class inherits from torch.utils.data.Dataset + if not issubclass(dataset_cls, Dataset): + raise TypeError( + f"The custom dataset class '{data_config.custom_cls.name}' from " + f"'{data_config.custom_cls.path}' must inherit from torch.utils.data.Dataset" + ) + elif "datagen" in data_config and data_config.datagen.get("path", None) is not None and is_train: + # If a data generation strategy is specified, use the DynamicGenDataset class + from verl.utils.dataset.dynamicgen_dataset import DynamicGenDataset + + dataset_cls = DynamicGenDataset + print("Using DynamicGenDataset for data generation.") + + else: + # Use the default RLHFDataset class if no custom class is specified + dataset_cls = RLHFDataset + print(f"Using dataset class: {dataset_cls.__name__}") + + # Instantiate the dataset using the determined dataset class + dataset = dataset_cls( + data_files=data_paths, + tokenizer=tokenizer, + processor=processor, + config=data_config, + ) + + return dataset + + +def create_rl_sampler(data_config, dataset): + """Create a sampler for the dataset. + + Arguments: + data_config: The data config. + dataset (Dataset): The dataset. + + Returns: + sampler (Sampler): The sampler. 
+ """ + import torch + from torch.utils.data import RandomSampler, SequentialSampler + + if data_config.sampler is not None and data_config.sampler.get("class_path", None) is not None: + curriculum_class = load_extern_type( + data_config.sampler.class_path, + data_config.sampler.class_name, + ) + sampler = curriculum_class( + data_source=dataset, + data_config=data_config, + ) + assert isinstance(sampler, AbstractSampler) + assert data_config.get("dataloader_num_workers", 8) == 0, ( + "If using curriculum, num_workers must be 0 to prevent data caching. " + "If the dataloader caches data before the batch is done the " + "curriculum sampler won't have the opportunity to reorder it. " + ) + + # Use a sampler to facilitate checkpoint resumption. + # If shuffling is enabled in the data configuration, create a random sampler. + elif data_config.shuffle: + train_dataloader_generator = torch.Generator() + train_dataloader_generator.manual_seed(data_config.get("seed", 1)) + sampler = RandomSampler(data_source=dataset, generator=train_dataloader_generator) + else: + # If shuffling is disabled, use a sequential sampler to iterate through the dataset in order. + sampler = SequentialSampler(data_source=dataset) + + return sampler + + +if __name__ == "__main__": + main() diff --git a/toolbox/verl/v0.5.0/patches/verl/workers/rollout/vllm_rollout/__init__.py b/toolbox/verl/v0.5.0/patches/verl/workers/rollout/vllm_rollout/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..28da1086c1a611ac90784099599952d4ed4abb22 --- /dev/null +++ b/toolbox/verl/v0.5.0/patches/verl/workers/rollout/vllm_rollout/__init__.py @@ -0,0 +1,46 @@ +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from importlib.metadata import PackageNotFoundError, version + +from .vllm_rollout_spmd import vLLMAsyncRollout, vLLMRollout # noqa: F401 + + +def get_version(pkg): + try: + ver = version(pkg) + if "+" in ver: + return ver.split("+")[0] + else: + return version(pkg) + except PackageNotFoundError: + return None + + +vllm_package_name = "vllm" +vllm_package_version = get_version(vllm_package_name) +if vllm_package_version is None: + raise PackageNotFoundError( + "To use vllm rollout, please ensure the 'vllm' package is properly installed. See " + "https://verl.readthedocs.io/en/latest/start/install.html for more details" + ) + +if "ROCM_PATH" in os.environ: + import re + + match = re.match(r"(\d+\.\d+\.?\d*)", vllm_package_version) + if match: + vllm_package_version = match.group(1) + else: + raise ValueError(f"Warning: Could not parse version format: {vllm_package_version}")