diff --git a/README.md b/README.md
index c561791d46e7a3962287222c49e52221b95b6d27..7b11611b380e92196d1055f783e56d7f0ca6b7a8 100644
--- a/README.md
+++ b/README.md
@@ -7,30 +7,30 @@ DeepSparkHub甄选上百个应用算法和模型，覆盖AI和通用计算各领
 
 ### LLM (Large Language Model)
 
-| Model                                                       | Framework | ToolBox            | Dataset/Weight        |
-|-------------------------------------------------------------|-----------|--------------------|-----------------------|
-| [Aquila2-34B](nlp/llm/aquila2-34b/megatron-deepspeed)       | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
-| [Baichuan2-7B](nlp/llm/baichuan2-7b/baichuan2)              | PyTorch   | DeepSpeed          | baichuan2-7b-base     |
-| [Bloom-7B1](nlp/llm/bloom-7b1/firefly)                      | PyTorch   | Firefly            | school_math_0.25M     |
-| [ChatGLM-6B](nlp/llm/chatglm-6b/deepspeed)                  | PyTorch   | DeepSpeed          | ADGEN & chatglm-6b    |
-| [ChatGLM2-6B SFT](nlp/llm/chatglm2-6b-sft)                  | PyTorch   | DeepSpeed          | ADGEN & chatglm2-6b   |
-| [ChatGLM3-6B](nlp/llm/chatglm3-6b/deepspeed/finetune_demo)  | PyTorch   | DeepSpeed          | ADGEN & chatglm3-6b   |
-| [DeepSeekMoE 7B](nlp/llm/deepseek_moe_7b/colossalai)        | PyTorch   | ColossalAI         | deepseek-moe-16b-base |
-| [Llama-7B](nlp/llm/llama-7b/colossalai)                     | PyTorch   | ColossalAI         | llama-7b-hf           |
-| [Llama2-7B](nlp/llm/llama2-7b/megatron-deepspeed)           | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
-| [Llama2-7B RMF](nlp/llm/llama2-7b_reward_sft/deepspeed)     | PyTorch   | DeepSpeed          | Dahoas/rm-static      |
-| [Llama2-7B RLHF](nlp/llm/llama2-7b_rlhf/megatron-deepspeed) | PyTorch   | Megatron-DeepSpeed | llama2-7b&tiny-llama  |
-| [Llama2-7B SFT](nlp/llm/llama2-7b_sft/megatron-deepspeed)   | PyTorch   | Megatron-DeepSpeed | GPT Small-117M        |
-| [Llama2-13B](nlp/llm/llama2-13b/megatron-deepspeed)         | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
-| [Llama2-34B](nlp/llm/llama2-34b/megatron-deepspeed)         | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
-| [Llama3-8B](nlp/llm/llama3_8b/megatron-deepspeed)           | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
-| [Llama3-8B SFT](nlp/llm/llama3_8b/colossalai)               | PyTorch   | ColossalAI         | school_math_0.25M     |
-| [Mamba-2](nlp/llm/mamba-2/megatron-lm)                      | PyTorch   | Megatron-LM        | GPT Small-117M        |
-| [Mixtral 8x7B](nlp/llm/mixtral/megatron-lm)                 | PyTorch   | Megatron-LM        | GPT Small-117M        |
-| [QWen-7B](nlp/llm/qwen-7b/firefly)                          | PyTorch   | Firefly            | qwen-7b               |
-| [QWen1.5-7B](nlp/llm/qwen1.5-7b/firefly)                    | PyTorch   | Firefly            | school_math           |
-| [QWen1.5-14B](nlp/llm/qwen1.5-14b/firefly)                  | PyTorch   | Firefly            | school_math           |
-| [Qwen2.5-7B SFT](nlp/llm/qwen2.5-7b/llama-factory)          | PyTorch   | LLaMA-Factory      | qwen2.5-7b            |
+| Model                                                 | Framework | ToolBox            | Dataset/Weight        |
+|-------------------------------------------------------|-----------|--------------------|-----------------------|
+| [Aquila2-34B](nlp/llm/aquila2-34b/pytorch)            | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
+| [Baichuan2-7B](nlp/llm/baichuan2-7b/pytorch)          | PyTorch   | DeepSpeed          | baichuan2-7b-base     |
+| [Bloom-7B1](nlp/llm/bloom-7b1/pytorch)                | PyTorch   | Firefly            | school_math_0.25M     |
+| [ChatGLM-6B](nlp/llm/chatglm-6b/pytorch)              | PyTorch   | DeepSpeed          | ADGEN & chatglm-6b    |
+| [ChatGLM2-6B SFT](nlp/llm/chatglm2-6b-sft/pytorch)    | PyTorch   | DeepSpeed          | ADGEN & chatglm2-6b   |
+| [ChatGLM3-6B](nlp/llm/chatglm3-6b/pytorch)            | PyTorch   | DeepSpeed          | ADGEN & chatglm3-6b   |
+| [DeepSeekMoE 7B](nlp/llm/deepseek_moe_7b/pytorch)     | PyTorch   | ColossalAI         | deepseek-moe-16b-base |
+| [Llama-7B](nlp/llm/llama-7b/pytorch)                  | PyTorch   | ColossalAI         | llama-7b-hf           |
+| [Llama2-7B](nlp/llm/llama2-7b/pytorch)                | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
+| [Llama2-7B RMF](nlp/llm/llama2-7b_reward_sft/pytorch) | PyTorch   | DeepSpeed          | Dahoas/rm-static      |
+| [Llama2-7B RLHF](nlp/llm/llama2-7b_rlhf/pytorch)      | PyTorch   | Megatron-DeepSpeed | llama2-7b&tiny-llama  |
+| [Llama2-7B SFT](nlp/llm/llama2-7b_sft/pytorch)        | PyTorch   | Megatron-DeepSpeed | GPT Small-117M        |
+| [Llama2-13B](nlp/llm/llama2-13b/pytorch)              | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
+| [Llama2-34B](nlp/llm/llama2-34b/pytorch)              | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
+| [Llama3-8B](nlp/llm/llama3_8b/pytorch)                | PyTorch   | Megatron-DeepSpeed | Bookcorpus            |
+| [Llama3-8B SFT](nlp/llm/llama3_8b_sft/pytorch)        | PyTorch   | ColossalAI         | school_math_0.25M     |
+| [Mamba-2](nlp/llm/mamba-2/pytorch)                    | PyTorch   | Megatron-LM        | GPT Small-117M        |
+| [Mixtral 8x7B](nlp/llm/mixtral/pytorch)               | PyTorch   | Megatron-LM        | GPT Small-117M        |
+| [Qwen-7B](nlp/llm/qwen-7b/pytorch)                    | PyTorch   | Firefly            | qwen-7b               |
+| [Qwen1.5-7B](nlp/llm/qwen1.5-7b/pytorch)              | PyTorch   | Firefly            | school_math           |
+| [Qwen1.5-14B](nlp/llm/qwen1.5-14b/pytorch)            | PyTorch   | Firefly            | school_math           |
+| [Qwen2.5-7B SFT](nlp/llm/qwen2.5-7b/pytorch)          | PyTorch   | LLaMA-Factory      | qwen2.5-7b            |
 
 ### Computer Vision
 
@@ -338,19 +338,19 @@ DeepSparkHub甄选上百个应用算法和模型，覆盖AI和通用计算各领
 
 ### Multimodal
 
-| Model                                                                  | Framework | Dataset        |
-|------------------------------------------------------------------------|-----------|----------------|
-| [BLIP](multimodal/blip/pytorch)                                        | PyTorch   | COCO           |
-| [CLIP](multimodal/language-image_pre-training/clip/pytorch)            | PyTorch   | CIFAR100       |
-| [ControlNet](multimodal/diffusion/controlnet)                          | PyTorch   | Fill50K        |
-| [DDPM](multimodal/diffusion/ddpm)                                      | PyTorch   | CIFAR-10       |
-| [LLaVA 1.5](multimodal/llava/pytorch)                                  | PyTorch   | LLaVA-Pretrain |
-| [L-Verse](multimodal/language-image_pre-training/l-verse/pytorch)      | PyTorch   | ImageNet       |
-| [Stable Diffusion 1.4](multimodal/diffusion/stable-diffusion/training) | PyTorch   | pokemon-images |
-| [Stable Diffusion 1.5](multimodal/diffusion/stable-diffusion/sd_1.5)   | PyTorch   | pokemon-images |
-| [Stable Diffusion 2.1](multimodal/diffusion/stable-diffusion/sd_2.1)   | PyTorch   | pokemon-images |
-| [Stable Diffusion 3](multimodal/diffusion/stable-diffusion/sd_3)       | PyTorch   | dog-example    |
-| [Stable Diffusion XL](multimodal/diffusion/stable-diffusion/sd_xl)     | PyTorch   | pokemon-images |
+| Model                                                                           | Framework | Dataset        |
+|---------------------------------------------------------------------------------|-----------|----------------|
+| [BLIP](multimodal/vision-language_model/blip/pytorch)                           | PyTorch   | COCO           |
+| [CLIP](multimodal/contrastive_learning/clip/pytorch)                            | PyTorch   | CIFAR100       |
+| [ControlNet](multimodal/diffusion_model/controlnet/pytorch)                     | PyTorch   | Fill50K        |
+| [DDPM](multimodal/diffusion_model/ddpm)                                         | PyTorch   | CIFAR-10       |
+| [LLaVA 1.5](multimodal/vision-language_model/llava/pytorch)                     | PyTorch   | LLaVA-Pretrain |
+| [L-Verse](multimodal/vision-language_model/l-verse/pytorch)                     | PyTorch   | ImageNet       |
+| [Stable Diffusion 1.4](multimodal/diffusion_model/stable-diffusion-1.4/pytorch) | PyTorch   | pokemon-images |
+| [Stable Diffusion 1.5](multimodal/diffusion_model/stable-diffusion-1.5/pytorch) | PyTorch   | pokemon-images |
+| [Stable Diffusion 2.1](multimodal/diffusion_model/stable-diffusion-2.1/pytorch) | PyTorch   | pokemon-images |
+| [Stable Diffusion 3](multimodal/diffusion_model/stable-diffusion-3/pytorch)     | PyTorch   | dog-example    |
+| [Stable Diffusion XL](multimodal/diffusion_model/stable-diffusion-xl/pytorch)   | PyTorch   | pokemon-images |
 
 ### NLP (Natural Language Processing)
 
diff --git a/multimodal/language-image_pre-training/clip/pytorch/.gitignore b/multimodal/contrastive_learning/clip/pytorch/.gitignore
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/.gitignore
rename to multimodal/contrastive_learning/clip/pytorch/.gitignore
diff --git a/multimodal/language-image_pre-training/clip/pytorch/CLIP.png b/multimodal/contrastive_learning/clip/pytorch/CLIP.png
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/CLIP.png
rename to multimodal/contrastive_learning/clip/pytorch/CLIP.png
diff --git a/multimodal/language-image_pre-training/clip/pytorch/LICENSE b/multimodal/contrastive_learning/clip/pytorch/LICENSE
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/LICENSE
rename to multimodal/contrastive_learning/clip/pytorch/LICENSE
diff --git a/multimodal/language-image_pre-training/clip/pytorch/MANIFEST.in b/multimodal/contrastive_learning/clip/pytorch/MANIFEST.in
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/MANIFEST.in
rename to multimodal/contrastive_learning/clip/pytorch/MANIFEST.in
diff --git a/multimodal/language-image_pre-training/clip/pytorch/README.md b/multimodal/contrastive_learning/clip/pytorch/README.md
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/README.md
rename to multimodal/contrastive_learning/clip/pytorch/README.md
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/CLIP.png b/multimodal/contrastive_learning/clip/pytorch/clip/CLIP.png
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/CLIP.png
rename to multimodal/contrastive_learning/clip/pytorch/clip/CLIP.png
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/Linear_probe_evaluation.py b/multimodal/contrastive_learning/clip/pytorch/clip/Linear_probe_evaluation.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/Linear_probe_evaluation.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/Linear_probe_evaluation.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/__init__.py b/multimodal/contrastive_learning/clip/pytorch/clip/__init__.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/__init__.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/__init__.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/clip.py b/multimodal/contrastive_learning/clip/pytorch/clip/clip.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/clip.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/clip.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/model.py b/multimodal/contrastive_learning/clip/pytorch/clip/model.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/model.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/model.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/simple_tokenizer.py b/multimodal/contrastive_learning/clip/pytorch/clip/simple_tokenizer.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/simple_tokenizer.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/simple_tokenizer.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/test.py b/multimodal/contrastive_learning/clip/pytorch/clip/test.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/test.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/test.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/zero_shot_prediction_top1.py b/multimodal/contrastive_learning/clip/pytorch/clip/zero_shot_prediction_top1.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/zero_shot_prediction_top1.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/zero_shot_prediction_top1.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/clip/zero_shot_prediction_top5.py b/multimodal/contrastive_learning/clip/pytorch/clip/zero_shot_prediction_top5.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/clip/zero_shot_prediction_top5.py
rename to multimodal/contrastive_learning/clip/pytorch/clip/zero_shot_prediction_top5.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/data/country211.md b/multimodal/contrastive_learning/clip/pytorch/data/country211.md
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/data/country211.md
rename to multimodal/contrastive_learning/clip/pytorch/data/country211.md
diff --git a/multimodal/language-image_pre-training/clip/pytorch/data/prompts.md b/multimodal/contrastive_learning/clip/pytorch/data/prompts.md
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/data/prompts.md
rename to multimodal/contrastive_learning/clip/pytorch/data/prompts.md
diff --git a/multimodal/language-image_pre-training/clip/pytorch/data/rendered-sst2.md b/multimodal/contrastive_learning/clip/pytorch/data/rendered-sst2.md
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/data/rendered-sst2.md
rename to multimodal/contrastive_learning/clip/pytorch/data/rendered-sst2.md
diff --git a/multimodal/language-image_pre-training/clip/pytorch/data/yfcc100m.md b/multimodal/contrastive_learning/clip/pytorch/data/yfcc100m.md
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/data/yfcc100m.md
rename to multimodal/contrastive_learning/clip/pytorch/data/yfcc100m.md
diff --git a/multimodal/language-image_pre-training/clip/pytorch/model-card.md b/multimodal/contrastive_learning/clip/pytorch/model-card.md
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/model-card.md
rename to multimodal/contrastive_learning/clip/pytorch/model-card.md
diff --git a/multimodal/language-image_pre-training/clip/pytorch/requirements.txt b/multimodal/contrastive_learning/clip/pytorch/requirements.txt
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/requirements.txt
rename to multimodal/contrastive_learning/clip/pytorch/requirements.txt
diff --git a/multimodal/language-image_pre-training/clip/pytorch/setup.py b/multimodal/contrastive_learning/clip/pytorch/setup.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/setup.py
rename to multimodal/contrastive_learning/clip/pytorch/setup.py
diff --git a/multimodal/language-image_pre-training/clip/pytorch/tests/test_consistency.py b/multimodal/contrastive_learning/clip/pytorch/tests/test_consistency.py
similarity index 100%
rename from multimodal/language-image_pre-training/clip/pytorch/tests/test_consistency.py
rename to multimodal/contrastive_learning/clip/pytorch/tests/test_consistency.py
diff --git a/multimodal/diffusion/controlnet/.gitignore b/multimodal/diffusion_model/controlnet/pytorch/.gitignore
similarity index 100%
rename from multimodal/diffusion/controlnet/.gitignore
rename to multimodal/diffusion_model/controlnet/pytorch/.gitignore
diff --git a/multimodal/diffusion/controlnet/LICENSE b/multimodal/diffusion_model/controlnet/pytorch/LICENSE
similarity index 100%
rename from multimodal/diffusion/controlnet/LICENSE
rename to multimodal/diffusion_model/controlnet/pytorch/LICENSE
diff --git a/multimodal/diffusion/controlnet/README.md b/multimodal/diffusion_model/controlnet/pytorch/README.md
similarity index 100%
rename from multimodal/diffusion/controlnet/README.md
rename to multimodal/diffusion_model/controlnet/pytorch/README.md
diff --git a/multimodal/diffusion/controlnet/annotator/canny/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/canny/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/canny/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/canny/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/ckpts/ckpts.txt b/multimodal/diffusion_model/controlnet/pytorch/annotator/ckpts/ckpts.txt
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/ckpts/ckpts.txt
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/ckpts/ckpts.txt
diff --git a/multimodal/diffusion/controlnet/annotator/hed/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/hed/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/hed/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/hed/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/LICENSE b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/LICENSE
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/LICENSE
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/LICENSE
diff --git a/multimodal/diffusion/controlnet/annotator/midas/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/api.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/api.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/api.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/api.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/base_model.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/base_model.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/base_model.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/base_model.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/blocks.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/blocks.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/blocks.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/blocks.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/dpt_depth.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/dpt_depth.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/dpt_depth.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/dpt_depth.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/midas_net.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/midas_net.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/midas_net.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/midas_net.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/midas_net_custom.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/midas_net_custom.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/midas_net_custom.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/midas_net_custom.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/transforms.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/transforms.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/transforms.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/transforms.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/midas/vit.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/vit.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/midas/vit.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/midas/vit.py
diff --git a/multimodal/diffusion/controlnet/annotator/midas/utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/midas/utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/midas/utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/midas/utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/mlsd/LICENSE b/multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/LICENSE
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/mlsd/LICENSE
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/LICENSE
diff --git a/multimodal/diffusion/controlnet/annotator/mlsd/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/mlsd/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/mlsd/models/mbv2_mlsd_large.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/models/mbv2_mlsd_large.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/mlsd/models/mbv2_mlsd_large.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/models/mbv2_mlsd_large.py
diff --git a/multimodal/diffusion/controlnet/annotator/mlsd/models/mbv2_mlsd_tiny.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/models/mbv2_mlsd_tiny.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/mlsd/models/mbv2_mlsd_tiny.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/models/mbv2_mlsd_tiny.py
diff --git a/multimodal/diffusion/controlnet/annotator/mlsd/utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/mlsd/utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/mlsd/utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/openpose/LICENSE b/multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/LICENSE
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/openpose/LICENSE
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/LICENSE
diff --git a/multimodal/diffusion/controlnet/annotator/openpose/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/openpose/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/openpose/body.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/body.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/openpose/body.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/body.py
diff --git a/multimodal/diffusion/controlnet/annotator/openpose/hand.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/hand.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/openpose/hand.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/hand.py
diff --git a/multimodal/diffusion/controlnet/annotator/openpose/model.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/model.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/openpose/model.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/model.py
diff --git a/multimodal/diffusion/controlnet/annotator/openpose/util.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/util.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/openpose/util.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/openpose/util.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/LICENSE b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/LICENSE
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/LICENSE
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/LICENSE
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/ade20k.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/ade20k.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/ade20k.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/ade20k.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/chase_db1.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/chase_db1.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/chase_db1.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/chase_db1.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/cityscapes.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/cityscapes.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/cityscapes.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/cityscapes.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/drive.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/drive.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/drive.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/drive.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/hrf.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/hrf.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/hrf.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/hrf.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_context.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_context.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_context.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_context.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/stare.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/stare.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/datasets/stare.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/datasets/stare.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/default_runtime.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/default_runtime.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/default_runtime.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/default_runtime.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ann_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ann_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/cgnet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/cgnet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/cgnet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/cgnet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/danet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/danet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/dnl_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/emanet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/encnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fast_scnn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fast_scnn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fast_scnn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fast_scnn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fcn_hr18.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fcn_hr18.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fcn_hr18.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fcn_hr18.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fcn_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fcn_unet_s5-d16.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fpn_r50.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fpn_r50.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fpn_r50.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fpn_r50.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fpn_uniformer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fpn_uniformer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/fpn_uniformer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/fpn_uniformer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/gcnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/lraspp_m-v3-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/nonlocal_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ocrnet_hr18.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/ocrnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/pointrend_r50.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/pointrend_r50.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/pointrend_r50.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/pointrend_r50.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/psanet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/pspnet_r50-d8.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/pspnet_unet_s5-d16.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/upernet_r50.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/upernet_r50.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/upernet_r50.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/upernet_r50.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/upernet_uniformer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/upernet_uniformer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/models/upernet_uniformer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/models/upernet_uniformer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_160k.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_160k.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_160k.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_160k.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_20k.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_20k.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_20k.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_20k.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_40k.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_40k.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_40k.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_40k.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_80k.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_80k.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/configs/_base_/schedules/schedule_80k.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/configs/_base_/schedules/schedule_80k.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/config.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/config.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/config.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/config.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/run.sh b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/run.sh
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/run.sh
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/run.sh
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test.sh b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test.sh
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test.sh
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test.sh
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test_config_g.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test_config_g.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test_config_g.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test_config_g.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test_config_h32.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test_config_h32.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test_config_h32.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test_config_h32.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test_config_w32.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test_config_w32.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/exp/upernet_global_small/test_config_w32.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/exp/upernet_global_small/test_config_w32.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/arraymisc/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/arraymisc/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/arraymisc/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/arraymisc/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/arraymisc/quantization.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/arraymisc/quantization.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/arraymisc/quantization.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/arraymisc/quantization.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/alexnet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/alexnet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/alexnet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/alexnet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/activation.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/activation.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/activation.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/activation.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/context_block.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/context_block.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/context_block.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/context_block.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv2d_adaptive_padding.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv_module.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv_module.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv_module.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv_module.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/conv_ws.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/depthwise_separable_conv_module.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/drop.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/drop.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/drop.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/drop.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/generalized_attention.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/hsigmoid.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/hswish.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/hswish.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/hswish.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/hswish.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/non_local.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/non_local.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/non_local.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/non_local.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/norm.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/norm.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/norm.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/norm.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/padding.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/padding.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/padding.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/padding.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/plugin.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/plugin.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/plugin.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/plugin.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/registry.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/registry.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/registry.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/registry.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/scale.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/scale.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/scale.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/scale.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/swish.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/swish.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/swish.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/swish.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/transformer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/transformer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/transformer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/transformer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/upsample.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/upsample.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/upsample.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/upsample.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/wrappers.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/wrappers.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/bricks/wrappers.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/bricks/wrappers.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/builder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/builder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/builder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/builder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/resnet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/resnet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/resnet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/resnet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/flops_counter.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/flops_counter.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/flops_counter.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/flops_counter.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/fuse_conv_bn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/sync_bn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/sync_bn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/weight_init.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/weight_init.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/utils/weight_init.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/utils/weight_init.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/vgg.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/vgg.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/cnn/vgg.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/cnn/vgg.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/engine/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/engine/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/engine/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/engine/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/engine/test.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/engine/test.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/engine/test.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/engine/test.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/file_client.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/file_client.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/file_client.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/file_client.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/base.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/base.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/base.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/base.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/json_handler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/json_handler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/json_handler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/json_handler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/pickle_handler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/handlers/yaml_handler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/io.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/io.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/io.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/io.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/parse.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/parse.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/fileio/parse.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/fileio/parse.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/colorspace.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/colorspace.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/colorspace.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/colorspace.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/geometric.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/geometric.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/geometric.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/geometric.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/io.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/io.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/io.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/io.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/misc.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/misc.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/misc.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/misc.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/photometric.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/photometric.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/image/photometric.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/image/photometric.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/model_zoo/deprecated.json b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/model_zoo/deprecated.json
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/model_zoo/deprecated.json
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/model_zoo/deprecated.json
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/model_zoo/mmcls.json b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/model_zoo/mmcls.json
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/model_zoo/mmcls.json
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/model_zoo/mmcls.json
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/model_zoo/open_mmlab.json b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/model_zoo/open_mmlab.json
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/model_zoo/open_mmlab.json
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/model_zoo/open_mmlab.json
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/assign_score_withk.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/assign_score_withk.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/assign_score_withk.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/assign_score_withk.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/ball_query.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/ball_query.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/ball_query.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/ball_query.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/bbox.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/bbox.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/bbox.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/bbox.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/border_align.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/border_align.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/border_align.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/border_align.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/box_iou_rotated.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/box_iou_rotated.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/box_iou_rotated.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/box_iou_rotated.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/carafe.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/carafe.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/carafe.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/carafe.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/cc_attention.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/cc_attention.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/cc_attention.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/cc_attention.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/contour_expand.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/contour_expand.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/contour_expand.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/contour_expand.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/corner_pool.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/corner_pool.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/corner_pool.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/corner_pool.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/correlation.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/correlation.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/correlation.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/correlation.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/deform_conv.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/deform_conv.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/deform_conv.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/deform_conv.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/deform_roi_pool.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/deform_roi_pool.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/deform_roi_pool.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/deform_roi_pool.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/deprecated_wrappers.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/deprecated_wrappers.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/deprecated_wrappers.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/deprecated_wrappers.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/focal_loss.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/focal_loss.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/focal_loss.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/focal_loss.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/furthest_point_sample.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/furthest_point_sample.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/furthest_point_sample.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/furthest_point_sample.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/fused_bias_leakyrelu.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/gather_points.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/gather_points.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/gather_points.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/gather_points.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/group_points.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/group_points.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/group_points.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/group_points.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/info.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/info.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/info.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/info.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/iou3d.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/iou3d.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/iou3d.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/iou3d.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/knn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/knn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/knn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/knn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/masked_conv.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/masked_conv.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/masked_conv.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/masked_conv.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/merge_cells.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/merge_cells.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/merge_cells.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/merge_cells.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/modulated_deform_conv.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/modulated_deform_conv.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/modulated_deform_conv.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/modulated_deform_conv.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/multi_scale_deform_attn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/nms.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/nms.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/nms.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/nms.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/pixel_group.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/pixel_group.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/pixel_group.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/pixel_group.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/point_sample.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/point_sample.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/point_sample.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/point_sample.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/points_in_boxes.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/points_in_boxes.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/points_in_boxes.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/points_in_boxes.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/points_sampler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/points_sampler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/points_sampler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/points_sampler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/psa_mask.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/psa_mask.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/psa_mask.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/psa_mask.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roi_align.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roi_align.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roi_align.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roi_align.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roi_align_rotated.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roi_align_rotated.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roi_align_rotated.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roi_align_rotated.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roi_pool.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roi_pool.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roi_pool.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roi_pool.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roiaware_pool3d.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roiaware_pool3d.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roipoint_pool3d.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/roipoint_pool3d.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/saconv.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/saconv.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/saconv.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/saconv.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/scatter_points.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/scatter_points.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/scatter_points.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/scatter_points.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/sync_bn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/sync_bn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/sync_bn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/sync_bn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/three_interpolate.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/three_interpolate.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/three_interpolate.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/three_interpolate.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/three_nn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/three_nn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/three_nn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/three_nn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/tin_shift.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/tin_shift.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/tin_shift.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/tin_shift.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/upfirdn2d.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/upfirdn2d.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/upfirdn2d.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/upfirdn2d.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/voxelize.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/voxelize.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/ops/voxelize.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/ops/voxelize.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/_functions.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/_functions.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/_functions.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/_functions.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/collate.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/collate.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/collate.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/collate.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/data_container.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/data_container.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/data_container.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/data_container.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/data_parallel.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/data_parallel.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/data_parallel.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/data_parallel.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/distributed.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/distributed.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/distributed.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/distributed.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/distributed_deprecated.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/distributed_deprecated.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/registry.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/registry.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/registry.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/registry.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/scatter_gather.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/scatter_gather.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/scatter_gather.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/scatter_gather.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/parallel/utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/parallel/utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/base_module.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/base_module.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/base_module.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/base_module.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/base_runner.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/base_runner.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/base_runner.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/base_runner.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/builder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/builder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/builder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/builder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/checkpoint.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/checkpoint.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/checkpoint.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/checkpoint.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/default_constructor.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/default_constructor.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/default_constructor.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/default_constructor.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/dist_utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/dist_utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/dist_utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/dist_utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/epoch_based_runner.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/epoch_based_runner.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/epoch_based_runner.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/epoch_based_runner.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/fp16_utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/fp16_utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/fp16_utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/fp16_utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/checkpoint.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/checkpoint.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/checkpoint.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/checkpoint.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/closure.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/closure.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/closure.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/closure.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/ema.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/ema.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/ema.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/ema.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/evaluation.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/evaluation.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/evaluation.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/evaluation.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/hook.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/hook.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/hook.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/hook.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/iter_timer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/iter_timer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/iter_timer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/iter_timer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/base.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/base.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/base.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/base.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/text.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/text.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/text.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/text.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/lr_updater.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/memory.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/memory.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/memory.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/memory.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/optimizer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/optimizer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/optimizer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/optimizer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/profiler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/profiler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/profiler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/profiler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/iter_based_runner.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/iter_based_runner.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/iter_based_runner.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/iter_based_runner.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/log_buffer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/log_buffer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/log_buffer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/log_buffer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/optimizer/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/optimizer/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/optimizer/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/optimizer/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/optimizer/builder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/optimizer/builder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/optimizer/builder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/optimizer/builder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/priority.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/priority.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/priority.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/priority.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/runner/utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/runner/utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/config.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/config.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/config.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/config.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/env.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/env.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/env.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/env.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/ext_loader.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/ext_loader.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/ext_loader.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/ext_loader.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/logging.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/logging.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/logging.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/logging.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/misc.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/misc.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/misc.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/misc.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/parrots_jit.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/parrots_jit.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/parrots_jit.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/parrots_jit.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/parrots_wrapper.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/parrots_wrapper.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/parrots_wrapper.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/parrots_wrapper.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/path.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/path.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/path.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/path.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/progressbar.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/progressbar.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/progressbar.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/progressbar.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/registry.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/registry.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/registry.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/registry.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/testing.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/testing.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/testing.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/testing.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/timer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/timer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/timer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/timer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/trace.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/trace.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/trace.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/trace.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/version_utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/version_utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/utils/version_utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/utils/version_utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/version.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/version.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/version.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/version.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/io.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/io.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/io.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/io.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/optflow.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/optflow.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/optflow.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/optflow.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/processing.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/processing.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/video/processing.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/video/processing.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/color.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/color.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/color.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/color.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/image.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/image.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/image.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/optflow.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/optflow.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv/visualization/optflow.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv/visualization/optflow.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv_custom/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv_custom/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv_custom/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv_custom/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmcv_custom/checkpoint.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv_custom/checkpoint.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmcv_custom/checkpoint.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmcv_custom/checkpoint.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/inference.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/inference.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/inference.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/inference.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/test.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/test.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/test.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/test.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/train.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/train.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/apis/train.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/apis/train.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/class_names.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/class_names.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/class_names.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/class_names.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/eval_hooks.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/eval_hooks.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/eval_hooks.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/eval_hooks.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/metrics.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/metrics.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/evaluation/metrics.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/evaluation/metrics.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/builder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/builder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/builder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/builder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/sampler/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/sampler/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/sampler/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/sampler/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/sampler/base_pixel_sampler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/seg/sampler/ohem_pixel_sampler.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/utils/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/utils/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/utils/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/utils/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/utils/misc.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/utils/misc.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/core/utils/misc.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/core/utils/misc.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/ade.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/ade.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/ade.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/ade.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/builder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/builder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/builder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/builder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/chase_db1.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/chase_db1.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/chase_db1.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/chase_db1.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/cityscapes.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/cityscapes.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/cityscapes.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/cityscapes.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/custom.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/custom.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/custom.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/custom.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/dataset_wrappers.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/dataset_wrappers.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/dataset_wrappers.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/dataset_wrappers.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/drive.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/drive.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/drive.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/drive.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/hrf.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/hrf.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/hrf.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/hrf.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pascal_context.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pascal_context.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pascal_context.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pascal_context.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/compose.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/compose.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/compose.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/compose.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/formating.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/formating.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/formating.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/formating.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/loading.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/loading.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/loading.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/loading.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/test_time_aug.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/transforms.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/transforms.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/pipelines/transforms.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/pipelines/transforms.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/stare.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/stare.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/stare.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/stare.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/voc.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/voc.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/datasets/voc.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/datasets/voc.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/cgnet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/cgnet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/cgnet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/cgnet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/fast_scnn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/fast_scnn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/fast_scnn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/fast_scnn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/hrnet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/hrnet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/hrnet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/hrnet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/mobilenet_v2.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/mobilenet_v2.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/mobilenet_v2.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/mobilenet_v2.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/mobilenet_v3.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/mobilenet_v3.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/mobilenet_v3.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/mobilenet_v3.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/resnest.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/resnest.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/resnest.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/resnest.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/resnet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/resnet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/resnet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/resnet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/resnext.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/resnext.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/resnext.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/resnext.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/unet.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/unet.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/unet.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/unet.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/uniformer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/uniformer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/uniformer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/uniformer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/vit.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/vit.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/backbones/vit.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/backbones/vit.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/builder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/builder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/builder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/builder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/ann_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/ann_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/ann_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/ann_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/apc_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/apc_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/apc_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/apc_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/aspp_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/aspp_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/aspp_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/aspp_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/cascade_decode_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/cascade_decode_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/cascade_decode_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/cascade_decode_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/cc_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/cc_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/cc_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/cc_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/da_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/da_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/da_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/da_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/decode_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/decode_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/decode_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/decode_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/dm_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/dm_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/dm_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/dm_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/dnl_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/dnl_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/dnl_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/dnl_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/ema_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/ema_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/ema_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/ema_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/enc_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/enc_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/enc_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/enc_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/fcn_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/fcn_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/fcn_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/fcn_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/fpn_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/fpn_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/fpn_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/fpn_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/gc_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/gc_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/gc_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/gc_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/lraspp_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/lraspp_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/lraspp_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/lraspp_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/nl_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/nl_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/nl_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/nl_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/ocr_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/ocr_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/ocr_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/ocr_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/point_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/point_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/point_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/point_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/psa_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/psa_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/psa_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/psa_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/psp_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/psp_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/psp_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/psp_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/sep_aspp_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/sep_aspp_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/sep_aspp_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/sep_aspp_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/sep_fcn_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/uper_head.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/uper_head.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/decode_heads/uper_head.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/decode_heads/uper_head.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/accuracy.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/accuracy.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/accuracy.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/accuracy.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/cross_entropy_loss.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/cross_entropy_loss.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/cross_entropy_loss.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/cross_entropy_loss.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/dice_loss.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/dice_loss.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/dice_loss.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/dice_loss.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/lovasz_loss.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/lovasz_loss.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/lovasz_loss.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/lovasz_loss.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/utils.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/losses/utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/losses/utils.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/necks/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/necks/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/necks/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/necks/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/necks/fpn.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/necks/fpn.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/necks/fpn.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/necks/fpn.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/necks/multilevel_neck.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/necks/multilevel_neck.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/necks/multilevel_neck.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/necks/multilevel_neck.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/base.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/base.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/base.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/base.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/cascade_encoder_decoder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/encoder_decoder.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/encoder_decoder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/segmentors/encoder_decoder.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/segmentors/encoder_decoder.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/drop.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/drop.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/drop.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/drop.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/inverted_residual.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/inverted_residual.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/inverted_residual.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/inverted_residual.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/make_divisible.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/make_divisible.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/make_divisible.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/make_divisible.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/res_layer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/res_layer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/res_layer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/res_layer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/se_layer.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/se_layer.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/se_layer.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/se_layer.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/self_attention_block.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/self_attention_block.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/self_attention_block.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/self_attention_block.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/up_conv_block.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/up_conv_block.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/up_conv_block.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/up_conv_block.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/weight_init.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/weight_init.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/models/utils/weight_init.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/models/utils/weight_init.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/ops/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/ops/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/ops/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/ops/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/ops/encoding.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/ops/encoding.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/ops/encoding.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/ops/encoding.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/ops/wrappers.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/ops/wrappers.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/ops/wrappers.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/ops/wrappers.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/utils/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/utils/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/utils/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/utils/__init__.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/utils/collect_env.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/utils/collect_env.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/utils/collect_env.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/utils/collect_env.py
diff --git a/multimodal/diffusion/controlnet/annotator/uniformer/mmseg/utils/logger.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/utils/logger.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/uniformer/mmseg/utils/logger.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/uniformer/mmseg/utils/logger.py
diff --git a/multimodal/diffusion/controlnet/annotator/util.py b/multimodal/diffusion_model/controlnet/pytorch/annotator/util.py
similarity index 100%
rename from multimodal/diffusion/controlnet/annotator/util.py
rename to multimodal/diffusion_model/controlnet/pytorch/annotator/util.py
diff --git a/multimodal/diffusion/controlnet/cldm/cldm.py b/multimodal/diffusion_model/controlnet/pytorch/cldm/cldm.py
similarity index 100%
rename from multimodal/diffusion/controlnet/cldm/cldm.py
rename to multimodal/diffusion_model/controlnet/pytorch/cldm/cldm.py
diff --git a/multimodal/diffusion/controlnet/cldm/ddim_hacked.py b/multimodal/diffusion_model/controlnet/pytorch/cldm/ddim_hacked.py
similarity index 100%
rename from multimodal/diffusion/controlnet/cldm/ddim_hacked.py
rename to multimodal/diffusion_model/controlnet/pytorch/cldm/ddim_hacked.py
diff --git a/multimodal/diffusion/controlnet/cldm/hack.py b/multimodal/diffusion_model/controlnet/pytorch/cldm/hack.py
similarity index 100%
rename from multimodal/diffusion/controlnet/cldm/hack.py
rename to multimodal/diffusion_model/controlnet/pytorch/cldm/hack.py
diff --git a/multimodal/diffusion/controlnet/cldm/logger.py b/multimodal/diffusion_model/controlnet/pytorch/cldm/logger.py
similarity index 100%
rename from multimodal/diffusion/controlnet/cldm/logger.py
rename to multimodal/diffusion_model/controlnet/pytorch/cldm/logger.py
diff --git a/multimodal/diffusion/controlnet/cldm/model.py b/multimodal/diffusion_model/controlnet/pytorch/cldm/model.py
similarity index 100%
rename from multimodal/diffusion/controlnet/cldm/model.py
rename to multimodal/diffusion_model/controlnet/pytorch/cldm/model.py
diff --git a/multimodal/diffusion/controlnet/config.py b/multimodal/diffusion_model/controlnet/pytorch/config.py
similarity index 100%
rename from multimodal/diffusion/controlnet/config.py
rename to multimodal/diffusion_model/controlnet/pytorch/config.py
diff --git a/multimodal/diffusion/controlnet/docs/annotator.md b/multimodal/diffusion_model/controlnet/pytorch/docs/annotator.md
similarity index 100%
rename from multimodal/diffusion/controlnet/docs/annotator.md
rename to multimodal/diffusion_model/controlnet/pytorch/docs/annotator.md
diff --git a/multimodal/diffusion/controlnet/docs/faq.md b/multimodal/diffusion_model/controlnet/pytorch/docs/faq.md
similarity index 100%
rename from multimodal/diffusion/controlnet/docs/faq.md
rename to multimodal/diffusion_model/controlnet/pytorch/docs/faq.md
diff --git a/multimodal/diffusion/controlnet/docs/low_vram.md b/multimodal/diffusion_model/controlnet/pytorch/docs/low_vram.md
similarity index 100%
rename from multimodal/diffusion/controlnet/docs/low_vram.md
rename to multimodal/diffusion_model/controlnet/pytorch/docs/low_vram.md
diff --git a/multimodal/diffusion/controlnet/environment.yaml b/multimodal/diffusion_model/controlnet/pytorch/environment.yaml
similarity index 100%
rename from multimodal/diffusion/controlnet/environment.yaml
rename to multimodal/diffusion_model/controlnet/pytorch/environment.yaml
diff --git a/multimodal/diffusion/controlnet/font/DejaVuSans.ttf b/multimodal/diffusion_model/controlnet/pytorch/font/DejaVuSans.ttf
similarity index 100%
rename from multimodal/diffusion/controlnet/font/DejaVuSans.ttf
rename to multimodal/diffusion_model/controlnet/pytorch/font/DejaVuSans.ttf
diff --git a/multimodal/diffusion/controlnet/gradio_annotator.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_annotator.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_annotator.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_annotator.py
diff --git a/multimodal/diffusion/controlnet/gradio_canny2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_canny2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_canny2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_canny2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_depth2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_depth2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_depth2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_depth2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_fake_scribble2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_fake_scribble2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_fake_scribble2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_fake_scribble2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_hed2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_hed2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_hed2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_hed2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_hough2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_hough2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_hough2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_hough2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_normal2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_normal2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_normal2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_normal2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_pose2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_pose2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_pose2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_pose2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_scribble2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_scribble2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_scribble2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_scribble2image.py
diff --git a/multimodal/diffusion/controlnet/gradio_scribble2image_interactive.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_scribble2image_interactive.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_scribble2image_interactive.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_scribble2image_interactive.py
diff --git a/multimodal/diffusion/controlnet/gradio_seg2image.py b/multimodal/diffusion_model/controlnet/pytorch/gradio_seg2image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/gradio_seg2image.py
rename to multimodal/diffusion_model/controlnet/pytorch/gradio_seg2image.py
diff --git a/multimodal/diffusion/controlnet/ldm/data/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/data/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/data/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/data/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/data/util.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/data/util.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/data/util.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/data/util.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/autoencoder.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/autoencoder.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/autoencoder.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/autoencoder.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/ddim.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/ddim.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/ddim.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/ddim.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/ddpm.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/ddpm.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/ddpm.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/ddpm.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/dpm_solver/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/dpm_solver/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/dpm_solver/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/dpm_solver/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/dpm_solver/dpm_solver.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/dpm_solver/dpm_solver.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/dpm_solver/dpm_solver.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/dpm_solver/dpm_solver.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/dpm_solver/sampler.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/dpm_solver/sampler.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/dpm_solver/sampler.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/dpm_solver/sampler.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/plms.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/plms.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/plms.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/plms.py
diff --git a/multimodal/diffusion/controlnet/ldm/models/diffusion/sampling_util.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/sampling_util.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/models/diffusion/sampling_util.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/models/diffusion/sampling_util.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/attention.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/attention.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/attention.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/attention.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/model.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/model.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/model.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/model.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/openaimodel.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/openaimodel.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/openaimodel.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/openaimodel.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/upscaling.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/upscaling.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/upscaling.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/upscaling.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/util.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/util.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/diffusionmodules/util.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/diffusionmodules/util.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/distributions/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/distributions/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/distributions/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/distributions/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/distributions/distributions.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/distributions/distributions.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/distributions/distributions.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/distributions/distributions.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/ema.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/ema.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/ema.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/ema.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/encoders/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/encoders/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/encoders/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/encoders/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/encoders/modules.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/encoders/modules.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/encoders/modules.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/encoders/modules.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/image_degradation/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/image_degradation/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/image_degradation/bsrgan.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/bsrgan.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/image_degradation/bsrgan.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/bsrgan.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/image_degradation/bsrgan_light.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/bsrgan_light.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/image_degradation/bsrgan_light.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/bsrgan_light.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/image_degradation/utils/test.png b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/utils/test.png
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/image_degradation/utils/test.png
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/utils/test.png
diff --git a/multimodal/diffusion/controlnet/ldm/modules/image_degradation/utils_image.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/utils_image.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/image_degradation/utils_image.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/image_degradation/utils_image.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/api.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/api.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/api.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/api.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/__init__.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/__init__.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/__init__.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/__init__.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/base_model.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/base_model.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/base_model.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/base_model.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/blocks.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/blocks.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/blocks.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/blocks.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/dpt_depth.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/dpt_depth.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/dpt_depth.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/dpt_depth.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/midas_net.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/midas_net.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/midas_net.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/midas_net.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/midas_net_custom.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/midas_net_custom.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/midas_net_custom.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/midas_net_custom.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/transforms.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/transforms.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/transforms.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/transforms.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/midas/vit.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/vit.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/midas/vit.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/midas/vit.py
diff --git a/multimodal/diffusion/controlnet/ldm/modules/midas/utils.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/utils.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/modules/midas/utils.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/modules/midas/utils.py
diff --git a/multimodal/diffusion/controlnet/ldm/util.py b/multimodal/diffusion_model/controlnet/pytorch/ldm/util.py
similarity index 100%
rename from multimodal/diffusion/controlnet/ldm/util.py
rename to multimodal/diffusion_model/controlnet/pytorch/ldm/util.py
diff --git a/multimodal/diffusion/controlnet/models/cldm_v15.yaml b/multimodal/diffusion_model/controlnet/pytorch/models/cldm_v15.yaml
similarity index 100%
rename from multimodal/diffusion/controlnet/models/cldm_v15.yaml
rename to multimodal/diffusion_model/controlnet/pytorch/models/cldm_v15.yaml
diff --git a/multimodal/diffusion/controlnet/models/cldm_v21.yaml b/multimodal/diffusion_model/controlnet/pytorch/models/cldm_v21.yaml
similarity index 100%
rename from multimodal/diffusion/controlnet/models/cldm_v21.yaml
rename to multimodal/diffusion_model/controlnet/pytorch/models/cldm_v21.yaml
diff --git a/multimodal/diffusion/controlnet/share.py b/multimodal/diffusion_model/controlnet/pytorch/share.py
similarity index 100%
rename from multimodal/diffusion/controlnet/share.py
rename to multimodal/diffusion_model/controlnet/pytorch/share.py
diff --git a/multimodal/diffusion/controlnet/tool_add_control.py b/multimodal/diffusion_model/controlnet/pytorch/tool_add_control.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tool_add_control.py
rename to multimodal/diffusion_model/controlnet/pytorch/tool_add_control.py
diff --git a/multimodal/diffusion/controlnet/tool_add_control_sd21.py b/multimodal/diffusion_model/controlnet/pytorch/tool_add_control_sd21.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tool_add_control_sd21.py
rename to multimodal/diffusion_model/controlnet/pytorch/tool_add_control_sd21.py
diff --git a/multimodal/diffusion/controlnet/tool_transfer_control.py b/multimodal/diffusion_model/controlnet/pytorch/tool_transfer_control.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tool_transfer_control.py
rename to multimodal/diffusion_model/controlnet/pytorch/tool_transfer_control.py
diff --git a/multimodal/diffusion/controlnet/tutorial_dataset.py b/multimodal/diffusion_model/controlnet/pytorch/tutorial_dataset.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tutorial_dataset.py
rename to multimodal/diffusion_model/controlnet/pytorch/tutorial_dataset.py
diff --git a/multimodal/diffusion/controlnet/tutorial_dataset_test.py b/multimodal/diffusion_model/controlnet/pytorch/tutorial_dataset_test.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tutorial_dataset_test.py
rename to multimodal/diffusion_model/controlnet/pytorch/tutorial_dataset_test.py
diff --git a/multimodal/diffusion/controlnet/tutorial_train.py b/multimodal/diffusion_model/controlnet/pytorch/tutorial_train.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tutorial_train.py
rename to multimodal/diffusion_model/controlnet/pytorch/tutorial_train.py
diff --git a/multimodal/diffusion/controlnet/tutorial_train_dist.py b/multimodal/diffusion_model/controlnet/pytorch/tutorial_train_dist.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tutorial_train_dist.py
rename to multimodal/diffusion_model/controlnet/pytorch/tutorial_train_dist.py
diff --git a/multimodal/diffusion/controlnet/tutorial_train_sd21.py b/multimodal/diffusion_model/controlnet/pytorch/tutorial_train_sd21.py
similarity index 100%
rename from multimodal/diffusion/controlnet/tutorial_train_sd21.py
rename to multimodal/diffusion_model/controlnet/pytorch/tutorial_train_sd21.py
diff --git a/multimodal/diffusion/ddpm/.gitignore b/multimodal/diffusion_model/ddpm/.gitignore
similarity index 100%
rename from multimodal/diffusion/ddpm/.gitignore
rename to multimodal/diffusion_model/ddpm/.gitignore
diff --git a/multimodal/diffusion/ddpm/LICENSE b/multimodal/diffusion_model/ddpm/LICENSE
similarity index 100%
rename from multimodal/diffusion/ddpm/LICENSE
rename to multimodal/diffusion_model/ddpm/LICENSE
diff --git a/multimodal/diffusion/ddpm/README.md b/multimodal/diffusion_model/ddpm/README.md
similarity index 100%
rename from multimodal/diffusion/ddpm/README.md
rename to multimodal/diffusion_model/ddpm/README.md
diff --git a/multimodal/diffusion/ddpm/config/CIFAR10.txt b/multimodal/diffusion_model/ddpm/config/CIFAR10.txt
similarity index 100%
rename from multimodal/diffusion/ddpm/config/CIFAR10.txt
rename to multimodal/diffusion_model/ddpm/config/CIFAR10.txt
diff --git a/multimodal/diffusion/ddpm/diffusion.py b/multimodal/diffusion_model/ddpm/diffusion.py
similarity index 100%
rename from multimodal/diffusion/ddpm/diffusion.py
rename to multimodal/diffusion_model/ddpm/diffusion.py
diff --git a/multimodal/diffusion/ddpm/images/cifar10_samples.png b/multimodal/diffusion_model/ddpm/images/cifar10_samples.png
similarity index 100%
rename from multimodal/diffusion/ddpm/images/cifar10_samples.png
rename to multimodal/diffusion_model/ddpm/images/cifar10_samples.png
diff --git a/multimodal/diffusion/ddpm/main.py b/multimodal/diffusion_model/ddpm/main.py
similarity index 100%
rename from multimodal/diffusion/ddpm/main.py
rename to multimodal/diffusion_model/ddpm/main.py
diff --git a/multimodal/diffusion/ddpm/model.py b/multimodal/diffusion_model/ddpm/model.py
similarity index 100%
rename from multimodal/diffusion/ddpm/model.py
rename to multimodal/diffusion_model/ddpm/model.py
diff --git a/multimodal/diffusion/ddpm/requirements.txt b/multimodal/diffusion_model/ddpm/requirements.txt
similarity index 100%
rename from multimodal/diffusion/ddpm/requirements.txt
rename to multimodal/diffusion_model/ddpm/requirements.txt
diff --git a/multimodal/diffusion/ddpm/score/both.py b/multimodal/diffusion_model/ddpm/score/both.py
similarity index 100%
rename from multimodal/diffusion/ddpm/score/both.py
rename to multimodal/diffusion_model/ddpm/score/both.py
diff --git a/multimodal/diffusion/ddpm/score/fid.py b/multimodal/diffusion_model/ddpm/score/fid.py
similarity index 100%
rename from multimodal/diffusion/ddpm/score/fid.py
rename to multimodal/diffusion_model/ddpm/score/fid.py
diff --git a/multimodal/diffusion/ddpm/score/inception.py b/multimodal/diffusion_model/ddpm/score/inception.py
similarity index 100%
rename from multimodal/diffusion/ddpm/score/inception.py
rename to multimodal/diffusion_model/ddpm/score/inception.py
diff --git a/multimodal/diffusion/ddpm/score/inception_score.py b/multimodal/diffusion_model/ddpm/score/inception_score.py
similarity index 100%
rename from multimodal/diffusion/ddpm/score/inception_score.py
rename to multimodal/diffusion_model/ddpm/score/inception_score.py
diff --git a/multimodal/diffusion/stable-diffusion/training/IMG/multi.png b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/IMG/multi.png
old mode 100755
new mode 100644
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/training/IMG/multi.png
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/IMG/multi.png
diff --git a/multimodal/diffusion/stable-diffusion/training/IMG/result.png b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/IMG/result.png
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/training/IMG/result.png
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/IMG/result.png
diff --git a/multimodal/diffusion/stable-diffusion/training/IMG/single.png b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/IMG/single.png
old mode 100755
new mode 100644
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/training/IMG/single.png
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/IMG/single.png
diff --git a/multimodal/diffusion/stable-diffusion/training/README.md b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/README.md
old mode 100755
new mode 100644
similarity index 98%
rename from multimodal/diffusion/stable-diffusion/training/README.md
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/README.md
index 3f295d14826a67e208bd949c91d678052a70e8a4..6a6360838ebe381108ddba213fa8603440b65d0d
--- a/multimodal/diffusion/stable-diffusion/training/README.md
+++ b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/README.md
@@ -1,4 +1,4 @@
-# Stable Diffusion
+# Stable Diffusion 1.4
 
 ## Model description
 
@@ -30,6 +30,7 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/openmpi3/lib/
 ### Install requirements
 
 ```bash
+
 pip3 install -r requirements.txt
 ```
 
diff --git a/multimodal/diffusion/stable-diffusion/training/requirements.txt b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/requirements.txt
old mode 100755
new mode 100644
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/training/requirements.txt
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/requirements.txt
diff --git a/multimodal/diffusion/stable-diffusion/training/test.py b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/test.py
old mode 100755
new mode 100644
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/training/test.py
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/test.py
diff --git a/multimodal/diffusion/stable-diffusion/training/train_text_to_image.py b/multimodal/diffusion_model/stable-diffusion-1.4/pytorch/train_text_to_image.py
old mode 100755
new mode 100644
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/training/train_text_to_image.py
rename to multimodal/diffusion_model/stable-diffusion-1.4/pytorch/train_text_to_image.py
diff --git a/multimodal/diffusion/stable-diffusion/sd_1.5/README.md b/multimodal/diffusion_model/stable-diffusion-1.5/pytorch/README.md
similarity index 95%
rename from multimodal/diffusion/stable-diffusion/sd_1.5/README.md
rename to multimodal/diffusion_model/stable-diffusion-1.5/pytorch/README.md
index fd94b132c92fae4cc19cbbaee966e43f49f49552..d96d16ba09210b6f95b9b7780a57766f5a3fc13f 100644
--- a/multimodal/diffusion/stable-diffusion/sd_1.5/README.md
+++ b/multimodal/diffusion_model/stable-diffusion-1.5/pytorch/README.md
@@ -40,7 +40,7 @@ export DATASET_PATH=/path/to/data
 
 ```bash
 # Go to diffusers path
-cd ${PROJ_ROOT}/multimodal/diffusion/stable-diffusion/diffusers
+cd ${PROJ_ROOT}/toolbox/diffusers
 
 # Single GPU
 bash run_sd_1.5_single.sh
diff --git a/multimodal/diffusion/stable-diffusion/sd_2.1/README.md b/multimodal/diffusion_model/stable-diffusion-2.1/pytorch/README.md
similarity index 95%
rename from multimodal/diffusion/stable-diffusion/sd_2.1/README.md
rename to multimodal/diffusion_model/stable-diffusion-2.1/pytorch/README.md
index e857499b58762c8b3bccf80e163997c21500f0ff..2f60182b43dd8b7b22fc3e52c4cdf08d6956fd04 100644
--- a/multimodal/diffusion/stable-diffusion/sd_2.1/README.md
+++ b/multimodal/diffusion_model/stable-diffusion-2.1/pytorch/README.md
@@ -40,7 +40,7 @@ export DATASET_PATH=/path/to/data
 
 ```bash
 # Go to diffusers path
-cd ${PROJ_ROOT}/multimodal/diffusion/stable-diffusion/diffusers
+cd ${PROJ_ROOT}/toolbox/diffusers
 
 # Single GPU
 bash run_sd_2.1_single.sh
diff --git a/multimodal/diffusion/stable-diffusion/sd_3/README.md b/multimodal/diffusion_model/stable-diffusion-3/pytorch/README.md
similarity index 95%
rename from multimodal/diffusion/stable-diffusion/sd_3/README.md
rename to multimodal/diffusion_model/stable-diffusion-3/pytorch/README.md
index 5d958039490e70cd5486597bd65747f1a9785e15..e19d1d9b422249d6274a7fc7f1a29e270d7f9024 100644
--- a/multimodal/diffusion/stable-diffusion/sd_3/README.md
+++ b/multimodal/diffusion_model/stable-diffusion-3/pytorch/README.md
@@ -40,7 +40,7 @@ export DATASET_PATH=/path/to/data
 
 ```bash
 # Go to diffusers path
-cd ${PROJ_ROOT}/multimodal/diffusion/stable-diffusion/diffusers
+cd ${PROJ_ROOT}/toolbox/diffusers
 
 # train
 bash run_sd3_dreambooth.sh
diff --git a/multimodal/diffusion/stable-diffusion/sd_xl/README.md b/multimodal/diffusion_model/stable-diffusion-xl/pytorch/README.md
similarity index 95%
rename from multimodal/diffusion/stable-diffusion/sd_xl/README.md
rename to multimodal/diffusion_model/stable-diffusion-xl/pytorch/README.md
index a340e18126762f8b4a38a64b257c7415da1aa84a..9d0171334f55e029bdd5b1c6069b40872769f0ef 100644
--- a/multimodal/diffusion/stable-diffusion/sd_xl/README.md
+++ b/multimodal/diffusion_model/stable-diffusion-xl/pytorch/README.md
@@ -45,7 +45,7 @@ export VAE_PATH=/path/to/vae_weights
 
 ```sh
 # Go to diffusers path
-cd ${PROJ_ROOT}/multimodal/diffusion/stable-diffusion/diffusers
+cd ${PROJ_ROOT}/toolbox/diffusers
 
 bash run_sd_xl.sh
 ```
diff --git a/multimodal/blip/pytorch/README.md b/multimodal/vision-language_model/blip/pytorch/README.md
similarity index 100%
rename from multimodal/blip/pytorch/README.md
rename to multimodal/vision-language_model/blip/pytorch/README.md
diff --git a/multimodal/blip/pytorch/train_caption.py b/multimodal/vision-language_model/blip/pytorch/train_caption.py
similarity index 100%
rename from multimodal/blip/pytorch/train_caption.py
rename to multimodal/vision-language_model/blip/pytorch/train_caption.py
diff --git a/multimodal/language-image_pre-training/L-Verse/pytorch/README.md b/multimodal/vision-language_model/l-verse/pytorch/README.md
similarity index 100%
rename from multimodal/language-image_pre-training/L-Verse/pytorch/README.md
rename to multimodal/vision-language_model/l-verse/pytorch/README.md
diff --git a/multimodal/llava/pytorch/README.md b/multimodal/vision-language_model/llava-1.5/pytorch/README.md
similarity index 100%
rename from multimodal/llava/pytorch/README.md
rename to multimodal/vision-language_model/llava-1.5/pytorch/README.md
diff --git a/multimodal/llava/pytorch/train.sh b/multimodal/vision-language_model/llava-1.5/pytorch/train.sh
similarity index 100%
rename from multimodal/llava/pytorch/train.sh
rename to multimodal/vision-language_model/llava-1.5/pytorch/train.sh
diff --git a/nlp/llm/aquila2-34b/megatron-deepspeed/README.md b/nlp/llm/aquila2-34b/pytorch/README.md
similarity index 100%
rename from nlp/llm/aquila2-34b/megatron-deepspeed/README.md
rename to nlp/llm/aquila2-34b/pytorch/README.md
diff --git a/nlp/llm/baichuan2-7b/baichuan2/LICENSE b/nlp/llm/baichuan2-7b/pytorch/LICENSE
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/LICENSE
rename to nlp/llm/baichuan2-7b/pytorch/LICENSE
diff --git a/nlp/llm/baichuan2-7b/baichuan2/OpenAI_api.py b/nlp/llm/baichuan2-7b/pytorch/OpenAI_api.py
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/OpenAI_api.py
rename to nlp/llm/baichuan2-7b/pytorch/OpenAI_api.py
diff --git a/nlp/llm/baichuan2-7b/baichuan2/README.md b/nlp/llm/baichuan2-7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/README.md
rename to nlp/llm/baichuan2-7b/pytorch/README.md
diff --git a/nlp/llm/baichuan2-7b/baichuan2/cli_demo.py b/nlp/llm/baichuan2-7b/pytorch/cli_demo.py
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/cli_demo.py
rename to nlp/llm/baichuan2-7b/pytorch/cli_demo.py
diff --git a/nlp/llm/baichuan2-7b/baichuan2/fine-tune/configuration_baichuan.py b/nlp/llm/baichuan2-7b/pytorch/fine-tune/configuration_baichuan.py
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/fine-tune/configuration_baichuan.py
rename to nlp/llm/baichuan2-7b/pytorch/fine-tune/configuration_baichuan.py
diff --git a/nlp/llm/baichuan2-7b/baichuan2/fine-tune/ds_config.json b/nlp/llm/baichuan2-7b/pytorch/fine-tune/ds_config.json
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/fine-tune/ds_config.json
rename to nlp/llm/baichuan2-7b/pytorch/fine-tune/ds_config.json
diff --git a/nlp/llm/baichuan2-7b/baichuan2/fine-tune/fine-tune.py b/nlp/llm/baichuan2-7b/pytorch/fine-tune/fine-tune.py
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/fine-tune/fine-tune.py
rename to nlp/llm/baichuan2-7b/pytorch/fine-tune/fine-tune.py
diff --git a/nlp/llm/baichuan2-7b/baichuan2/fine-tune/get_Baichuan2_model.py b/nlp/llm/baichuan2-7b/pytorch/fine-tune/get_Baichuan2_model.py
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/fine-tune/get_Baichuan2_model.py
rename to nlp/llm/baichuan2-7b/pytorch/fine-tune/get_Baichuan2_model.py
diff --git a/nlp/llm/baichuan2-7b/baichuan2/fine-tune/requirements.txt b/nlp/llm/baichuan2-7b/pytorch/fine-tune/requirements.txt
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/fine-tune/requirements.txt
rename to nlp/llm/baichuan2-7b/pytorch/fine-tune/requirements.txt
diff --git a/nlp/llm/baichuan2-7b/baichuan2/fine-tune/run_sft.sh b/nlp/llm/baichuan2-7b/pytorch/fine-tune/run_sft.sh
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/fine-tune/run_sft.sh
rename to nlp/llm/baichuan2-7b/pytorch/fine-tune/run_sft.sh
diff --git a/nlp/llm/baichuan2-7b/baichuan2/media/checkpoints.jpeg b/nlp/llm/baichuan2-7b/pytorch/media/checkpoints.jpeg
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/media/checkpoints.jpeg
rename to nlp/llm/baichuan2-7b/pytorch/media/checkpoints.jpeg
diff --git a/nlp/llm/baichuan2-7b/baichuan2/requirements.txt b/nlp/llm/baichuan2-7b/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/requirements.txt
rename to nlp/llm/baichuan2-7b/pytorch/requirements.txt
diff --git a/nlp/llm/baichuan2-7b/baichuan2/web_demo.py b/nlp/llm/baichuan2-7b/pytorch/web_demo.py
similarity index 100%
rename from nlp/llm/baichuan2-7b/baichuan2/web_demo.py
rename to nlp/llm/baichuan2-7b/pytorch/web_demo.py
diff --git a/nlp/llm/bloom-7b1/firefly/README.md b/nlp/llm/bloom-7b1/pytorch/README.md
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/README.md
rename to nlp/llm/bloom-7b1/pytorch/README.md
diff --git a/nlp/llm/bloom-7b1/firefly/configs/bloom-sft-full.json b/nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-full.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/bloom-sft-full.json
rename to nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-full.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/bloom-sft-lora.json b/nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-lora.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/bloom-sft-lora.json
rename to nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-lora.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/bloom-sft-ptuning.json b/nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-ptuning.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/bloom-sft-ptuning.json
rename to nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-ptuning.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/bloom-sft-ptuning_v2.json b/nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-ptuning_v2.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/bloom-sft-ptuning_v2.json
rename to nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-ptuning_v2.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/bloom-sft-qlora.json b/nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-qlora.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/bloom-sft-qlora.json
rename to nlp/llm/bloom-7b1/pytorch/configs/bloom-sft-qlora.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z2_config.json b/nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z2_config.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z2_config.json
rename to nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z2_config.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z2_config_bf16.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z2_config_bf16.json
rename to nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z2_config_bf16.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z2_config_offload.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z2_config_offload.json
rename to nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z2_config_offload.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z3_config.json b/nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z3_config.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z3_config.json
rename to nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z3_config.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z3_config_bf16.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z3_config_bf16.json
rename to nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z3_config_bf16.json
diff --git a/nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z3_config_offload.json
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/configs/ds_config/ds_z3_config_offload.json
rename to nlp/llm/bloom-7b1/pytorch/configs/ds_config/ds_z3_config_offload.json
diff --git a/nlp/llm/bloom-7b1/firefly/main.py b/nlp/llm/bloom-7b1/pytorch/main.py
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/main.py
rename to nlp/llm/bloom-7b1/pytorch/main.py
diff --git a/nlp/llm/bloom-7b1/firefly/models/bloom/configuration_bloom.py b/nlp/llm/bloom-7b1/pytorch/models/bloom/configuration_bloom.py
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/models/bloom/configuration_bloom.py
rename to nlp/llm/bloom-7b1/pytorch/models/bloom/configuration_bloom.py
diff --git a/nlp/llm/bloom-7b1/firefly/models/bloom/modeling_bloom.py b/nlp/llm/bloom-7b1/pytorch/models/bloom/modeling_bloom.py
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/models/bloom/modeling_bloom.py
rename to nlp/llm/bloom-7b1/pytorch/models/bloom/modeling_bloom.py
diff --git a/nlp/llm/bloom-7b1/firefly/train.sh b/nlp/llm/bloom-7b1/pytorch/train.sh
similarity index 100%
rename from nlp/llm/bloom-7b1/firefly/train.sh
rename to nlp/llm/bloom-7b1/pytorch/train.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/LICENSE b/nlp/llm/chatglm-6b/pytorch/LICENSE
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/LICENSE
rename to nlp/llm/chatglm-6b/pytorch/LICENSE
diff --git a/nlp/llm/chatglm-6b/deepspeed/MODEL_LICENSE b/nlp/llm/chatglm-6b/pytorch/MODEL_LICENSE
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/MODEL_LICENSE
rename to nlp/llm/chatglm-6b/pytorch/MODEL_LICENSE
diff --git a/nlp/llm/chatglm-6b/deepspeed/PROJECT.md b/nlp/llm/chatglm-6b/pytorch/PROJECT.md
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/PROJECT.md
rename to nlp/llm/chatglm-6b/pytorch/PROJECT.md
diff --git a/nlp/llm/chatglm-6b/deepspeed/README.md b/nlp/llm/chatglm-6b/pytorch/README.md
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/README.md
rename to nlp/llm/chatglm-6b/pytorch/README.md
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/README.md b/nlp/llm/chatglm-6b/pytorch/ptuning/README.md
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/README.md
rename to nlp/llm/chatglm-6b/pytorch/ptuning/README.md
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/README_bi.md b/nlp/llm/chatglm-6b/pytorch/ptuning/README_bi.md
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/README_bi.md
rename to nlp/llm/chatglm-6b/pytorch/ptuning/README_bi.md
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/README_en.md b/nlp/llm/chatglm-6b/pytorch/ptuning/README_en.md
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/README_en.md
rename to nlp/llm/chatglm-6b/pytorch/ptuning/README_en.md
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/arguments.py b/nlp/llm/chatglm-6b/pytorch/ptuning/arguments.py
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/arguments.py
rename to nlp/llm/chatglm-6b/pytorch/ptuning/arguments.py
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/deepspeed.json b/nlp/llm/chatglm-6b/pytorch/ptuning/deepspeed.json
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/deepspeed.json
rename to nlp/llm/chatglm-6b/pytorch/ptuning/deepspeed.json
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/deepspeed_no_cpu_offload.json b/nlp/llm/chatglm-6b/pytorch/ptuning/deepspeed_no_cpu_offload.json
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/deepspeed_no_cpu_offload.json
rename to nlp/llm/chatglm-6b/pytorch/ptuning/deepspeed_no_cpu_offload.json
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/ds_train_finetune.sh b/nlp/llm/chatglm-6b/pytorch/ptuning/ds_train_finetune.sh
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/ds_train_finetune.sh
rename to nlp/llm/chatglm-6b/pytorch/ptuning/ds_train_finetune.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/ds_train_finetune_multi_nodes.sh b/nlp/llm/chatglm-6b/pytorch/ptuning/ds_train_finetune_multi_nodes.sh
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/ds_train_finetune_multi_nodes.sh
rename to nlp/llm/chatglm-6b/pytorch/ptuning/ds_train_finetune_multi_nodes.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/evaluate.sh b/nlp/llm/chatglm-6b/pytorch/ptuning/evaluate.sh
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/evaluate.sh
rename to nlp/llm/chatglm-6b/pytorch/ptuning/evaluate.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/evaluate_finetune.sh b/nlp/llm/chatglm-6b/pytorch/ptuning/evaluate_finetune.sh
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/evaluate_finetune.sh
rename to nlp/llm/chatglm-6b/pytorch/ptuning/evaluate_finetune.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/hostfile b/nlp/llm/chatglm-6b/pytorch/ptuning/hostfile
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/hostfile
rename to nlp/llm/chatglm-6b/pytorch/ptuning/hostfile
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/main.py b/nlp/llm/chatglm-6b/pytorch/ptuning/main.py
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/main.py
rename to nlp/llm/chatglm-6b/pytorch/ptuning/main.py
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/train.sh b/nlp/llm/chatglm-6b/pytorch/ptuning/train.sh
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/train.sh
rename to nlp/llm/chatglm-6b/pytorch/ptuning/train.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/train_chat.sh b/nlp/llm/chatglm-6b/pytorch/ptuning/train_chat.sh
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/train_chat.sh
rename to nlp/llm/chatglm-6b/pytorch/ptuning/train_chat.sh
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/trainer.py b/nlp/llm/chatglm-6b/pytorch/ptuning/trainer.py
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/trainer.py
rename to nlp/llm/chatglm-6b/pytorch/ptuning/trainer.py
diff --git a/nlp/llm/chatglm-6b/deepspeed/ptuning/trainer_seq2seq.py b/nlp/llm/chatglm-6b/pytorch/ptuning/trainer_seq2seq.py
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/ptuning/trainer_seq2seq.py
rename to nlp/llm/chatglm-6b/pytorch/ptuning/trainer_seq2seq.py
diff --git a/nlp/llm/chatglm-6b/deepspeed/requirements.txt b/nlp/llm/chatglm-6b/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/chatglm-6b/deepspeed/requirements.txt
rename to nlp/llm/chatglm-6b/pytorch/requirements.txt
diff --git a/nlp/llm/chatglm2-6b-sft/FAQ.md b/nlp/llm/chatglm2-6b-sft/pytorch/FAQ.md
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/FAQ.md
rename to nlp/llm/chatglm2-6b-sft/pytorch/FAQ.md
diff --git a/nlp/llm/chatglm2-6b-sft/MODEL_LICENSE b/nlp/llm/chatglm2-6b-sft/pytorch/MODEL_LICENSE
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/MODEL_LICENSE
rename to nlp/llm/chatglm2-6b-sft/pytorch/MODEL_LICENSE
diff --git a/nlp/llm/chatglm2-6b-sft/README.md b/nlp/llm/chatglm2-6b-sft/pytorch/README.md
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/README.md
rename to nlp/llm/chatglm2-6b-sft/pytorch/README.md
diff --git a/nlp/llm/chatglm2-6b-sft/README_EN.md b/nlp/llm/chatglm2-6b-sft/pytorch/README_EN.md
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/README_EN.md
rename to nlp/llm/chatglm2-6b-sft/pytorch/README_EN.md
diff --git a/nlp/llm/chatglm2-6b-sft/README_origin.md b/nlp/llm/chatglm2-6b-sft/pytorch/README_origin.md
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/README_origin.md
rename to nlp/llm/chatglm2-6b-sft/pytorch/README_origin.md
diff --git a/nlp/llm/chatglm2-6b-sft/api.py b/nlp/llm/chatglm2-6b-sft/pytorch/api.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/api.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/api.py
diff --git a/nlp/llm/chatglm2-6b-sft/cli_demo.py b/nlp/llm/chatglm2-6b-sft/pytorch/cli_demo.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/cli_demo.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/cli_demo.py
diff --git a/nlp/llm/chatglm2-6b-sft/evaluation/README.md b/nlp/llm/chatglm2-6b-sft/pytorch/evaluation/README.md
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/evaluation/README.md
rename to nlp/llm/chatglm2-6b-sft/pytorch/evaluation/README.md
diff --git a/nlp/llm/chatglm2-6b-sft/evaluation/evaluate_ceval.py b/nlp/llm/chatglm2-6b-sft/pytorch/evaluation/evaluate_ceval.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/evaluation/evaluate_ceval.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/evaluation/evaluate_ceval.py
diff --git a/nlp/llm/chatglm2-6b-sft/openai_api.py b/nlp/llm/chatglm2-6b-sft/pytorch/openai_api.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/openai_api.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/openai_api.py
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/dev.json b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/dev.json
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/dev.json
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/dev.json
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/dev_small.json b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/dev_small.json
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/dev_small.json
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/dev_small.json
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/train.json b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/train.json
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/train.json
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/train.json
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/train_small.json b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/train_small.json
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/AdvertiseGen/train_small.json
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/AdvertiseGen/train_small.json
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/arguments.py b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/arguments.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/arguments.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/arguments.py
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/deepspeed.json b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/deepspeed.json
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/deepspeed.json
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/deepspeed.json
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/ds_train_finetune.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/ds_train_finetune.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/ds_train_finetune.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/ds_train_finetune.sh
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/ds_train_finetune_multinodes.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/ds_train_finetune_multinodes.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/ds_train_finetune_multinodes.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/ds_train_finetune_multinodes.sh
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/evaluate_finetune.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/evaluate_finetune.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/evaluate_finetune.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/evaluate_finetune.sh
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/evaluate_ptuning_v2.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/evaluate_ptuning_v2.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/evaluate_ptuning_v2.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/evaluate_ptuning_v2.sh
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/main.py b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/main.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/main.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/main.py
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/train_chat.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/train_chat.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/train_chat.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/train_chat.sh
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/train_ptuning_v2.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/train_ptuning_v2.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/train_ptuning_v2.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/train_ptuning_v2.sh
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/trainer.py b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/trainer.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/trainer.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/trainer.py
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/trainer_seq2seq.py b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/trainer_seq2seq.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/trainer_seq2seq.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/trainer_seq2seq.py
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/web_demo.py b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/web_demo.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/web_demo.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/web_demo.py
diff --git a/nlp/llm/chatglm2-6b-sft/ptuning/web_demo.sh b/nlp/llm/chatglm2-6b-sft/pytorch/ptuning/web_demo.sh
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/ptuning/web_demo.sh
rename to nlp/llm/chatglm2-6b-sft/pytorch/ptuning/web_demo.sh
diff --git a/nlp/llm/chatglm2-6b-sft/requirements.txt b/nlp/llm/chatglm2-6b-sft/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/requirements.txt
rename to nlp/llm/chatglm2-6b-sft/pytorch/requirements.txt
diff --git a/nlp/llm/chatglm2-6b-sft/resources/WECHAT.md b/nlp/llm/chatglm2-6b-sft/pytorch/resources/WECHAT.md
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/resources/WECHAT.md
rename to nlp/llm/chatglm2-6b-sft/pytorch/resources/WECHAT.md
diff --git a/nlp/llm/chatglm2-6b-sft/resources/cli-demo.png b/nlp/llm/chatglm2-6b-sft/pytorch/resources/cli-demo.png
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/resources/cli-demo.png
rename to nlp/llm/chatglm2-6b-sft/pytorch/resources/cli-demo.png
diff --git a/nlp/llm/chatglm2-6b-sft/resources/knowledge.png b/nlp/llm/chatglm2-6b-sft/pytorch/resources/knowledge.png
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/resources/knowledge.png
rename to nlp/llm/chatglm2-6b-sft/pytorch/resources/knowledge.png
diff --git a/nlp/llm/chatglm2-6b-sft/resources/long-context.png b/nlp/llm/chatglm2-6b-sft/pytorch/resources/long-context.png
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/resources/long-context.png
rename to nlp/llm/chatglm2-6b-sft/pytorch/resources/long-context.png
diff --git a/nlp/llm/chatglm2-6b-sft/resources/math.png b/nlp/llm/chatglm2-6b-sft/pytorch/resources/math.png
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/resources/math.png
rename to nlp/llm/chatglm2-6b-sft/pytorch/resources/math.png
diff --git a/multimodal/GLM-4/pytorch/resources/wechat.jpg b/nlp/llm/chatglm2-6b-sft/pytorch/resources/wechat.jpg
similarity index 100%
rename from multimodal/GLM-4/pytorch/resources/wechat.jpg
rename to nlp/llm/chatglm2-6b-sft/pytorch/resources/wechat.jpg
diff --git a/nlp/llm/chatglm2-6b-sft/utils.py b/nlp/llm/chatglm2-6b-sft/pytorch/utils.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/utils.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/utils.py
diff --git a/nlp/llm/chatglm2-6b-sft/web_demo.py b/nlp/llm/chatglm2-6b-sft/pytorch/web_demo.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/web_demo.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/web_demo.py
diff --git a/nlp/llm/chatglm2-6b-sft/web_demo2.py b/nlp/llm/chatglm2-6b-sft/pytorch/web_demo2.py
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/web_demo2.py
rename to nlp/llm/chatglm2-6b-sft/pytorch/web_demo2.py
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/README.md b/nlp/llm/chatglm3-6b/pytorch/README.md
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/README.md
rename to nlp/llm/chatglm3-6b/pytorch/README.md
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/configs/ds_zero_2.json b/nlp/llm/chatglm3-6b/pytorch/configs/ds_zero_2.json
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/configs/ds_zero_2.json
rename to nlp/llm/chatglm3-6b/pytorch/configs/ds_zero_2.json
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/ds_zero_3.json b/nlp/llm/chatglm3-6b/pytorch/configs/ds_zero_3.json
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/ds_zero_3.json
rename to nlp/llm/chatglm3-6b/pytorch/configs/ds_zero_3.json
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/lora.yaml b/nlp/llm/chatglm3-6b/pytorch/configs/lora.yaml
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/lora.yaml
rename to nlp/llm/chatglm3-6b/pytorch/configs/lora.yaml
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/ptuning_v2.yaml b/nlp/llm/chatglm3-6b/pytorch/configs/ptuning_v2.yaml
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/ptuning_v2.yaml
rename to nlp/llm/chatglm3-6b/pytorch/configs/ptuning_v2.yaml
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/sft.yaml b/nlp/llm/chatglm3-6b/pytorch/configs/sft.yaml
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/sft.yaml
rename to nlp/llm/chatglm3-6b/pytorch/configs/sft.yaml
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/finetune_hf.py b/nlp/llm/chatglm3-6b/pytorch/finetune_hf.py
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/finetune_hf.py
rename to nlp/llm/chatglm3-6b/pytorch/finetune_hf.py
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/inference_hf.py b/nlp/llm/chatglm3-6b/pytorch/inference_hf.py
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/inference_hf.py
rename to nlp/llm/chatglm3-6b/pytorch/inference_hf.py
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/models/modeling_chatglm.py b/nlp/llm/chatglm3-6b/pytorch/models/modeling_chatglm.py
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/models/modeling_chatglm.py
rename to nlp/llm/chatglm3-6b/pytorch/models/modeling_chatglm.py
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/process_data.py b/nlp/llm/chatglm3-6b/pytorch/process_data.py
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/process_data.py
rename to nlp/llm/chatglm3-6b/pytorch/process_data.py
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/requirements.txt b/nlp/llm/chatglm3-6b/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/requirements.txt
rename to nlp/llm/chatglm3-6b/pytorch/requirements.txt
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/run.sh b/nlp/llm/chatglm3-6b/pytorch/run.sh
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/run.sh
rename to nlp/llm/chatglm3-6b/pytorch/run.sh
diff --git a/nlp/llm/deepseek_moe_7b/colossalai/README.md b/nlp/llm/deepseek_moe_7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/deepseek_moe_7b/colossalai/README.md
rename to nlp/llm/deepseek_moe_7b/pytorch/README.md
diff --git a/multimodal/GLM-4/pytorch/LICENSE b/nlp/llm/glm-4/pytorch/LICENSE
similarity index 100%
rename from multimodal/GLM-4/pytorch/LICENSE
rename to nlp/llm/glm-4/pytorch/LICENSE
diff --git a/multimodal/GLM-4/pytorch/README.md b/nlp/llm/glm-4/pytorch/README.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/README.md
rename to nlp/llm/glm-4/pytorch/README.md
diff --git a/multimodal/GLM-4/pytorch/README_en.md b/nlp/llm/glm-4/pytorch/README_en.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/README_en.md
rename to nlp/llm/glm-4/pytorch/README_en.md
diff --git a/multimodal/GLM-4/pytorch/basic_demo/README.md b/nlp/llm/glm-4/pytorch/basic_demo/README.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/README.md
rename to nlp/llm/glm-4/pytorch/basic_demo/README.md
diff --git a/multimodal/GLM-4/pytorch/basic_demo/README_en.md b/nlp/llm/glm-4/pytorch/basic_demo/README_en.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/README_en.md
rename to nlp/llm/glm-4/pytorch/basic_demo/README_en.md
diff --git a/multimodal/GLM-4/pytorch/basic_demo/glm4v_server.py b/nlp/llm/glm-4/pytorch/basic_demo/glm4v_server.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/glm4v_server.py
rename to nlp/llm/glm-4/pytorch/basic_demo/glm4v_server.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/glm_server.py b/nlp/llm/glm-4/pytorch/basic_demo/glm_server.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/glm_server.py
rename to nlp/llm/glm-4/pytorch/basic_demo/glm_server.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/openai_api_request.py b/nlp/llm/glm-4/pytorch/basic_demo/openai_api_request.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/openai_api_request.py
rename to nlp/llm/glm-4/pytorch/basic_demo/openai_api_request.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/openai_api_server.py b/nlp/llm/glm-4/pytorch/basic_demo/openai_api_server.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/openai_api_server.py
rename to nlp/llm/glm-4/pytorch/basic_demo/openai_api_server.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/requirements.txt b/nlp/llm/glm-4/pytorch/basic_demo/requirements.txt
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/requirements.txt
rename to nlp/llm/glm-4/pytorch/basic_demo/requirements.txt
diff --git a/multimodal/GLM-4/pytorch/basic_demo/trans_batch_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/trans_batch_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/trans_batch_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/trans_batch_demo.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/trans_cli_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/trans_cli_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/trans_cli_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/trans_cli_demo.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/trans_cli_vision_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/trans_cli_vision_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/trans_cli_vision_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/trans_cli_vision_demo.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/trans_stress_test.py b/nlp/llm/glm-4/pytorch/basic_demo/trans_stress_test.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/trans_stress_test.py
rename to nlp/llm/glm-4/pytorch/basic_demo/trans_stress_test.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/trans_web_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/trans_web_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/trans_web_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/trans_web_demo.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/trans_web_vision_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/trans_web_vision_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/trans_web_vision_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/trans_web_vision_demo.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/vllm_cli_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/vllm_cli_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/vllm_cli_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/vllm_cli_demo.py
diff --git a/multimodal/GLM-4/pytorch/basic_demo/vllm_cli_vision_demo.py b/nlp/llm/glm-4/pytorch/basic_demo/vllm_cli_vision_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/basic_demo/vllm_cli_vision_demo.py
rename to nlp/llm/glm-4/pytorch/basic_demo/vllm_cli_vision_demo.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/.gitignore b/nlp/llm/glm-4/pytorch/composite_demo/.gitignore
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/.gitignore
rename to nlp/llm/glm-4/pytorch/composite_demo/.gitignore
diff --git a/multimodal/GLM-4/pytorch/composite_demo/README.md b/nlp/llm/glm-4/pytorch/composite_demo/README.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/README.md
rename to nlp/llm/glm-4/pytorch/composite_demo/README.md
diff --git a/multimodal/GLM-4/pytorch/composite_demo/README_en.md b/nlp/llm/glm-4/pytorch/composite_demo/README_en.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/README_en.md
rename to nlp/llm/glm-4/pytorch/composite_demo/README_en.md
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/.gitignore b/nlp/llm/glm-4/pytorch/composite_demo/browser/.gitignore
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/.gitignore
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/.gitignore
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/package-lock.json b/nlp/llm/glm-4/pytorch/composite_demo/browser/package-lock.json
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/package-lock.json
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/package-lock.json
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/package.json b/nlp/llm/glm-4/pytorch/composite_demo/browser/package.json
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/package.json
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/package.json
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/pnpm-lock.yaml b/nlp/llm/glm-4/pytorch/composite_demo/browser/pnpm-lock.yaml
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/pnpm-lock.yaml
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/pnpm-lock.yaml
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/src/browser.ts b/nlp/llm/glm-4/pytorch/composite_demo/browser/src/browser.ts
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/src/browser.ts
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/src/browser.ts
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/src/config.ts b/nlp/llm/glm-4/pytorch/composite_demo/browser/src/config.ts
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/src/config.ts
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/src/config.ts
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/src/server.ts b/nlp/llm/glm-4/pytorch/composite_demo/browser/src/server.ts
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/src/server.ts
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/src/server.ts
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/src/types.ts b/nlp/llm/glm-4/pytorch/composite_demo/browser/src/types.ts
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/src/types.ts
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/src/types.ts
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/src/utils.ts b/nlp/llm/glm-4/pytorch/composite_demo/browser/src/utils.ts
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/src/utils.ts
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/src/utils.ts
diff --git a/multimodal/GLM-4/pytorch/composite_demo/browser/tsconfig.json b/nlp/llm/glm-4/pytorch/composite_demo/browser/tsconfig.json
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/browser/tsconfig.json
rename to nlp/llm/glm-4/pytorch/composite_demo/browser/tsconfig.json
diff --git a/multimodal/GLM-4/pytorch/composite_demo/requirements.txt b/nlp/llm/glm-4/pytorch/composite_demo/requirements.txt
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/requirements.txt
rename to nlp/llm/glm-4/pytorch/composite_demo/requirements.txt
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/client.py b/nlp/llm/glm-4/pytorch/composite_demo/src/client.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/client.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/client.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/clients/hf.py b/nlp/llm/glm-4/pytorch/composite_demo/src/clients/hf.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/clients/hf.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/clients/hf.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/clients/openai.py b/nlp/llm/glm-4/pytorch/composite_demo/src/clients/openai.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/clients/openai.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/clients/openai.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/clients/vllm.py b/nlp/llm/glm-4/pytorch/composite_demo/src/clients/vllm.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/clients/vllm.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/clients/vllm.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/conversation.py b/nlp/llm/glm-4/pytorch/composite_demo/src/conversation.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/conversation.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/conversation.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/main.py b/nlp/llm/glm-4/pytorch/composite_demo/src/main.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/main.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/main.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/tools/browser.py b/nlp/llm/glm-4/pytorch/composite_demo/src/tools/browser.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/tools/browser.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/tools/browser.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/tools/cogview.py b/nlp/llm/glm-4/pytorch/composite_demo/src/tools/cogview.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/tools/cogview.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/tools/cogview.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/tools/config.py b/nlp/llm/glm-4/pytorch/composite_demo/src/tools/config.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/tools/config.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/tools/config.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/tools/interface.py b/nlp/llm/glm-4/pytorch/composite_demo/src/tools/interface.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/tools/interface.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/tools/interface.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/tools/python.py b/nlp/llm/glm-4/pytorch/composite_demo/src/tools/python.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/tools/python.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/tools/python.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/tools/tool_registry.py b/nlp/llm/glm-4/pytorch/composite_demo/src/tools/tool_registry.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/tools/tool_registry.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/tools/tool_registry.py
diff --git a/multimodal/GLM-4/pytorch/composite_demo/src/utils.py b/nlp/llm/glm-4/pytorch/composite_demo/src/utils.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/composite_demo/src/utils.py
rename to nlp/llm/glm-4/pytorch/composite_demo/src/utils.py
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/README.md b/nlp/llm/glm-4/pytorch/finetune_demo/README.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/README.md
rename to nlp/llm/glm-4/pytorch/finetune_demo/README.md
diff --git a/nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/ds_zero_2.json b/nlp/llm/glm-4/pytorch/finetune_demo/configs/ds_zero_2.json
similarity index 100%
rename from nlp/llm/chatglm3-6b/deepspeed/finetune_demo/configs/ds_zero_2.json
rename to nlp/llm/glm-4/pytorch/finetune_demo/configs/ds_zero_2.json
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/configs/ds_zero_3.json b/nlp/llm/glm-4/pytorch/finetune_demo/configs/ds_zero_3.json
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/configs/ds_zero_3.json
rename to nlp/llm/glm-4/pytorch/finetune_demo/configs/ds_zero_3.json
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/configs/lora.yaml b/nlp/llm/glm-4/pytorch/finetune_demo/configs/lora.yaml
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/configs/lora.yaml
rename to nlp/llm/glm-4/pytorch/finetune_demo/configs/lora.yaml
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/configs/ptuning_v2.yaml b/nlp/llm/glm-4/pytorch/finetune_demo/configs/ptuning_v2.yaml
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/configs/ptuning_v2.yaml
rename to nlp/llm/glm-4/pytorch/finetune_demo/configs/ptuning_v2.yaml
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/configs/sft.yaml b/nlp/llm/glm-4/pytorch/finetune_demo/configs/sft.yaml
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/configs/sft.yaml
rename to nlp/llm/glm-4/pytorch/finetune_demo/configs/sft.yaml
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/data/kto_en_demo.json b/nlp/llm/glm-4/pytorch/finetune_demo/data/kto_en_demo.json
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/data/kto_en_demo.json
rename to nlp/llm/glm-4/pytorch/finetune_demo/data/kto_en_demo.json
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/finetune.py b/nlp/llm/glm-4/pytorch/finetune_demo/finetune.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/finetune.py
rename to nlp/llm/glm-4/pytorch/finetune_demo/finetune.py
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/finetune_vision.py b/nlp/llm/glm-4/pytorch/finetune_demo/finetune_vision.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/finetune_vision.py
rename to nlp/llm/glm-4/pytorch/finetune_demo/finetune_vision.py
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/inference.py b/nlp/llm/glm-4/pytorch/finetune_demo/inference.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/inference.py
rename to nlp/llm/glm-4/pytorch/finetune_demo/inference.py
diff --git a/multimodal/GLM-4/pytorch/finetune_demo/requirements.txt b/nlp/llm/glm-4/pytorch/finetune_demo/requirements.txt
similarity index 100%
rename from multimodal/GLM-4/pytorch/finetune_demo/requirements.txt
rename to nlp/llm/glm-4/pytorch/finetune_demo/requirements.txt
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/itrex/README.md b/nlp/llm/glm-4/pytorch/intel_device_demo/itrex/README.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/itrex/README.md
rename to nlp/llm/glm-4/pytorch/intel_device_demo/itrex/README.md
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/itrex/README_en.md b/nlp/llm/glm-4/pytorch/intel_device_demo/itrex/README_en.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/itrex/README_en.md
rename to nlp/llm/glm-4/pytorch/intel_device_demo/itrex/README_en.md
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/itrex/itrex_cli_demo.py b/nlp/llm/glm-4/pytorch/intel_device_demo/itrex/itrex_cli_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/itrex/itrex_cli_demo.py
rename to nlp/llm/glm-4/pytorch/intel_device_demo/itrex/itrex_cli_demo.py
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/itrex/requirements.txt b/nlp/llm/glm-4/pytorch/intel_device_demo/itrex/requirements.txt
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/itrex/requirements.txt
rename to nlp/llm/glm-4/pytorch/intel_device_demo/itrex/requirements.txt
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/openvino/README.md b/nlp/llm/glm-4/pytorch/intel_device_demo/openvino/README.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/openvino/README.md
rename to nlp/llm/glm-4/pytorch/intel_device_demo/openvino/README.md
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/openvino/README_en.md b/nlp/llm/glm-4/pytorch/intel_device_demo/openvino/README_en.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/openvino/README_en.md
rename to nlp/llm/glm-4/pytorch/intel_device_demo/openvino/README_en.md
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/openvino/convert.py b/nlp/llm/glm-4/pytorch/intel_device_demo/openvino/convert.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/openvino/convert.py
rename to nlp/llm/glm-4/pytorch/intel_device_demo/openvino/convert.py
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/openvino/openvino_cli_demo.py b/nlp/llm/glm-4/pytorch/intel_device_demo/openvino/openvino_cli_demo.py
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/openvino/openvino_cli_demo.py
rename to nlp/llm/glm-4/pytorch/intel_device_demo/openvino/openvino_cli_demo.py
diff --git a/multimodal/GLM-4/pytorch/intel_device_demo/openvino/requirements.txt b/nlp/llm/glm-4/pytorch/intel_device_demo/openvino/requirements.txt
similarity index 100%
rename from multimodal/GLM-4/pytorch/intel_device_demo/openvino/requirements.txt
rename to nlp/llm/glm-4/pytorch/intel_device_demo/openvino/requirements.txt
diff --git a/multimodal/GLM-4/pytorch/resources/WECHAT.md b/nlp/llm/glm-4/pytorch/resources/WECHAT.md
similarity index 100%
rename from multimodal/GLM-4/pytorch/resources/WECHAT.md
rename to nlp/llm/glm-4/pytorch/resources/WECHAT.md
diff --git a/multimodal/GLM-4/pytorch/resources/eval_needle.jpeg b/nlp/llm/glm-4/pytorch/resources/eval_needle.jpeg
similarity index 100%
rename from multimodal/GLM-4/pytorch/resources/eval_needle.jpeg
rename to nlp/llm/glm-4/pytorch/resources/eval_needle.jpeg
diff --git a/multimodal/GLM-4/pytorch/resources/longbench.png b/nlp/llm/glm-4/pytorch/resources/longbench.png
similarity index 100%
rename from multimodal/GLM-4/pytorch/resources/longbench.png
rename to nlp/llm/glm-4/pytorch/resources/longbench.png
diff --git a/nlp/llm/chatglm2-6b-sft/resources/wechat.jpg b/nlp/llm/glm-4/pytorch/resources/wechat.jpg
similarity index 100%
rename from nlp/llm/chatglm2-6b-sft/resources/wechat.jpg
rename to nlp/llm/glm-4/pytorch/resources/wechat.jpg
diff --git a/nlp/llm/llama-7b/colossalai/README.md b/nlp/llm/llama-7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama-7b/colossalai/README.md
rename to nlp/llm/llama-7b/pytorch/README.md
diff --git a/nlp/llm/llama2-13b/megatron-deepspeed/README.md b/nlp/llm/llama2-13b/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama2-13b/megatron-deepspeed/README.md
rename to nlp/llm/llama2-13b/pytorch/README.md
diff --git a/nlp/llm/llama2-34b/megatron-deepspeed/README.md b/nlp/llm/llama2-34b/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama2-34b/megatron-deepspeed/README.md
rename to nlp/llm/llama2-34b/pytorch/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/CODEOWNERS b/nlp/llm/llama2-7b/pytorch/CODEOWNERS
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/CODEOWNERS
rename to nlp/llm/llama2-7b/pytorch/CODEOWNERS
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/ILUVATAR.md b/nlp/llm/llama2-7b/pytorch/ILUVATAR.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/ILUVATAR.md
rename to nlp/llm/llama2-7b/pytorch/ILUVATAR.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/LICENSE b/nlp/llm/llama2-7b/pytorch/LICENSE
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/LICENSE
rename to nlp/llm/llama2-7b/pytorch/LICENSE
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/MANIFEST.in b/nlp/llm/llama2-7b/pytorch/MANIFEST.in
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/MANIFEST.in
rename to nlp/llm/llama2-7b/pytorch/MANIFEST.in
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/README.md b/nlp/llm/llama2-7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/README.md
rename to nlp/llm/llama2-7b/pytorch/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/README_origin.md b/nlp/llm/llama2-7b/pytorch/README_origin.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/README_origin.md
rename to nlp/llm/llama2-7b/pytorch/README_origin.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/SECURITY.md b/nlp/llm/llama2-7b/pytorch/SECURITY.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/SECURITY.md
rename to nlp/llm/llama2-7b/pytorch/SECURITY.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/build_megatron-deepspeed.sh b/nlp/llm/llama2-7b/pytorch/build_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/build_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b/pytorch/build_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/clean_megatron-deepspeed.sh b/nlp/llm/llama2-7b/pytorch/clean_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/clean_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b/pytorch/clean_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/dataset/README.md b/nlp/llm/llama2-7b/pytorch/dataset/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/dataset/README.md
rename to nlp/llm/llama2-7b/pytorch/dataset/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_books.sh b/nlp/llm/llama2-7b/pytorch/dataset/download_books.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_books.sh
rename to nlp/llm/llama2-7b/pytorch/dataset/download_books.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_ckpt.sh b/nlp/llm/llama2-7b/pytorch/dataset/download_ckpt.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_ckpt.sh
rename to nlp/llm/llama2-7b/pytorch/dataset/download_ckpt.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_dataset.sh b/nlp/llm/llama2-7b/pytorch/dataset/download_dataset.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_dataset.sh
rename to nlp/llm/llama2-7b/pytorch/dataset/download_dataset.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_vocab.sh b/nlp/llm/llama2-7b/pytorch/dataset/download_vocab.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/dataset/download_vocab.sh
rename to nlp/llm/llama2-7b/pytorch/dataset/download_vocab.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/README.md b/nlp/llm/llama2-7b/pytorch/examples/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/README.md
rename to nlp/llm/llama2-7b/pytorch/examples/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/run_aquila_7b_node1_bf16.sh b/nlp/llm/llama2-7b/pytorch/examples/aquila/run_aquila_7b_node1_bf16.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/run_aquila_7b_node1_bf16.sh
rename to nlp/llm/llama2-7b/pytorch/examples/aquila/run_aquila_7b_node1_bf16.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/run_aquila_7b_node2_bf16.sh b/nlp/llm/llama2-7b/pytorch/examples/aquila/run_aquila_7b_node2_bf16.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/run_aquila_7b_node2_bf16.sh
rename to nlp/llm/llama2-7b/pytorch/examples/aquila/run_aquila_7b_node2_bf16.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/tokenizer/merges.txt b/nlp/llm/llama2-7b/pytorch/examples/aquila/tokenizer/merges.txt
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/tokenizer/merges.txt
rename to nlp/llm/llama2-7b/pytorch/examples/aquila/tokenizer/merges.txt
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/tokenizer/vocab.json b/nlp/llm/llama2-7b/pytorch/examples/aquila/tokenizer/vocab.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/aquila/tokenizer/vocab.json
rename to nlp/llm/llama2-7b/pytorch/examples/aquila/tokenizer/vocab.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/README.md b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/README.md
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/annotations/filter-selfgeneration.py b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/annotations/filter-selfgeneration.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/annotations/filter-selfgeneration.py
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/annotations/filter-selfgeneration.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/annotations/perspective_api_annotate.py b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/annotations/perspective_api_annotate.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/annotations/perspective_api_annotate.py
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/annotations/perspective_api_annotate.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/annotations/preprocess.sh b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/annotations/preprocess.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/annotations/preprocess.sh
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/annotations/preprocess.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/finetune_gpt.py b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/finetune_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/finetune_gpt.py
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/finetune_gpt.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/generate-1.3b.sh b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/generate-1.3b.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/generate-1.3b.sh
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/generate-1.3b.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/generate_samples_gpt.py b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/generate_samples_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/generate_samples_gpt.py
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/generate_samples_gpt.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/perspective_api.py b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/perspective_api.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/perspective_api.py
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/perspective_api.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh b/nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
rename to nlp/llm/llama2-7b/pytorch/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/evaluate_retriever_nq.sh b/nlp/llm/llama2-7b/pytorch/examples/evaluate_retriever_nq.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/evaluate_retriever_nq.sh
rename to nlp/llm/llama2-7b/pytorch/examples/evaluate_retriever_nq.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/evaluate_zeroshot_gpt.sh b/nlp/llm/llama2-7b/pytorch/examples/evaluate_zeroshot_gpt.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/evaluate_zeroshot_gpt.sh
rename to nlp/llm/llama2-7b/pytorch/examples/evaluate_zeroshot_gpt.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/finetune_mnli_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples/finetune_mnli_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/finetune_mnli_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples/finetune_mnli_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/finetune_race_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples/finetune_race_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/finetune_race_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples/finetune_race_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/finetune_retriever_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples/finetune_retriever_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/finetune_retriever_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples/finetune_retriever_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/run_llama2_13b_node2.sh b/nlp/llm/llama2-7b/pytorch/examples/llama2/run_llama2_13b_node2.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/run_llama2_13b_node2.sh
rename to nlp/llm/llama2-7b/pytorch/examples/llama2/run_llama2_13b_node2.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/run_llama2_70b_node4.sh b/nlp/llm/llama2-7b/pytorch/examples/llama2/run_llama2_70b_node4.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/run_llama2_70b_node4.sh
rename to nlp/llm/llama2-7b/pytorch/examples/llama2/run_llama2_70b_node4.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/run_llama2_7b_node1.sh b/nlp/llm/llama2-7b/pytorch/examples/llama2/run_llama2_7b_node1.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/run_llama2_7b_node1.sh
rename to nlp/llm/llama2-7b/pytorch/examples/llama2/run_llama2_7b_node1.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/tokenizer/merges.txt b/nlp/llm/llama2-7b/pytorch/examples/llama2/tokenizer/merges.txt
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/tokenizer/merges.txt
rename to nlp/llm/llama2-7b/pytorch/examples/llama2/tokenizer/merges.txt
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/tokenizer/tokenizer.model b/nlp/llm/llama2-7b/pytorch/examples/llama2/tokenizer/tokenizer.model
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/tokenizer/tokenizer.model
rename to nlp/llm/llama2-7b/pytorch/examples/llama2/tokenizer/tokenizer.model
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/tokenizer/vocab.json b/nlp/llm/llama2-7b/pytorch/examples/llama2/tokenizer/vocab.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/llama2/tokenizer/vocab.json
rename to nlp/llm/llama2-7b/pytorch/examples/llama2/tokenizer/vocab.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/merge_mp_bert.sh b/nlp/llm/llama2-7b/pytorch/examples/merge_mp_bert.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/merge_mp_bert.sh
rename to nlp/llm/llama2-7b/pytorch/examples/merge_mp_bert.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/README.md b/nlp/llm/llama2-7b/pytorch/examples/msdp/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/README.md
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/data_processing.sh b/nlp/llm/llama2-7b/pytorch/examples/msdp/data_processing.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/data_processing.sh
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/data_processing.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/eval_knwl_generation.sh b/nlp/llm/llama2-7b/pytorch/examples/msdp/eval_knwl_generation.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/eval_knwl_generation.sh
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/eval_knwl_generation.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/eval_resp_generation.sh b/nlp/llm/llama2-7b/pytorch/examples/msdp/eval_resp_generation.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/eval_resp_generation.sh
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/eval_resp_generation.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/prep_resp_gen.sh b/nlp/llm/llama2-7b/pytorch/examples/msdp/prep_resp_gen.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/prep_resp_gen.sh
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/prep_resp_gen.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/prompt_knwl_gen.sh b/nlp/llm/llama2-7b/pytorch/examples/msdp/prompt_knwl_gen.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/prompt_knwl_gen.sh
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/prompt_knwl_gen.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/prompt_resp_gen.sh b/nlp/llm/llama2-7b/pytorch/examples/msdp/prompt_resp_gen.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/msdp/prompt_resp_gen.sh
rename to nlp/llm/llama2-7b/pytorch/examples/msdp/prompt_resp_gen.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_bert.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_bert.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_bert.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_bert.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_bert_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_bert_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_bert_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_bert_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_bert_distributed_with_mp.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_bert_distributed_with_mp.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_bert_distributed_with_mp.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_bert_distributed_with_mp.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt3_175B.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt3_175B.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt3_175B.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt3_175B.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt_distributed_with_mp.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt_distributed_with_mp.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_gpt_distributed_with_mp.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_gpt_distributed_with_mp.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_ict.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_ict.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_ict.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_ict.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_t5.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_t5.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_t5.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_t5.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_t5_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_t5_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_t5_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_t5_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_t5_distributed_with_mp.sh b/nlp/llm/llama2-7b/pytorch/examples/pretrain_t5_distributed_with_mp.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/pretrain_t5_distributed_with_mp.sh
rename to nlp/llm/llama2-7b/pytorch/examples/pretrain_t5_distributed_with_mp.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/run_text_generation_server_345M.sh b/nlp/llm/llama2-7b/pytorch/examples/run_text_generation_server_345M.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/run_text_generation_server_345M.sh
rename to nlp/llm/llama2-7b/pytorch/examples/run_text_generation_server_345M.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/run_text_generation_server_345M_8_tensor_parallel.sh b/nlp/llm/llama2-7b/pytorch/examples/run_text_generation_server_345M_8_tensor_parallel.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/run_text_generation_server_345M_8_tensor_parallel.sh
rename to nlp/llm/llama2-7b/pytorch/examples/run_text_generation_server_345M_8_tensor_parallel.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/CONFIG.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/CONFIG.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/CONFIG.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/CONFIG.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/README.md b/nlp/llm/llama2-7b/pytorch/examples/sc21/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/README.md
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/SBATCH.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/SBATCH.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/SBATCH.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/SBATCH.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/SRUN.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/SRUN.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/SRUN.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/SRUN.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_11.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_11.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_11.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_11.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_12.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_12.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_12.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_12.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_13.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_13.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_13.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_13.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_14.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_14.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_14.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_14.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_15.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_15.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_15.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_15.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_16.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_16.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_16.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_16.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_17.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_17.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_17.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_17.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_18.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_18.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_figure_18.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_figure_18.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_table_1.sh b/nlp/llm/llama2-7b/pytorch/examples/sc21/run_table_1.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples/sc21/run_table_1.sh
rename to nlp/llm/llama2-7b/pytorch/examples/sc21/run_table_1.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_evalharness.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_evalharness.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_evalharness.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_evalharness.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/readme_evalharness.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/readme_evalharness.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/MoE/readme_evalharness.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/MoE/readme_evalharness.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/run-175b.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/run-175b.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/run-175b.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/run-175b.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/run-1t.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/run-1t.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/run-1t.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/run-1t.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/run-benchmark-model.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/run-benchmark-model.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azure/run-benchmark-model.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azure/run-benchmark-model.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/Dockerfile.dockerfile b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/Dockerfile.dockerfile
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/Dockerfile.dockerfile
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/Dockerfile.dockerfile
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/aml_submit.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/aml_submit.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/aml_submit.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/aml_submit.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/prepare_dataset.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/prepare_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/azureml/prepare_dataset.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/azureml/prepare_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/prepare_pile_data.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/prepare_pile_data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/bert_with_pile/prepare_pile_data.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/bert_with_pile/prepare_pile_data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_evalharness.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_evalharness.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_evalharness.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_evalharness.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_train.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_train.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_train.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_train.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/analyze_data.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/analyze_data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/analyze_data.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/analyze_data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/generate_text.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/generate_text.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/generate_text.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/generate_text.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/pretrain_llama2_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/pretrain_llama2_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/pretrain_llama2_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/pretrain_llama2_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/pretrain_llama_distributed.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/pretrain_llama_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/pretrain_llama_distributed.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/pretrain_llama_distributed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/run_deepspeed_example.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/run_deepspeed_example.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/run_deepspeed_example.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/run_deepspeed_example.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/README.md b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/README.md
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh b/nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh
rename to nlp/llm/llama2-7b/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/images/Achieved_petaFLOPs.png b/nlp/llm/llama2-7b/pytorch/images/Achieved_petaFLOPs.png
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/images/Achieved_petaFLOPs.png
rename to nlp/llm/llama2-7b/pytorch/images/Achieved_petaFLOPs.png
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/images/cases_april2021.png b/nlp/llm/llama2-7b/pytorch/images/cases_april2021.png
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/images/cases_april2021.png
rename to nlp/llm/llama2-7b/pytorch/images/cases_april2021.png
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/install_megatron-deepspeed.sh b/nlp/llm/llama2-7b/pytorch/install_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/install_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b/pytorch/install_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/arguments.py b/nlp/llm/llama2-7b/pytorch/megatron/arguments.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/arguments.py
rename to nlp/llm/llama2-7b/pytorch/megatron/arguments.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/checkpointing.py b/nlp/llm/llama2-7b/pytorch/megatron/checkpointing.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/checkpointing.py
rename to nlp/llm/llama2-7b/pytorch/megatron/checkpointing.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/README.md b/nlp/llm/llama2-7b/pytorch/megatron/core/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/README.md
rename to nlp/llm/llama2-7b/pytorch/megatron/core/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/enums.py b/nlp/llm/llama2-7b/pytorch/megatron/core/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/enums.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/enums.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/fusions/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/fusions/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_bias_dropout.py b/nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_bias_dropout.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_bias_dropout.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_bias_dropout.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_bias_gelu.py b/nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_bias_gelu.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_bias_gelu.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_bias_gelu.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_layer_norm.py b/nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_layer_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_layer_norm.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_layer_norm.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_softmax.py b/nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_softmax.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/fusions/fused_softmax.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/fusions/fused_softmax.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/model_parallel_config.py b/nlp/llm/llama2-7b/pytorch/megatron/core/model_parallel_config.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/model_parallel_config.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/model_parallel_config.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/models/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/models/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/gpt/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/models/gpt/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/gpt/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/models/gpt/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/gpt/gpt_embedding.py b/nlp/llm/llama2-7b/pytorch/megatron/core/models/gpt/gpt_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/gpt/gpt_embedding.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/models/gpt/gpt_embedding.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/gpt/gpt_model.py b/nlp/llm/llama2-7b/pytorch/megatron/core/models/gpt/gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/models/gpt/gpt_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/models/gpt/gpt_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/package_info.py b/nlp/llm/llama2-7b/pytorch/megatron/core/package_info.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/package_info.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/package_info.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/parallel_state.py b/nlp/llm/llama2-7b/pytorch/megatron/core/parallel_state.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/parallel_state.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/parallel_state.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/pipeline_parallel/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/pipeline_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/pipeline_parallel/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/pipeline_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/pipeline_parallel/p2p_communication.py b/nlp/llm/llama2-7b/pytorch/megatron/core/pipeline_parallel/p2p_communication.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/pipeline_parallel/p2p_communication.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/pipeline_parallel/p2p_communication.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/pipeline_parallel/schedules.py b/nlp/llm/llama2-7b/pytorch/megatron/core/pipeline_parallel/schedules.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/pipeline_parallel/schedules.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/pipeline_parallel/schedules.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/requirements.txt b/nlp/llm/llama2-7b/pytorch/megatron/core/requirements.txt
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/requirements.txt
rename to nlp/llm/llama2-7b/pytorch/megatron/core/requirements.txt
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/sequence_parallel/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/sequence_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/sequence_parallel/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/sequence_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/sequence_parallel/cross_entropy.py b/nlp/llm/llama2-7b/pytorch/megatron/core/sequence_parallel/cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/sequence_parallel/cross_entropy.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/sequence_parallel/cross_entropy.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/cross_entropy.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/cross_entropy.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/cross_entropy.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/data.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/data.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/layers.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/layers.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/layers.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/layers.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/mappings.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/mappings.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/mappings.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/mappings.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/random.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/random.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/random.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/random.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/utils.py b/nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/tensor_parallel/utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/tensor_parallel/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/attention.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/attention.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/attention.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/attention.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/core_attention.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/core_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/core_attention.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/core_attention.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/custom_layers/transformer_engine.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/custom_layers/transformer_engine.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/custom_layers/transformer_engine.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/custom_layers/transformer_engine.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/enums.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/enums.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/enums.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/mlp.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/mlp.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/mlp.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/mlp.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/module.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/module.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/module.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/module.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/transformer_block.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/transformer_block.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/transformer_block.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/transformer_block.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/transformer_config.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/transformer_config.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/transformer_config.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/transformer_config.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/transformer_layer.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/transformer_layer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/transformer_layer.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/transformer_layer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/utils.py b/nlp/llm/llama2-7b/pytorch/megatron/core/transformer/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/transformer/utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/transformer/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/utils.py b/nlp/llm/llama2-7b/pytorch/megatron/core/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/core/utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/core/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/Makefile b/nlp/llm/llama2-7b/pytorch/megatron/data/Makefile
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/Makefile
rename to nlp/llm/llama2-7b/pytorch/megatron/data/Makefile
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/data/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/autoaugment.py b/nlp/llm/llama2-7b/pytorch/megatron/data/autoaugment.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/autoaugment.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/autoaugment.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/bert_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/bert_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/bert_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/bert_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/biencoder_dataset_utils.py b/nlp/llm/llama2-7b/pytorch/megatron/data/biencoder_dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/biencoder_dataset_utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/biencoder_dataset_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/blendable_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/blendable_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/blendable_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/blendable_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/data_samplers.py b/nlp/llm/llama2-7b/pytorch/megatron/data/data_samplers.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/data_samplers.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/data_samplers.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/dataset_utils.py b/nlp/llm/llama2-7b/pytorch/megatron/data/dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/dataset_utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/dataset_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/gpt_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/gpt_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/gpt_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/gpt_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/helpers.cpp b/nlp/llm/llama2-7b/pytorch/megatron/data/helpers.cpp
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/helpers.cpp
rename to nlp/llm/llama2-7b/pytorch/megatron/data/helpers.cpp
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/ict_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/ict_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/ict_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/ict_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/image_folder.py b/nlp/llm/llama2-7b/pytorch/megatron/data/image_folder.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/image_folder.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/image_folder.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/indexed_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/indexed_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/indexed_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/orqa_wiki_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/orqa_wiki_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/orqa_wiki_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/orqa_wiki_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/realm_dataset_utils.py b/nlp/llm/llama2-7b/pytorch/megatron/data/realm_dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/realm_dataset_utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/realm_dataset_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/realm_index.py b/nlp/llm/llama2-7b/pytorch/megatron/data/realm_index.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/realm_index.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/realm_index.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/t5_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/t5_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/t5_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/t5_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/test/test_indexed_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/test/test_indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/test/test_indexed_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/test/test_indexed_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/test/test_preprocess_data.sh b/nlp/llm/llama2-7b/pytorch/megatron/data/test/test_preprocess_data.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/test/test_preprocess_data.sh
rename to nlp/llm/llama2-7b/pytorch/megatron/data/test/test_preprocess_data.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/vit_dataset.py b/nlp/llm/llama2-7b/pytorch/megatron/data/vit_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/data/vit_dataset.py
rename to nlp/llm/llama2-7b/pytorch/megatron/data/vit_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/dist_signal_handler.py b/nlp/llm/llama2-7b/pytorch/megatron/dist_signal_handler.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/dist_signal_handler.py
rename to nlp/llm/llama2-7b/pytorch/megatron/dist_signal_handler.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/enums.py b/nlp/llm/llama2-7b/pytorch/megatron/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/enums.py
rename to nlp/llm/llama2-7b/pytorch/megatron/enums.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fp16_deprecated/loss_scaler.py b/nlp/llm/llama2-7b/pytorch/megatron/fp16_deprecated/loss_scaler.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fp16_deprecated/loss_scaler.py
rename to nlp/llm/llama2-7b/pytorch/megatron/fp16_deprecated/loss_scaler.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/compat.h b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/compat.h
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/compat.h
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/compat.h
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_masked_softmax.cpp b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_masked_softmax.cpp
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_masked_softmax.cpp
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_masked_softmax.cpp
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_masked_softmax.h b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_masked_softmax.h
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_masked_softmax.h
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_masked_softmax.h
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_masked_softmax_cuda.cu
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_masked_softmax_cuda.cu
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_softmax.cpp b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_softmax.cpp
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_softmax.cpp
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_softmax.cpp
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_softmax_cuda.cu b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_softmax_cuda.cu
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_softmax_cuda.cu
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_softmax_cuda.cu
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/tests/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/tests/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/tests/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/tests/test_fused_kernels.py b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/tests/test_fused_kernels.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/tests/test_fused_kernels.py
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/tests/test_fused_kernels.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/type_shim.h b/nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/type_shim.h
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/fused_kernels/type_shim.h
rename to nlp/llm/llama2-7b/pytorch/megatron/fused_kernels/type_shim.h
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/global_vars.py b/nlp/llm/llama2-7b/pytorch/megatron/global_vars.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/global_vars.py
rename to nlp/llm/llama2-7b/pytorch/megatron/global_vars.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/indexer.py b/nlp/llm/llama2-7b/pytorch/megatron/indexer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/indexer.py
rename to nlp/llm/llama2-7b/pytorch/megatron/indexer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/initialize.py b/nlp/llm/llama2-7b/pytorch/megatron/initialize.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/initialize.py
rename to nlp/llm/llama2-7b/pytorch/megatron/initialize.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/memory.py b/nlp/llm/llama2-7b/pytorch/megatron/memory.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/memory.py
rename to nlp/llm/llama2-7b/pytorch/megatron/memory.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/microbatches.py b/nlp/llm/llama2-7b/pytorch/megatron/microbatches.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/microbatches.py
rename to nlp/llm/llama2-7b/pytorch/megatron/microbatches.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/model/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/bert_model.py b/nlp/llm/llama2-7b/pytorch/megatron/model/bert_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/bert_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/bert_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/biencoder_model.py b/nlp/llm/llama2-7b/pytorch/megatron/model/biencoder_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/biencoder_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/biencoder_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/classification.py b/nlp/llm/llama2-7b/pytorch/megatron/model/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/classification.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/classification.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/distributed.py b/nlp/llm/llama2-7b/pytorch/megatron/model/distributed.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/distributed.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/distributed.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/enums.py b/nlp/llm/llama2-7b/pytorch/megatron/model/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/enums.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/enums.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/fused_bias_gelu.py b/nlp/llm/llama2-7b/pytorch/megatron/model/fused_bias_gelu.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/fused_bias_gelu.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/fused_bias_gelu.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/fused_layer_norm.py b/nlp/llm/llama2-7b/pytorch/megatron/model/fused_layer_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/fused_layer_norm.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/fused_layer_norm.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/fused_softmax.py b/nlp/llm/llama2-7b/pytorch/megatron/model/fused_softmax.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/fused_softmax.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/fused_softmax.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/gpt_model.py b/nlp/llm/llama2-7b/pytorch/megatron/model/gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/gpt_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/gpt_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/language_model.py b/nlp/llm/llama2-7b/pytorch/megatron/model/language_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/language_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/language_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/module.py b/nlp/llm/llama2-7b/pytorch/megatron/model/module.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/module.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/module.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/multiple_choice.py b/nlp/llm/llama2-7b/pytorch/megatron/model/multiple_choice.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/multiple_choice.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/multiple_choice.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/realm_model.py b/nlp/llm/llama2-7b/pytorch/megatron/model/realm_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/realm_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/realm_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/rotary_pos_embedding.py b/nlp/llm/llama2-7b/pytorch/megatron/model/rotary_pos_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/rotary_pos_embedding.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/rotary_pos_embedding.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/t5_model.py b/nlp/llm/llama2-7b/pytorch/megatron/model/t5_model.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/t5_model.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/t5_model.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/transformer.py b/nlp/llm/llama2-7b/pytorch/megatron/model/transformer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/transformer.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/transformer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/utils.py b/nlp/llm/llama2-7b/pytorch/megatron/model/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/classification.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/classification.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/classification.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/dino.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/dino.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/dino.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/dino.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/esvit_swin_backbone.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/esvit_swin_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/esvit_swin_backbone.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/esvit_swin_backbone.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/inpainting.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/inpainting.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/inpainting.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/inpainting.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/knn_monitor.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/knn_monitor.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/knn_monitor.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/knn_monitor.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/mit_backbone.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/mit_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/mit_backbone.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/mit_backbone.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/swin_backbone.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/swin_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/swin_backbone.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/swin_backbone.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/utils.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/vit_backbone.py b/nlp/llm/llama2-7b/pytorch/megatron/model/vision/vit_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/model/vision/vit_backbone.py
rename to nlp/llm/llama2-7b/pytorch/megatron/model/vision/vit_backbone.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/commons.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/commons.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/commons.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/commons.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_cross_entropy.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_cross_entropy.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_cross_entropy.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_data.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_data.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_initialize.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_initialize.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_initialize.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_initialize.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_layers.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_layers.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_layers.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_layers.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_random.py b/nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_random.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/mpu/tests/test_random.py
rename to nlp/llm/llama2-7b/pytorch/megatron/mpu/tests/test_random.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/optimizer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/optimizer/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/clip_grads.py b/nlp/llm/llama2-7b/pytorch/megatron/optimizer/clip_grads.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/clip_grads.py
rename to nlp/llm/llama2-7b/pytorch/megatron/optimizer/clip_grads.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/distrib_optimizer.py b/nlp/llm/llama2-7b/pytorch/megatron/optimizer/distrib_optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/distrib_optimizer.py
rename to nlp/llm/llama2-7b/pytorch/megatron/optimizer/distrib_optimizer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/grad_scaler.py b/nlp/llm/llama2-7b/pytorch/megatron/optimizer/grad_scaler.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/grad_scaler.py
rename to nlp/llm/llama2-7b/pytorch/megatron/optimizer/grad_scaler.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/optimizer.py b/nlp/llm/llama2-7b/pytorch/megatron/optimizer/optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer/optimizer.py
rename to nlp/llm/llama2-7b/pytorch/megatron/optimizer/optimizer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer_param_scheduler.py b/nlp/llm/llama2-7b/pytorch/megatron/optimizer_param_scheduler.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/optimizer_param_scheduler.py
rename to nlp/llm/llama2-7b/pytorch/megatron/optimizer_param_scheduler.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/p2p_communication.py b/nlp/llm/llama2-7b/pytorch/megatron/p2p_communication.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/p2p_communication.py
rename to nlp/llm/llama2-7b/pytorch/megatron/p2p_communication.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/static/index.html b/nlp/llm/llama2-7b/pytorch/megatron/static/index.html
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/static/index.html
rename to nlp/llm/llama2-7b/pytorch/megatron/static/index.html
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/api.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/api.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/api.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/api.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/beam_utils.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/beam_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/beam_utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/beam_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/communication.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/communication.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/communication.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/communication.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/forward_step.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/forward_step.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/forward_step.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/forward_step.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/generation.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/generation.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/generation.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/generation.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/sampling.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/sampling.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/sampling.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/sampling.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/tokenization.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation/tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation/tokenization.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation/tokenization.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation_server.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation_server.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation_server.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation_server.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation_utils.py b/nlp/llm/llama2-7b/pytorch/megatron/text_generation_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/text_generation_utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/text_generation_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/timers.py b/nlp/llm/llama2-7b/pytorch/megatron/timers.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/timers.py
rename to nlp/llm/llama2-7b/pytorch/megatron/timers.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/__init__.py b/nlp/llm/llama2-7b/pytorch/megatron/tokenizer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/__init__.py
rename to nlp/llm/llama2-7b/pytorch/megatron/tokenizer/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/bert_tokenization.py b/nlp/llm/llama2-7b/pytorch/megatron/tokenizer/bert_tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/bert_tokenization.py
rename to nlp/llm/llama2-7b/pytorch/megatron/tokenizer/bert_tokenization.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/gpt2_tokenization.py b/nlp/llm/llama2-7b/pytorch/megatron/tokenizer/gpt2_tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/gpt2_tokenization.py
rename to nlp/llm/llama2-7b/pytorch/megatron/tokenizer/gpt2_tokenization.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/tokenizer.py b/nlp/llm/llama2-7b/pytorch/megatron/tokenizer/tokenizer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/tokenizer/tokenizer.py
rename to nlp/llm/llama2-7b/pytorch/megatron/tokenizer/tokenizer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/training.py b/nlp/llm/llama2-7b/pytorch/megatron/training.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/training.py
rename to nlp/llm/llama2-7b/pytorch/megatron/training.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/megatron/utils.py b/nlp/llm/llama2-7b/pytorch/megatron/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/megatron/utils.py
rename to nlp/llm/llama2-7b/pytorch/megatron/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_bert.py b/nlp/llm/llama2-7b/pytorch/pretrain_bert.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_bert.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_bert.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_gpt.py b/nlp/llm/llama2-7b/pytorch/pretrain_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_gpt.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_gpt.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_gpt_core.py b/nlp/llm/llama2-7b/pytorch/pretrain_gpt_core.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_gpt_core.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_gpt_core.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_ict.py b/nlp/llm/llama2-7b/pytorch/pretrain_ict.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_ict.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_ict.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_retro.py b/nlp/llm/llama2-7b/pytorch/pretrain_retro.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_retro.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_retro.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_t5.py b/nlp/llm/llama2-7b/pytorch/pretrain_t5.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_t5.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_t5.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_vision_classify.py b/nlp/llm/llama2-7b/pytorch/pretrain_vision_classify.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_vision_classify.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_vision_classify.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_vision_dino.py b/nlp/llm/llama2-7b/pytorch/pretrain_vision_dino.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_vision_dino.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_vision_dino.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/pretrain_vision_inpaint.py b/nlp/llm/llama2-7b/pytorch/pretrain_vision_inpaint.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/pretrain_vision_inpaint.py
rename to nlp/llm/llama2-7b/pytorch/pretrain_vision_inpaint.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/run_llama2_7b_1node.sh b/nlp/llm/llama2-7b/pytorch/run_llama2_7b_1node.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/run_llama2_7b_1node.sh
rename to nlp/llm/llama2-7b/pytorch/run_llama2_7b_1node.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/setup.py b/nlp/llm/llama2-7b/pytorch/setup.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/setup.py
rename to nlp/llm/llama2-7b/pytorch/setup.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/data_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/data_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/data_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/data_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/ensemble_classifier.py b/nlp/llm/llama2-7b/pytorch/tasks/ensemble_classifier.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/ensemble_classifier.py
rename to nlp/llm/llama2-7b/pytorch/tasks/ensemble_classifier.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_harness/download.py b/nlp/llm/llama2-7b/pytorch/tasks/eval_harness/download.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_harness/download.py
rename to nlp/llm/llama2-7b/pytorch/tasks/eval_harness/download.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_harness/evaluate.py b/nlp/llm/llama2-7b/pytorch/tasks/eval_harness/evaluate.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_harness/evaluate.py
rename to nlp/llm/llama2-7b/pytorch/tasks/eval_harness/evaluate.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_harness/report-to-csv.py b/nlp/llm/llama2-7b/pytorch/tasks/eval_harness/report-to-csv.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_harness/report-to-csv.py
rename to nlp/llm/llama2-7b/pytorch/tasks/eval_harness/report-to-csv.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/eval_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/eval_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/eval_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/finetune_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/finetune_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/finetune_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/finetune_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/cola.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/cola.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/cola.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/cola.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/data.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/data.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/finetune.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/finetune.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/finetune.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/finetune.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/mnli.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/mnli.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/mnli.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/mnli.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/mrpc.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/mrpc.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/mrpc.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/mrpc.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/qnli.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/qnli.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/qnli.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/qnli.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/qqp.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/qqp.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/qqp.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/qqp.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/rte.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/rte.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/rte.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/rte.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/sst2.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/sst2.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/sst2.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/sst2.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/stsb.py b/nlp/llm/llama2-7b/pytorch/tasks/glue/stsb.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/glue/stsb.py
rename to nlp/llm/llama2-7b/pytorch/tasks/glue/stsb.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/main.py b/nlp/llm/llama2-7b/pytorch/tasks/main.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/main.py
rename to nlp/llm/llama2-7b/pytorch/tasks/main.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/README.md b/nlp/llm/llama2-7b/pytorch/tasks/msdp/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/README.md
rename to nlp/llm/llama2-7b/pytorch/tasks/msdp/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/evaluate.py b/nlp/llm/llama2-7b/pytorch/tasks/msdp/evaluate.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/evaluate.py
rename to nlp/llm/llama2-7b/pytorch/tasks/msdp/evaluate.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/main.py b/nlp/llm/llama2-7b/pytorch/tasks/msdp/main.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/main.py
rename to nlp/llm/llama2-7b/pytorch/tasks/msdp/main.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/metrics.py b/nlp/llm/llama2-7b/pytorch/tasks/msdp/metrics.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/metrics.py
rename to nlp/llm/llama2-7b/pytorch/tasks/msdp/metrics.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/preprocessing.py b/nlp/llm/llama2-7b/pytorch/tasks/msdp/preprocessing.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/preprocessing.py
rename to nlp/llm/llama2-7b/pytorch/tasks/msdp/preprocessing.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/prompt.py b/nlp/llm/llama2-7b/pytorch/tasks/msdp/prompt.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/msdp/prompt.py
rename to nlp/llm/llama2-7b/pytorch/tasks/msdp/prompt.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/README.md b/nlp/llm/llama2-7b/pytorch/tasks/orqa/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/README.md
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/evaluate_orqa.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/evaluate_orqa.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/evaluate_orqa.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/evaluate_orqa.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/evaluate_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/evaluate_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/evaluate_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/evaluate_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/supervised/data.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/supervised/data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/supervised/data.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/supervised/data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/supervised/eval_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/supervised/eval_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/supervised/eval_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/supervised/eval_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/supervised/finetune.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/supervised/finetune.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/supervised/finetune.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/supervised/finetune.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/unsupervised/nq.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/unsupervised/nq.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/unsupervised/nq.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/unsupervised/nq.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/unsupervised/qa_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/unsupervised/qa_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/unsupervised/qa_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/unsupervised/qa_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/unsupervised/tokenizers.py b/nlp/llm/llama2-7b/pytorch/tasks/orqa/unsupervised/tokenizers.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/orqa/unsupervised/tokenizers.py
rename to nlp/llm/llama2-7b/pytorch/tasks/orqa/unsupervised/tokenizers.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/race/data.py b/nlp/llm/llama2-7b/pytorch/tasks/race/data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/race/data.py
rename to nlp/llm/llama2-7b/pytorch/tasks/race/data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/race/finetune.py b/nlp/llm/llama2-7b/pytorch/tasks/race/finetune.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/race/finetune.py
rename to nlp/llm/llama2-7b/pytorch/tasks/race/finetune.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/classification/classification.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/classification/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/classification/classification.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/classification/classification.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/classification/eval_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/classification/eval_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/classification/eval_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/classification/eval_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/finetune_utils.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/finetune_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/finetune_utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/finetune_utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/main.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/main.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/main.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/main.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/cityscapes.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/cityscapes.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/cityscapes.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/cityscapes.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/data.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/data.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/finetune_segformer.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/finetune_segformer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/finetune_segformer.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/finetune_segformer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/finetune_setr.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/finetune_setr.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/finetune_setr.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/finetune_setr.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/metrics.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/metrics.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/metrics.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/metrics.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/seg_heads.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/seg_heads.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/seg_heads.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/seg_heads.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/seg_models.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/seg_models.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/seg_models.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/seg_models.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/transforms.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/transforms.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/transforms.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/transforms.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/utils.py b/nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/vision/segmentation/utils.py
rename to nlp/llm/llama2-7b/pytorch/tasks/vision/segmentation/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/zeroshot_gpt/datasets.py b/nlp/llm/llama2-7b/pytorch/tasks/zeroshot_gpt/datasets.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/zeroshot_gpt/datasets.py
rename to nlp/llm/llama2-7b/pytorch/tasks/zeroshot_gpt/datasets.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/zeroshot_gpt/detokenizer.py b/nlp/llm/llama2-7b/pytorch/tasks/zeroshot_gpt/detokenizer.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/zeroshot_gpt/detokenizer.py
rename to nlp/llm/llama2-7b/pytorch/tasks/zeroshot_gpt/detokenizer.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tasks/zeroshot_gpt/evaluate.py b/nlp/llm/llama2-7b/pytorch/tasks/zeroshot_gpt/evaluate.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tasks/zeroshot_gpt/evaluate.py
rename to nlp/llm/llama2-7b/pytorch/tasks/zeroshot_gpt/evaluate.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tokenizer/tokenizer.model b/nlp/llm/llama2-7b/pytorch/tokenizer/tokenizer.model
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tokenizer/tokenizer.model
rename to nlp/llm/llama2-7b/pytorch/tokenizer/tokenizer.model
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/bert_embedding/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/bert_embedding/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/dataset.py b/nlp/llm/llama2-7b/pytorch/tools/bert_embedding/dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/dataset.py
rename to nlp/llm/llama2-7b/pytorch/tools/bert_embedding/dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/embed.py b/nlp/llm/llama2-7b/pytorch/tools/bert_embedding/embed.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/embed.py
rename to nlp/llm/llama2-7b/pytorch/tools/bert_embedding/embed.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/external_libs.py b/nlp/llm/llama2-7b/pytorch/tools/bert_embedding/external_libs.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/external_libs.py
rename to nlp/llm/llama2-7b/pytorch/tools/bert_embedding/external_libs.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/huggingface.py b/nlp/llm/llama2-7b/pytorch/tools/bert_embedding/huggingface.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/huggingface.py
rename to nlp/llm/llama2-7b/pytorch/tools/bert_embedding/huggingface.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/utils.py b/nlp/llm/llama2-7b/pytorch/tools/bert_embedding/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/bert_embedding/utils.py
rename to nlp/llm/llama2-7b/pytorch/tools/bert_embedding/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/checkpoint_loader_megatron.py b/nlp/llm/llama2-7b/pytorch/tools/checkpoint_loader_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/checkpoint_loader_megatron.py
rename to nlp/llm/llama2-7b/pytorch/tools/checkpoint_loader_megatron.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/checkpoint_saver_megatron.py b/nlp/llm/llama2-7b/pytorch/tools/checkpoint_saver_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/checkpoint_saver_megatron.py
rename to nlp/llm/llama2-7b/pytorch/tools/checkpoint_saver_megatron.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/checkpoint_util.py b/nlp/llm/llama2-7b/pytorch/tools/checkpoint_util.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/checkpoint_util.py
rename to nlp/llm/llama2-7b/pytorch/tools/checkpoint_util.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/README.md b/nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/README.md
rename to nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/deepspeed_checkpoint.py b/nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/deepspeed_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/deepspeed_checkpoint.py
rename to nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/deepspeed_checkpoint.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_megatron.py b/nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/deepspeed_to_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_megatron.py
rename to nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/deepspeed_to_megatron.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_transformers.py b/nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/deepspeed_to_transformers.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_transformers.py
rename to nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/deepspeed_to_transformers.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/inspect_checkpoint.py b/nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/inspect_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/inspect_checkpoint.py
rename to nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/inspect_checkpoint.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py b/nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
rename to nlp/llm/llama2-7b/pytorch/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/generate_samples_gpt.py b/nlp/llm/llama2-7b/pytorch/tools/generate_samples_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/generate_samples_gpt.py
rename to nlp/llm/llama2-7b/pytorch/tools/generate_samples_gpt.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/linter.py b/nlp/llm/llama2-7b/pytorch/tools/linter.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/linter.py
rename to nlp/llm/llama2-7b/pytorch/tools/linter.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/merge_datasets.py b/nlp/llm/llama2-7b/pytorch/tools/merge_datasets.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/merge_datasets.py
rename to nlp/llm/llama2-7b/pytorch/tools/merge_datasets.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/README.md b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/README.md
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/add_id.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/add_id.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/add_id.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/add_id.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/blacklist_urls.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/blacklist_urls.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/blacklist_urls.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/blacklist_urls.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/cleanup_dataset.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/cleanup_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/cleanup_dataset.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/cleanup_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/cleanup_fix_dataset.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/cleanup_fix_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/cleanup_fix_dataset.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/cleanup_fix_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/filter_ngrams.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/filter_ngrams.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/filter_ngrams.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/filter_ngrams.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/find_duplicates.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/find_duplicates.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/find_duplicates.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/find_duplicates.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/group_duplicate_url.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/group_duplicate_url.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/group_duplicate_url.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/group_duplicate_url.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/merge_jsons.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/merge_jsons.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/merge_jsons.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/merge_jsons.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/remove_group_duplicates.py b/nlp/llm/llama2-7b/pytorch/tools/openwebtext/remove_group_duplicates.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/openwebtext/remove_group_duplicates.py
rename to nlp/llm/llama2-7b/pytorch/tools/openwebtext/remove_group_duplicates.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/preprocess_data.py b/nlp/llm/llama2-7b/pytorch/tools/preprocess_data.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/preprocess_data.py
rename to nlp/llm/llama2-7b/pytorch/tools/preprocess_data.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/preprocess_data_nmt.py b/nlp/llm/llama2-7b/pytorch/tools/preprocess_data_nmt.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/preprocess_data_nmt.py
rename to nlp/llm/llama2-7b/pytorch/tools/preprocess_data_nmt.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/README.md b/nlp/llm/llama2-7b/pytorch/tools/retro/README.md
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/README.md
rename to nlp/llm/llama2-7b/pytorch/tools/retro/README.md
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/cli/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/cli/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/cli/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/cli/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/cli/__main__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/cli/__main__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/cli/__main__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/cli/__main__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/cli/cli.py b/nlp/llm/llama2-7b/pytorch/tools/retro/cli/cli.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/cli/cli.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/cli/cli.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/db/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/db/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/build.py b/nlp/llm/llama2-7b/pytorch/tools/retro/db/build.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/build.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/db/build.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/dataset.py b/nlp/llm/llama2-7b/pytorch/tools/retro/db/dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/dataset.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/db/dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/utils.py b/nlp/llm/llama2-7b/pytorch/tools/retro/db/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/db/utils.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/db/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/get_dataset_configs.sh b/nlp/llm/llama2-7b/pytorch/tools/retro/examples/get_dataset_configs.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/get_dataset_configs.sh
rename to nlp/llm/llama2-7b/pytorch/tools/retro/examples/get_dataset_configs.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/get_preprocess_cmd.sh b/nlp/llm/llama2-7b/pytorch/tools/retro/examples/get_preprocess_cmd.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/get_preprocess_cmd.sh
rename to nlp/llm/llama2-7b/pytorch/tools/retro/examples/get_preprocess_cmd.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/preprocess_data.sh b/nlp/llm/llama2-7b/pytorch/tools/retro/examples/preprocess_data.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/preprocess_data.sh
rename to nlp/llm/llama2-7b/pytorch/tools/retro/examples/preprocess_data.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/pretrain_model.sh b/nlp/llm/llama2-7b/pytorch/tools/retro/examples/pretrain_model.sh
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/examples/pretrain_model.sh
rename to nlp/llm/llama2-7b/pytorch/tools/retro/examples/pretrain_model.sh
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/external_libs.py b/nlp/llm/llama2-7b/pytorch/tools/retro/external_libs.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/external_libs.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/external_libs.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/build.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/build.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/build.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/build.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/factory.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/factory.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/factory.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/factory.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/index.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/index.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/index.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/index.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/indexes/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/indexes/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/indexes/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/indexes/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/indexes/faiss_base.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/indexes/faiss_base.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/indexes/faiss_base.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/indexes/faiss_base.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/indexes/faiss_par_add.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/indexes/faiss_par_add.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/indexes/faiss_par_add.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/indexes/faiss_par_add.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/utils.py b/nlp/llm/llama2-7b/pytorch/tools/retro/index/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/index/utils.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/index/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/main.py b/nlp/llm/llama2-7b/pytorch/tools/retro/main.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/main.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/main.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/__init__.py b/nlp/llm/llama2-7b/pytorch/tools/retro/query/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/__init__.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/query/__init__.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/chunk_dataset.py b/nlp/llm/llama2-7b/pytorch/tools/retro/query/chunk_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/chunk_dataset.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/query/chunk_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/query.py b/nlp/llm/llama2-7b/pytorch/tools/retro/query/query.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/query.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/query/query.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/retro_dataset.py b/nlp/llm/llama2-7b/pytorch/tools/retro/query/retro_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/retro_dataset.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/query/retro_dataset.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/utils.py b/nlp/llm/llama2-7b/pytorch/tools/retro/query/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/query/utils.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/query/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/utils.py b/nlp/llm/llama2-7b/pytorch/tools/retro/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/retro/utils.py
rename to nlp/llm/llama2-7b/pytorch/tools/retro/utils.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/run_text_generation_server.py b/nlp/llm/llama2-7b/pytorch/tools/run_text_generation_server.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/run_text_generation_server.py
rename to nlp/llm/llama2-7b/pytorch/tools/run_text_generation_server.py
diff --git a/nlp/llm/llama2-7b/megatron-deepspeed/tools/text_generation_cli.py b/nlp/llm/llama2-7b/pytorch/tools/text_generation_cli.py
similarity index 100%
rename from nlp/llm/llama2-7b/megatron-deepspeed/tools/text_generation_cli.py
rename to nlp/llm/llama2-7b/pytorch/tools/text_generation_cli.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/.gitignore b/nlp/llm/llama2-7b_reward_sft/pytorch/.gitignore
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/.gitignore
rename to nlp/llm/llama2-7b_reward_sft/pytorch/.gitignore
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/README.md b/nlp/llm/llama2-7b_reward_sft/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/README.md
rename to nlp/llm/llama2-7b_reward_sft/pytorch/README.md
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/chat.py b/nlp/llm/llama2-7b_reward_sft/pytorch/chat.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/chat.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/chat.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/rlhf/ppo_trainer.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/rlhf/ppo_trainer.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/rlhf/ppo_trainer.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/rlhf/ppo_trainer.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/rlhf/rlhf_engine.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/rlhf/rlhf_engine.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/rlhf/rlhf_engine.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/rlhf/rlhf_engine.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/data/data_utils.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/data/data_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/data/data_utils.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/data/data_utils.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/data/raw_datasets.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/data/raw_datasets.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/data/raw_datasets.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/data/raw_datasets.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/ds_utils.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/ds_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/ds_utils.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/ds_utils.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/model/model_utils.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/model/model_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/model/model_utils.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/model/model_utils.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/model/reward_model.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/model/reward_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/model/reward_model.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/model/reward_model.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/module/lora.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/module/lora.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/module/lora.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/module/lora.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/perf.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/perf.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/perf.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/perf.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/utils.py b/nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/dschat/utils/utils.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/dschat/utils/utils.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/e2e_rlhf.py b/nlp/llm/llama2-7b_reward_sft/pytorch/e2e_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/e2e_rlhf.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/e2e_rlhf.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/inference/chatbot.py b/nlp/llm/llama2-7b_reward_sft/pytorch/inference/chatbot.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/inference/chatbot.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/inference/chatbot.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/requirements.txt b/nlp/llm/llama2-7b_reward_sft/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/requirements.txt
rename to nlp/llm/llama2-7b_reward_sft/pytorch/requirements.txt
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/setup.py b/nlp/llm/llama2-7b_reward_sft/pytorch/setup.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/setup.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/setup.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/tests/test_training.py b/nlp/llm/llama2-7b_reward_sft/pytorch/tests/test_training.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/tests/test_training.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/tests/test_training.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/README_origin.md b/nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/README_origin.md
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/README_origin.md
rename to nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/README_origin.md
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/evaluation_scripts/run_eval.sh b/nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/evaluation_scripts/run_eval.sh
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/evaluation_scripts/run_eval.sh
rename to nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/evaluation_scripts/run_eval.sh
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/main.py b/nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/main.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/main.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/main.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/rw_eval.py b/nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/rw_eval.py
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/rw_eval.py
rename to nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/rw_eval.py
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/training_scripts/README.md b/nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/training_scripts/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/training_scripts/README.md
rename to nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/training_scripts/README.md
diff --git a/nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/training_scripts/llama2/run_llama2_7b.sh b/nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/training_scripts/llama2/run_llama2_7b.sh
similarity index 100%
rename from nlp/llm/llama2-7b_reward_sft/deepspeed/training/step2_reward_model_finetuning/training_scripts/llama2/run_llama2_7b.sh
rename to nlp/llm/llama2-7b_reward_sft/pytorch/training/step2_reward_model_finetuning/training_scripts/llama2/run_llama2_7b.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/.coveragerc b/nlp/llm/llama2-7b_rlhf/pytorch/.coveragerc
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/.coveragerc
rename to nlp/llm/llama2-7b_rlhf/pytorch/.coveragerc
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/.gitignore b/nlp/llm/llama2-7b_rlhf/pytorch/.gitignore
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/.gitignore
rename to nlp/llm/llama2-7b_rlhf/pytorch/.gitignore
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/.gitlab-ci.yml b/nlp/llm/llama2-7b_rlhf/pytorch/.gitlab-ci.yml
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/.gitlab-ci.yml
rename to nlp/llm/llama2-7b_rlhf/pytorch/.gitlab-ci.yml
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/CODEOWNERS b/nlp/llm/llama2-7b_rlhf/pytorch/CODEOWNERS
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/CODEOWNERS
rename to nlp/llm/llama2-7b_rlhf/pytorch/CODEOWNERS
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/ILUVATAR.md b/nlp/llm/llama2-7b_rlhf/pytorch/ILUVATAR.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/ILUVATAR.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/ILUVATAR.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/LICENSE b/nlp/llm/llama2-7b_rlhf/pytorch/LICENSE
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/LICENSE
rename to nlp/llm/llama2-7b_rlhf/pytorch/LICENSE
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/MANIFEST.in b/nlp/llm/llama2-7b_rlhf/pytorch/MANIFEST.in
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/MANIFEST.in
rename to nlp/llm/llama2-7b_rlhf/pytorch/MANIFEST.in
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/README.md b/nlp/llm/llama2-7b_rlhf/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/README.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/README.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/README_origin.md b/nlp/llm/llama2-7b_rlhf/pytorch/README_origin.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/README_origin.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/README_origin.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/SECURITY.md b/nlp/llm/llama2-7b_rlhf/pytorch/SECURITY.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/SECURITY.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/SECURITY.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/build_megatron-deepspeed.sh b/nlp/llm/llama2-7b_rlhf/pytorch/build_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/build_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/build_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/checkpoints/convert_hf_2_meg.sh b/nlp/llm/llama2-7b_rlhf/pytorch/checkpoints/convert_hf_2_meg.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/checkpoints/convert_hf_2_meg.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/checkpoints/convert_hf_2_meg.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/checkpoints/download_rlhf_checkpoints.sh b/nlp/llm/llama2-7b_rlhf/pytorch/checkpoints/download_rlhf_checkpoints.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/checkpoints/download_rlhf_checkpoints.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/checkpoints/download_rlhf_checkpoints.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/clean_megatron-deepspeed.sh b/nlp/llm/llama2-7b_rlhf/pytorch/clean_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/clean_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/clean_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/dataset/download_and_convert_dataset.sh b/nlp/llm/llama2-7b_rlhf/pytorch/dataset/download_and_convert_dataset.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/dataset/download_and_convert_dataset.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/dataset/download_and_convert_dataset.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/examples/llama2/run_llama2_7b_rlhf_node1.sh b/nlp/llm/llama2-7b_rlhf/pytorch/examples/llama2/run_llama2_7b_rlhf_node1.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/examples/llama2/run_llama2_7b_rlhf_node1.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/examples/llama2/run_llama2_7b_rlhf_node1.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/examples/llama2/tokenizer/tokenizer.model b/nlp/llm/llama2-7b_rlhf/pytorch/examples/llama2/tokenizer/tokenizer.model
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/examples/llama2/tokenizer/tokenizer.model
rename to nlp/llm/llama2-7b_rlhf/pytorch/examples/llama2/tokenizer/tokenizer.model
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/install_megatron-deepspeed.sh b/nlp/llm/llama2-7b_rlhf/pytorch/install_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/install_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/install_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/arguments.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/arguments.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/arguments.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/arguments.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/checkpointing.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/checkpointing.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/checkpointing.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/checkpointing.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/README.md b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/README.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/README.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/Makefile b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/Makefile
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/Makefile
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/Makefile
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/blended_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/blended_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/blended_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/blended_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/blended_megatron_dataset_builder.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/blended_megatron_dataset_builder.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/blended_megatron_dataset_builder.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/blended_megatron_dataset_builder.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/blended_megatron_dataset_config.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/blended_megatron_dataset_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/blended_megatron_dataset_config.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/blended_megatron_dataset_config.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/gpt_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/gpt_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/gpt_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/gpt_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/helpers.cpp b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/helpers.cpp
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/helpers.cpp
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/helpers.cpp
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/indexed_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/indexed_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/indexed_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/megatron_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/megatron_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/megatron_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/megatron_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/readme.md b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/readme.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/readme.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/readme.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/datasets/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/datasets/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/core.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/core.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/core.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/core.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/dict_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/dict_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/dict_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/dict_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/mapping.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/mapping.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/mapping.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/mapping.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/optimizer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/optimizer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/optimizer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/serialization.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/serialization.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/serialization.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/serialization.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/base.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/base.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/base.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/base.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/tensorstore.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/tensorstore.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/tensorstore.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/tensorstore.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/two_stage.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/two_stage.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/two_stage.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/two_stage.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/zarr.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/zarr.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/strategies/zarr.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/strategies/zarr.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/dist_checkpointing/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/dist_checkpointing/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/distributed_data_parallel.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/distributed_data_parallel.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/distributed_data_parallel.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/distributed_data_parallel.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/finalize_model_grads.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/finalize_model_grads.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/finalize_model_grads.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/finalize_model_grads.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/grad_buffer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/grad_buffer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/distributed/grad_buffer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/distributed/grad_buffer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/enums.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/enums.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/enums.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_bias_dropout.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_bias_dropout.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_bias_dropout.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_bias_dropout.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_bias_gelu.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_bias_gelu.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_bias_gelu.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_bias_gelu.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_layer_norm.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_layer_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_layer_norm.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_layer_norm.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_softmax.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_softmax.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/fusions/fused_softmax.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/fusions/fused_softmax.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/inference_params.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/inference_params.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/inference_params.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/inference_params.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/model_parallel_config.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/model_parallel_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/model_parallel_config.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/model_parallel_config.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/T5/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/T5/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/T5/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/T5/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/T5/t5_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/T5/t5_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/T5/t5_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/T5/t5_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/T5/t5_spec.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/T5/t5_spec.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/T5/t5_spec.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/T5/t5_spec.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/bert_layer_specs.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/bert_layer_specs.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/bert_layer_specs.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/bert_layer_specs.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/bert_lm_head.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/bert_lm_head.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/bert_lm_head.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/bert_lm_head.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/bert_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/bert_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/bert_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/bert_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/pooler.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/pooler.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/bert/pooler.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/bert/pooler.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/embeddings/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/embeddings/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/embeddings/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/embeddings/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/embeddings/language_model_embedding.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/embeddings/language_model_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/embeddings/language_model_embedding.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/embeddings/language_model_embedding.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/embeddings/rotary_pos_embedding.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/embeddings/rotary_pos_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/embeddings/rotary_pos_embedding.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/embeddings/rotary_pos_embedding.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/language_module/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/language_module/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/language_module/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/language_module/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/language_module/language_module.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/language_module/language_module.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/common/language_module/language_module.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/common/language_module/language_module.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/gpt_embedding.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/gpt_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/gpt_embedding.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/gpt_embedding.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/gpt_layer_specs.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/gpt_layer_specs.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/gpt_layer_specs.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/gpt_layer_specs.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/gpt_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/gpt/gpt_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/gpt/gpt_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/base_attention.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/base_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/base_attention.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/base_attention.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/config.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/config.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/config.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/config.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/decoder_attention.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/decoder_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/decoder_attention.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/decoder_attention.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/decoder_spec.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/decoder_spec.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/decoder_spec.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/decoder_spec.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/encoder_attention.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/encoder_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/encoder_attention.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/encoder_attention.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/encoder_spec.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/encoder_spec.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/encoder_spec.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/encoder_spec.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/models/retro/model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/models/retro/model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/package_info.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/package_info.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/package_info.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/package_info.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/parallel_state.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/parallel_state.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/parallel_state.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/parallel_state.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/pipeline_parallel/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/pipeline_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/pipeline_parallel/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/pipeline_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/pipeline_parallel/p2p_communication.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/pipeline_parallel/p2p_communication.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/pipeline_parallel/p2p_communication.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/pipeline_parallel/p2p_communication.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/pipeline_parallel/schedules.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/pipeline_parallel/schedules.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/pipeline_parallel/schedules.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/pipeline_parallel/schedules.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/requirements.txt b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/requirements.txt
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/requirements.txt
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/requirements.txt
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/sequence_parallel/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/sequence_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/sequence_parallel/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/sequence_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/sequence_parallel/cross_entropy.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/sequence_parallel/cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/sequence_parallel/cross_entropy.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/sequence_parallel/cross_entropy.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/cross_entropy.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/cross_entropy.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/cross_entropy.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/data.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/data.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/data.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/data.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/layers.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/layers.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/layers.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/layers.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/mappings.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/mappings.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/mappings.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/mappings.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/random.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/random.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/random.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/random.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/tensor_parallel/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/tensor_parallel/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/attention.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/attention.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/attention.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/custom_layers/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/custom_layers/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/custom_layers/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/custom_layers/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/custom_layers/transformer_engine.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/custom_layers/transformer_engine.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/custom_layers/transformer_engine.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/custom_layers/transformer_engine.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/dot_product_attention.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/dot_product_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/dot_product_attention.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/dot_product_attention.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/enums.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/enums.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/enums.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/identity_op.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/identity_op.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/identity_op.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/identity_op.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/mlp.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/mlp.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/mlp.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/mlp.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/module.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/module.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/module.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/module.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/spec_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/spec_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/spec_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/spec_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/switch_mlp.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/switch_mlp.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/switch_mlp.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/switch_mlp.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/transformer_block.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/transformer_block.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/transformer_block.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/transformer_block.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/transformer_config.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/transformer_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/transformer_config.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/transformer_config.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/transformer_layer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/transformer_layer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/transformer_layer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/transformer_layer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/transformer/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/transformer/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/core/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/core/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/Makefile b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/Makefile
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/Makefile
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/Makefile
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/autoaugment.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/autoaugment.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/autoaugment.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/autoaugment.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/bert_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/bert_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/bert_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/bert_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/biencoder_dataset_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/biencoder_dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/biencoder_dataset_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/biencoder_dataset_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/blendable_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/blendable_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/blendable_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/blendable_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/data_samplers.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/data_samplers.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/data_samplers.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/data_samplers.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/dataset_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/dataset_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/dataset_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/gpt_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/gpt_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/gpt_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/gpt_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/helpers.cpp b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/helpers.cpp
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/helpers.cpp
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/helpers.cpp
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/ict_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/ict_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/ict_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/ict_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/image_folder.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/image_folder.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/image_folder.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/image_folder.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/indexed_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/indexed_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/indexed_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/multimodal_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/multimodal_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/multimodal_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/multimodal_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/orqa_wiki_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/orqa_wiki_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/orqa_wiki_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/orqa_wiki_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/realm_dataset_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/realm_dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/realm_dataset_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/realm_dataset_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/realm_index.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/realm_index.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/realm_index.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/realm_index.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/t5_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/t5_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/t5_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/t5_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/test/test_indexed_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/test/test_indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/test/test_indexed_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/test/test_indexed_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/test/test_preprocess_data.sh b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/test/test_preprocess_data.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/test/test_preprocess_data.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/test/test_preprocess_data.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/vit_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/vit_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/data/vit_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/data/vit_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/dist_signal_handler.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/dist_signal_handler.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/dist_signal_handler.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/dist_signal_handler.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/enums.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/enums.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/enums.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fp16_deprecated/loss_scaler.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fp16_deprecated/loss_scaler.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fp16_deprecated/loss_scaler.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fp16_deprecated/loss_scaler.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/compat.h b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/compat.h
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/compat.h
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/compat.h
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/tests/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/tests/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/tests/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/tests/test_fused_kernels.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/tests/test_fused_kernels.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/tests/test_fused_kernels.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/tests/test_fused_kernels.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/type_shim.h b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/type_shim.h
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/fused_kernels/type_shim.h
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/fused_kernels/type_shim.h
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/global_vars.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/global_vars.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/global_vars.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/global_vars.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/indexer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/indexer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/indexer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/indexer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/initialize.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/initialize.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/initialize.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/initialize.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/log_handler.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/log_handler.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/log_handler.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/log_handler.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/memory.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/memory.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/memory.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/memory.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/microbatches.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/microbatches.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/microbatches.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/microbatches.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/bert_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/bert_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/bert_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/bert_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/biencoder_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/biencoder_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/biencoder_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/biencoder_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/classification.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/classification.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/classification.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/distributed.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/distributed.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/distributed.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/distributed.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/enums.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/enums.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/enums.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/fused_bias_gelu.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/fused_bias_gelu.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/fused_bias_gelu.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/fused_bias_gelu.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/fused_layer_norm.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/fused_layer_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/fused_layer_norm.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/fused_layer_norm.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/fused_softmax.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/fused_softmax.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/fused_softmax.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/fused_softmax.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/gpt_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/gpt_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/gpt_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/language_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/language_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/language_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/language_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/module.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/module.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/module.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/module.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/multiple_choice.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/multiple_choice.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/multiple_choice.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/multiple_choice.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/realm_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/realm_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/realm_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/realm_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/rms_norm.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/rms_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/rms_norm.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/rms_norm.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/rotary_pos_embedding.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/rotary_pos_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/rotary_pos_embedding.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/rotary_pos_embedding.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/t5_model.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/t5_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/t5_model.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/t5_model.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/transformer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/transformer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/transformer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/transformer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/classification.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/classification.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/classification.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/dino.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/dino.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/dino.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/dino.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/esvit_swin_backbone.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/esvit_swin_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/esvit_swin_backbone.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/esvit_swin_backbone.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/inpainting.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/inpainting.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/inpainting.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/inpainting.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/knn_monitor.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/knn_monitor.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/knn_monitor.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/knn_monitor.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/mit_backbone.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/mit_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/mit_backbone.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/mit_backbone.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/swin_backbone.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/swin_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/swin_backbone.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/swin_backbone.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/vit_backbone.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/vit_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/model/vision/vit_backbone.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/model/vision/vit_backbone.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/commons.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/commons.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/commons.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/commons.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_cross_entropy.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_cross_entropy.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_cross_entropy.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_data.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_data.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_data.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_initialize.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_initialize.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_initialize.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_initialize.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_layers.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_layers.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_layers.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_layers.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_random.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_random.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/mpu/tests/test_random.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/mpu/tests/test_random.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/clip_grads.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/clip_grads.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/clip_grads.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/clip_grads.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/distrib_optimizer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/distrib_optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/distrib_optimizer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/distrib_optimizer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/grad_scaler.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/grad_scaler.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/grad_scaler.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/grad_scaler.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/optimizer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/optimizer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/optimizer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer_param_scheduler.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer_param_scheduler.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/optimizer_param_scheduler.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/optimizer_param_scheduler.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/p2p_communication.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/p2p_communication.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/p2p_communication.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/p2p_communication.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/checkpointing_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/checkpointing_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/checkpointing_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/checkpointing_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/communication_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/communication_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/communication_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/communication_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/forward_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/forward_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/forward_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/forward_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/generation_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/generation_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/generation/generation_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/generation/generation_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/initialize_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/initialize_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/initialize_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/initialize_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/schedules_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/schedules_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/schedules_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/schedules_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/training_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/training_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/rlhf/training_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/rlhf/training_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/static/index.html b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/static/index.html
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/static/index.html
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/static/index.html
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/api.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/api.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/api.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/api.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/beam_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/beam_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/beam_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/beam_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/communication.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/communication.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/communication.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/communication.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/forward_step.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/forward_step.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/forward_step.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/forward_step.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/generation.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/generation.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/generation.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/generation.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/sampling.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/sampling.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/sampling.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/sampling.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/tokenization.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation/tokenization.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation/tokenization.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation_server.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation_server.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation_server.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation_server.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/text_generation_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/text_generation_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/theoretical_memory_usage.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/theoretical_memory_usage.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/theoretical_memory_usage.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/theoretical_memory_usage.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/timers.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/timers.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/timers.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/timers.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/bert_tokenization.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/bert_tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/bert_tokenization.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/bert_tokenization.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/gpt2_tokenization.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/gpt2_tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/gpt2_tokenization.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/gpt2_tokenization.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/tokenization_utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/tokenization_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/tokenization_utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/tokenization_utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/tokenizer.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/tokenizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/tokenizer/tokenizer.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/tokenizer/tokenizer.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/training.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/training.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/training.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/training.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/megatron_ds/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/megatron_ds/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/pretrain_gpt.py b/nlp/llm/llama2-7b_rlhf/pytorch/pretrain_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/pretrain_gpt.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/pretrain_gpt.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/pretrain_gpt_megatron.py b/nlp/llm/llama2-7b_rlhf/pytorch/pretrain_gpt_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/pretrain_gpt_megatron.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/pretrain_gpt_megatron.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/report_theoretical_memory.py b/nlp/llm/llama2-7b_rlhf/pytorch/report_theoretical_memory.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/report_theoretical_memory.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/report_theoretical_memory.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/requirments_rlhf.txt b/nlp/llm/llama2-7b_rlhf/pytorch/requirments_rlhf.txt
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/requirments_rlhf.txt
rename to nlp/llm/llama2-7b_rlhf/pytorch/requirments_rlhf.txt
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/setup.py b/nlp/llm/llama2-7b_rlhf/pytorch/setup.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/setup.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/setup.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/embed.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/embed.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/embed.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/embed.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/external_libs.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/external_libs.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/external_libs.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/external_libs.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/huggingface.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/huggingface.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/huggingface.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/huggingface.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/bert_embedding/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/bert_embedding/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/checkpoint_loader_megatron.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/checkpoint_loader_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/checkpoint_loader_megatron.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/checkpoint_loader_megatron.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/checkpoint_saver_megatron.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/checkpoint_saver_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/checkpoint_saver_megatron.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/checkpoint_saver_megatron.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/checkpoint_util.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/checkpoint_util.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/checkpoint_util.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/checkpoint_util.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/README.md b/nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/README.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/README.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/deepspeed_checkpoint.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/deepspeed_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/deepspeed_checkpoint.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/deepspeed_checkpoint.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_megatron.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/deepspeed_to_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_megatron.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/deepspeed_to_megatron.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_transformers.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/deepspeed_to_transformers.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_transformers.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/deepspeed_to_transformers.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/inspect_checkpoint.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/inspect_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/inspect_checkpoint.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/inspect_checkpoint.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/generate_samples_gpt.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/generate_samples_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/generate_samples_gpt.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/generate_samples_gpt.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/hf2megads_weight_converter.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/hf2megads_weight_converter.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/hf2megads_weight_converter.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/hf2megads_weight_converter.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/linter.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/linter.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/linter.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/linter.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/loader_llama2_hf.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/loader_llama2_hf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/loader_llama2_hf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/loader_llama2_hf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/loader_tinyllama_rlhf.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/loader_tinyllama_rlhf.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/loader_tinyllama_rlhf.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/loader_tinyllama_rlhf.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/merge_datasets.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/merge_datasets.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/merge_datasets.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/merge_datasets.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/README.md b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/README.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/README.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/add_id.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/add_id.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/add_id.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/add_id.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/blacklist_urls.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/blacklist_urls.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/blacklist_urls.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/blacklist_urls.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/cleanup_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/cleanup_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/cleanup_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/cleanup_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/cleanup_fix_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/cleanup_fix_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/cleanup_fix_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/cleanup_fix_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/filter_ngrams.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/filter_ngrams.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/filter_ngrams.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/filter_ngrams.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/find_duplicates.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/find_duplicates.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/find_duplicates.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/find_duplicates.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/group_duplicate_url.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/group_duplicate_url.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/group_duplicate_url.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/group_duplicate_url.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/merge_jsons.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/merge_jsons.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/merge_jsons.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/merge_jsons.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/remove_group_duplicates.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/remove_group_duplicates.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/openwebtext/remove_group_duplicates.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/openwebtext/remove_group_duplicates.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/preprocess_data.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/preprocess_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/preprocess_data.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/preprocess_data.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/preprocess_data_nmt.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/preprocess_data_nmt.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/preprocess_data_nmt.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/preprocess_data_nmt.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/README.md b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/README.md
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/README.md
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/cli/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/cli/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/cli/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/cli/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/cli/__main__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/cli/__main__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/cli/__main__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/cli/__main__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/cli/cli.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/cli/cli.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/cli/cli.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/cli/cli.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/build.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/build.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/build.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/build.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/db/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/db/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/get_dataset_configs.sh b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/get_dataset_configs.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/get_dataset_configs.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/get_dataset_configs.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/get_preprocess_cmd.sh b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/get_preprocess_cmd.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/get_preprocess_cmd.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/get_preprocess_cmd.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/preprocess_data.sh b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/preprocess_data.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/preprocess_data.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/preprocess_data.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/pretrain_model.sh b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/pretrain_model.sh
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/examples/pretrain_model.sh
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/examples/pretrain_model.sh
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/external_libs.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/external_libs.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/external_libs.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/external_libs.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/build.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/build.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/build.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/build.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/factory.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/factory.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/factory.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/factory.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/index.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/index.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/index.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/index.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/indexes/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/indexes/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/indexes/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/indexes/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/indexes/faiss_base.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/indexes/faiss_base.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/indexes/faiss_base.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/indexes/faiss_base.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/indexes/faiss_par_add.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/indexes/faiss_par_add.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/indexes/faiss_par_add.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/indexes/faiss_par_add.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/index/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/index/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/main.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/main.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/main.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/main.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/__init__.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/__init__.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/__init__.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/chunk_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/chunk_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/chunk_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/chunk_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/query.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/query.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/query.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/query.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/retro_dataset.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/retro_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/retro_dataset.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/retro_dataset.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/query/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/query/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/utils.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/retro/utils.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/retro/utils.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/run_text_generation_server.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/run_text_generation_server.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/run_text_generation_server.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/run_text_generation_server.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/text_generation_cli.py b/nlp/llm/llama2-7b_rlhf/pytorch/tools/text_generation_cli.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/tools/text_generation_cli.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/tools/text_generation_cli.py
diff --git a/nlp/llm/llama2-7b_rlhf/megatron-deepspeed/train_rlhf_llama.py b/nlp/llm/llama2-7b_rlhf/pytorch/train_rlhf_llama.py
similarity index 100%
rename from nlp/llm/llama2-7b_rlhf/megatron-deepspeed/train_rlhf_llama.py
rename to nlp/llm/llama2-7b_rlhf/pytorch/train_rlhf_llama.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/.coveragerc b/nlp/llm/llama2-7b_sft/pytorch/.coveragerc
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/.coveragerc
rename to nlp/llm/llama2-7b_sft/pytorch/.coveragerc
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/CODEOWNERS b/nlp/llm/llama2-7b_sft/pytorch/CODEOWNERS
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/CODEOWNERS
rename to nlp/llm/llama2-7b_sft/pytorch/CODEOWNERS
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/ILUVATAR.md b/nlp/llm/llama2-7b_sft/pytorch/ILUVATAR.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/ILUVATAR.md
rename to nlp/llm/llama2-7b_sft/pytorch/ILUVATAR.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/LICENSE b/nlp/llm/llama2-7b_sft/pytorch/LICENSE
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/LICENSE
rename to nlp/llm/llama2-7b_sft/pytorch/LICENSE
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/MANIFEST.in b/nlp/llm/llama2-7b_sft/pytorch/MANIFEST.in
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/MANIFEST.in
rename to nlp/llm/llama2-7b_sft/pytorch/MANIFEST.in
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/README.md b/nlp/llm/llama2-7b_sft/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/README_origin.md b/nlp/llm/llama2-7b_sft/pytorch/README_origin.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/README_origin.md
rename to nlp/llm/llama2-7b_sft/pytorch/README_origin.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/SECURITY.md b/nlp/llm/llama2-7b_sft/pytorch/SECURITY.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/SECURITY.md
rename to nlp/llm/llama2-7b_sft/pytorch/SECURITY.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/build_megatron-deepspeed.sh b/nlp/llm/llama2-7b_sft/pytorch/build_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/build_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/build_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/checkpoints/convert_hf_2_meg.sh b/nlp/llm/llama2-7b_sft/pytorch/checkpoints/convert_hf_2_meg.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/checkpoints/convert_hf_2_meg.sh
rename to nlp/llm/llama2-7b_sft/pytorch/checkpoints/convert_hf_2_meg.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/clean_megatron-deepspeed.sh b/nlp/llm/llama2-7b_sft/pytorch/clean_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/clean_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/clean_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/dataset/README.md b/nlp/llm/llama2-7b_sft/pytorch/dataset/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/dataset/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/dataset/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/dataset/download_and_convert_dataset.sh b/nlp/llm/llama2-7b_sft/pytorch/dataset/download_and_convert_dataset.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/dataset/download_and_convert_dataset.sh
rename to nlp/llm/llama2-7b_sft/pytorch/dataset/download_and_convert_dataset.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/docs/distrib_optimizer.md b/nlp/llm/llama2-7b_sft/pytorch/docs/distrib_optimizer.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/docs/distrib_optimizer.md
rename to nlp/llm/llama2-7b_sft/pytorch/docs/distrib_optimizer.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/docs/images/distrib_optimizer/data_flow.png b/nlp/llm/llama2-7b_sft/pytorch/docs/images/distrib_optimizer/data_flow.png
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/docs/images/distrib_optimizer/data_flow.png
rename to nlp/llm/llama2-7b_sft/pytorch/docs/images/distrib_optimizer/data_flow.png
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/docs/images/distrib_optimizer/sharding_scheme.png b/nlp/llm/llama2-7b_sft/pytorch/docs/images/distrib_optimizer/sharding_scheme.png
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/docs/images/distrib_optimizer/sharding_scheme.png
rename to nlp/llm/llama2-7b_sft/pytorch/docs/images/distrib_optimizer/sharding_scheme.png
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/ds_config.json b/nlp/llm/llama2-7b_sft/pytorch/ds_config.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/ds_config.json
rename to nlp/llm/llama2-7b_sft/pytorch/ds_config.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/annotations/filter-selfgeneration.py b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/annotations/filter-selfgeneration.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/annotations/filter-selfgeneration.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/annotations/filter-selfgeneration.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/annotations/perspective_api_annotate.py b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/annotations/perspective_api_annotate.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/annotations/perspective_api_annotate.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/annotations/perspective_api_annotate.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/annotations/preprocess.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/annotations/preprocess.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/annotations/preprocess.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/annotations/preprocess.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/finetune_gpt.py b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/finetune_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/finetune_gpt.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/finetune_gpt.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/generate-1.3b.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/generate-1.3b.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/generate-1.3b.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/generate-1.3b.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/generate_samples_gpt.py b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/generate_samples_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/generate_samples_gpt.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/generate_samples_gpt.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/perspective_api.py b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/perspective_api.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/perspective_api.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/perspective_api.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/evaluate_retriever_nq.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/evaluate_retriever_nq.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/evaluate_retriever_nq.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/evaluate_retriever_nq.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/evaluate_zeroshot_gpt.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/evaluate_zeroshot_gpt.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/evaluate_zeroshot_gpt.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/evaluate_zeroshot_gpt.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/finetune_mnli_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/finetune_mnli_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/finetune_mnli_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/finetune_mnli_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/finetune_race_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/finetune_race_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/finetune_race_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/finetune_race_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/finetune_retriever_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/finetune_retriever_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/finetune_retriever_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/finetune_retriever_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/llama2/run_meg_llama2_7b_sft.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/llama2/run_meg_llama2_7b_sft.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/llama2/run_meg_llama2_7b_sft.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/llama2/run_meg_llama2_7b_sft.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/llama2/tokenizer/tokenizer.model b/nlp/llm/llama2-7b_sft/pytorch/examples/llama2/tokenizer/tokenizer.model
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/llama2/tokenizer/tokenizer.model
rename to nlp/llm/llama2-7b_sft/pytorch/examples/llama2/tokenizer/tokenizer.model
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/merge_mp_bert.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/merge_mp_bert.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/merge_mp_bert.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/merge_mp_bert.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/data_processing.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/data_processing.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/data_processing.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/data_processing.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/eval_knwl_generation.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/eval_knwl_generation.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/eval_knwl_generation.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/eval_knwl_generation.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/eval_resp_generation.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/eval_resp_generation.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/eval_resp_generation.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/eval_resp_generation.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/prep_resp_gen.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/prep_resp_gen.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/prep_resp_gen.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/prep_resp_gen.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/prompt_knwl_gen.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/prompt_knwl_gen.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/prompt_knwl_gen.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/prompt_knwl_gen.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/prompt_resp_gen.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/msdp/prompt_resp_gen.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/msdp/prompt_resp_gen.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/msdp/prompt_resp_gen.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_bert.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_bert.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_bert.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_bert.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_bert_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_bert_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_bert_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_bert_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_bert_distributed_with_mp.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_bert_distributed_with_mp.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_bert_distributed_with_mp.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_bert_distributed_with_mp.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt3_175B.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt3_175B.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt3_175B.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt3_175B.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt_distributed_with_mp.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt_distributed_with_mp.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_gpt_distributed_with_mp.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_gpt_distributed_with_mp.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_ict.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_ict.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_ict.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_ict.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_t5.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_t5.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_t5.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_t5.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_t5_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_t5_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_t5_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_t5_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_t5_distributed_with_mp.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_t5_distributed_with_mp.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/pretrain_t5_distributed_with_mp.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/pretrain_t5_distributed_with_mp.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/run_text_generation_server_345M.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/run_text_generation_server_345M.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/run_text_generation_server_345M.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/run_text_generation_server_345M.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/run_text_generation_server_345M_8_tensor_parallel.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/run_text_generation_server_345M_8_tensor_parallel.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/run_text_generation_server_345M_8_tensor_parallel.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/run_text_generation_server_345M_8_tensor_parallel.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/CONFIG.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/CONFIG.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/CONFIG.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/CONFIG.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/SBATCH.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/SBATCH.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/SBATCH.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/SBATCH.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/SRUN.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/SRUN.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/SRUN.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/SRUN.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_11.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_11.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_11.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_11.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_12.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_12.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_12.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_12.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_13.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_13.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_13.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_13.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_14.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_14.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_14.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_14.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_15.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_15.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_15.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_15.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_16.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_16.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_16.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_16.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_17.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_17.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_17.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_17.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_18.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_18.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_figure_18.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_figure_18.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_table_1.sh b/nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_table_1.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples/sc21/run_table_1.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples/sc21/run_table_1.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_config_gpt_Zero2_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_evalharness.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_evalharness.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_evalharness.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_evalharness.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_MoE128.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_PR-MoE64or128_MoS.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_1.3B_dense_cl.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_MoE64.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_125M_dense_cl.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_MoE128.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_PR-MoE32or64_MoS.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_350M_dense.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/ds_pretrain_gpt_6.7B_dense.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/readme_evalharness.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/readme_evalharness.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/MoE/readme_evalharness.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/MoE/readme_evalharness.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/run-175b.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/run-175b.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/run-175b.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/run-175b.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/run-1t.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/run-1t.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/run-1t.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/run-1t.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/run-benchmark-model.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/run-benchmark-model.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azure/run-benchmark-model.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azure/run-benchmark-model.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/Dockerfile.dockerfile b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/Dockerfile.dockerfile
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/Dockerfile.dockerfile
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/Dockerfile.dockerfile
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/aml_submit.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/aml_submit.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/aml_submit.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/aml_submit.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/prepare_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/prepare_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/azureml/prepare_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/azureml/prepare_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_config_bert_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_mnli.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_qqp.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_finetune_bert_race.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/ds_pretrain_bert.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/prepare_pile_data.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/prepare_pile_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/bert_with_pile/prepare_pile_data.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/bert_with_pile/prepare_pile_data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/125M-Int8-test-64gpu-distilled-group48.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/125M-L10-Int8-test-64gpu-distilled-group48.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/125M-L12-Int8-test-64gpu-distilled-group48.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_config_gpt_TEMPLATE_compression.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_evalharness.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_evalharness.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_evalharness.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_evalharness.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_1.3B_dense_cl_kd.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_cl_kd.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_125M_dense_kd.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/compression/ds_pretrain_gpt_350M_dense_kd.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_pretrain_gpt2.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_train.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_train.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_train.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_train.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_baseline.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/curriculum_learning/ds_zero_stage_1_config_curriculum_fixed_linear.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/analyze_data.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/analyze_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/analyze_data.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/analyze_data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_map.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/ds_analyze_bert_data_reduce.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_config_bert_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_mnli.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_qqp.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_bert_race.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune/ds_finetune_gather_result.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_config_bert_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_bert_glue_run.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/finetune_glue/ds_finetune_gather_result.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pile_data_download_preprocess.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_1clmetric_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_config_bert_2clmetrics_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_base_script.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/bert/pretrain/ds_pretrain_bert_336M_run.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_map.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/ds_analyze_gpt_data_reduce.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_config_eval_dummy.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_1gpu.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_gather_result.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/eval/ds_evalharness_parallel_run_10shot.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_1clmetric_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_config_gpt_2clmetrics_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_base_script.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/data_efficiency/gpt/pretrain/ds_pretrain_gpt_1.3B_dense_run.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/generate_text.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/generate_text.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/generate_text.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/generate_text.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/pretrain_llama2_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/pretrain_llama2_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/pretrain_llama2_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/pretrain_llama2_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/pretrain_llama_distributed.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/pretrain_llama_distributed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/pretrain_llama_distributed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/pretrain_llama_distributed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_megatron_checkpointing.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_1.3B_rope.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_125M_flashattn.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/rebase/ds_pretrain_gpt_13B.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/run_deepspeed_example.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/run_deepspeed_example.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/run_deepspeed_example.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/run_deepspeed_example.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/README.md b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/ds_config_gpt_TEMPLATE.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_1.3B_seq_parallel_32k.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh b/nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh
rename to nlp/llm/llama2-7b_sft/pytorch/examples_deepspeed/sequence_parallel/ds_pretrain_gpt_30B_seq_parallel_32k.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/images/Achieved_petaFLOPs.png b/nlp/llm/llama2-7b_sft/pytorch/images/Achieved_petaFLOPs.png
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/images/Achieved_petaFLOPs.png
rename to nlp/llm/llama2-7b_sft/pytorch/images/Achieved_petaFLOPs.png
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/images/cases_april2021.png b/nlp/llm/llama2-7b_sft/pytorch/images/cases_april2021.png
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/images/cases_april2021.png
rename to nlp/llm/llama2-7b_sft/pytorch/images/cases_april2021.png
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/install_megatron-deepspeed.sh b/nlp/llm/llama2-7b_sft/pytorch/install_megatron-deepspeed.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/install_megatron-deepspeed.sh
rename to nlp/llm/llama2-7b_sft/pytorch/install_megatron-deepspeed.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/arguments.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/arguments.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/arguments.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/arguments.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/checkpointing.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/checkpointing.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/checkpointing.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/checkpointing.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/README.md b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/Makefile b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/Makefile
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/Makefile
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/Makefile
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/blended_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/blended_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/blended_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/blended_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/blended_megatron_dataset_builder.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/blended_megatron_dataset_builder.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/blended_megatron_dataset_builder.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/blended_megatron_dataset_builder.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/blended_megatron_dataset_config.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/blended_megatron_dataset_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/blended_megatron_dataset_config.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/blended_megatron_dataset_config.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/gpt_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/gpt_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/gpt_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/gpt_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/helpers b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/helpers
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/helpers
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/helpers
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/helpers.cpp b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/helpers.cpp
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/helpers.cpp
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/helpers.cpp
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/indexed_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/indexed_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/indexed_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/megatron_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/megatron_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/megatron_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/megatron_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/readme.md b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/readme.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/readme.md
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/readme.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/datasets/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/datasets/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/core.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/core.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/core.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/core.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/dict_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/dict_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/dict_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/dict_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/mapping.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/mapping.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/mapping.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/mapping.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/optimizer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/optimizer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/optimizer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/serialization.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/serialization.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/serialization.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/serialization.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/base.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/base.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/base.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/base.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/tensorstore.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/tensorstore.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/tensorstore.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/tensorstore.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/two_stage.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/two_stage.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/two_stage.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/two_stage.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/zarr.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/zarr.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/strategies/zarr.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/strategies/zarr.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/dist_checkpointing/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/dist_checkpointing/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/distributed_data_parallel.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/distributed_data_parallel.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/distributed_data_parallel.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/distributed_data_parallel.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/finalize_model_grads.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/finalize_model_grads.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/finalize_model_grads.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/finalize_model_grads.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/grad_buffer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/grad_buffer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/distributed/grad_buffer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/distributed/grad_buffer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/enums.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/enums.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/enums.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_bias_dropout.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_bias_dropout.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_bias_dropout.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_bias_dropout.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_bias_gelu.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_bias_gelu.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_bias_gelu.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_bias_gelu.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_layer_norm.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_layer_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_layer_norm.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_layer_norm.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_softmax.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_softmax.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/fusions/fused_softmax.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/fusions/fused_softmax.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/inference_params.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/inference_params.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/inference_params.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/inference_params.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/model_parallel_config.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/model_parallel_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/model_parallel_config.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/model_parallel_config.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/T5/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/T5/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/T5/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/T5/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/T5/t5_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/T5/t5_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/T5/t5_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/T5/t5_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/T5/t5_spec.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/T5/t5_spec.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/T5/t5_spec.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/T5/t5_spec.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/bert_layer_specs.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/bert_layer_specs.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/bert_layer_specs.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/bert_layer_specs.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/bert_lm_head.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/bert_lm_head.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/bert_lm_head.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/bert_lm_head.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/bert_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/bert_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/bert_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/bert_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/pooler.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/pooler.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/bert/pooler.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/bert/pooler.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/embeddings/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/embeddings/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/embeddings/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/embeddings/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/embeddings/language_model_embedding.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/embeddings/language_model_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/embeddings/language_model_embedding.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/embeddings/language_model_embedding.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/embeddings/rotary_pos_embedding.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/embeddings/rotary_pos_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/embeddings/rotary_pos_embedding.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/embeddings/rotary_pos_embedding.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/language_module/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/language_module/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/language_module/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/language_module/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/language_module/language_module.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/language_module/language_module.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/common/language_module/language_module.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/common/language_module/language_module.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/gpt_embedding.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/gpt_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/gpt_embedding.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/gpt_embedding.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/gpt_layer_specs.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/gpt_layer_specs.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/gpt_layer_specs.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/gpt_layer_specs.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/gpt_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/gpt/gpt_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/gpt/gpt_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/base_attention.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/base_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/base_attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/base_attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/config.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/config.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/config.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/config.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/decoder_attention.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/decoder_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/decoder_attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/decoder_attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/decoder_spec.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/decoder_spec.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/decoder_spec.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/decoder_spec.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/encoder_attention.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/encoder_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/encoder_attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/encoder_attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/encoder_spec.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/encoder_spec.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/encoder_spec.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/encoder_spec.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/models/retro/model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/models/retro/model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/package_info.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/package_info.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/package_info.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/package_info.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/parallel_state.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/parallel_state.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/parallel_state.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/parallel_state.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/pipeline_parallel/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/pipeline_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/pipeline_parallel/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/pipeline_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/pipeline_parallel/p2p_communication.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/pipeline_parallel/p2p_communication.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/pipeline_parallel/p2p_communication.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/pipeline_parallel/p2p_communication.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/pipeline_parallel/schedules.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/pipeline_parallel/schedules.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/pipeline_parallel/schedules.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/pipeline_parallel/schedules.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/requirements.txt b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/requirements.txt
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/requirements.txt
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/requirements.txt
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/sequence_parallel/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/sequence_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/sequence_parallel/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/sequence_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/sequence_parallel/cross_entropy.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/sequence_parallel/cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/sequence_parallel/cross_entropy.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/sequence_parallel/cross_entropy.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/cross_entropy.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/cross_entropy.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/cross_entropy.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/data.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/data.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/layers.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/layers.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/layers.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/layers.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/mappings.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/mappings.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/mappings.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/mappings.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/random.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/random.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/random.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/random.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/tensor_parallel/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/tensor_parallel/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/attention.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/custom_layers/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/custom_layers/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/custom_layers/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/custom_layers/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/custom_layers/transformer_engine.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/custom_layers/transformer_engine.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/custom_layers/transformer_engine.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/custom_layers/transformer_engine.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/dot_product_attention.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/dot_product_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/dot_product_attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/dot_product_attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/enums.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/enums.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/enums.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/identity_op.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/identity_op.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/identity_op.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/identity_op.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/mlp.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/mlp.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/mlp.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/mlp.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/module.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/module.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/module.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/module.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/spec_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/spec_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/spec_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/spec_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/switch_mlp.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/switch_mlp.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/switch_mlp.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/switch_mlp.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/transformer_block.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/transformer_block.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/transformer_block.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/transformer_block.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/transformer_config.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/transformer_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/transformer_config.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/transformer_config.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/transformer_layer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/transformer_layer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/transformer_layer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/transformer_layer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/transformer/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/transformer/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/core/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/core/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/core/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/Makefile b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/Makefile
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/Makefile
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/Makefile
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/autoaugment.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/autoaugment.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/autoaugment.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/autoaugment.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/bert_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/bert_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/bert_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/bert_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/biencoder_dataset_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/biencoder_dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/biencoder_dataset_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/biencoder_dataset_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/blendable_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/blendable_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/blendable_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/blendable_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/data_samplers.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/data_samplers.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/data_samplers.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/data_samplers.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/dataset_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/dataset_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/dataset_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/gpt_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/gpt_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/gpt_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/gpt_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/helpers.cpp b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/helpers.cpp
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/helpers.cpp
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/helpers.cpp
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/ict_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/ict_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/ict_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/ict_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/image_folder.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/image_folder.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/image_folder.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/image_folder.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/indexed_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/indexed_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/indexed_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/multimodal_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/multimodal_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/multimodal_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/multimodal_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/orqa_wiki_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/orqa_wiki_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/orqa_wiki_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/orqa_wiki_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/realm_dataset_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/realm_dataset_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/realm_dataset_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/realm_dataset_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/realm_index.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/realm_index.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/realm_index.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/realm_index.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/t5_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/t5_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/t5_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/t5_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/test/test_indexed_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/test/test_indexed_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/test/test_indexed_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/test/test_indexed_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/test/test_preprocess_data.sh b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/test/test_preprocess_data.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/test/test_preprocess_data.sh
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/test/test_preprocess_data.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/vit_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/data/vit_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/data/vit_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/data/vit_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/dist_signal_handler.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/dist_signal_handler.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/dist_signal_handler.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/dist_signal_handler.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/enums.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/enums.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/enums.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fp16_deprecated/loss_scaler.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/fp16_deprecated/loss_scaler.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fp16_deprecated/loss_scaler.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/fp16_deprecated/loss_scaler.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/compat.h b/nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/compat.h
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/compat.h
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/compat.h
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/tests/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/tests/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/tests/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/tests/test_fused_kernels.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/tests/test_fused_kernels.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/tests/test_fused_kernels.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/tests/test_fused_kernels.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/type_shim.h b/nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/type_shim.h
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/fused_kernels/type_shim.h
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/fused_kernels/type_shim.h
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/global_vars.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/global_vars.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/global_vars.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/global_vars.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/indexer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/indexer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/indexer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/indexer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/initialize.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/initialize.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/initialize.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/initialize.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/log_handler.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/log_handler.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/log_handler.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/log_handler.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/memory.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/memory.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/memory.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/memory.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/microbatches.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/microbatches.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/microbatches.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/microbatches.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/bert_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/bert_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/bert_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/bert_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/biencoder_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/biencoder_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/biencoder_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/biencoder_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/classification.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/classification.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/classification.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/distributed.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/distributed.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/distributed.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/distributed.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/enums.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/enums.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/enums.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/enums.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/fused_bias_gelu.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/fused_bias_gelu.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/fused_bias_gelu.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/fused_bias_gelu.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/fused_layer_norm.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/fused_layer_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/fused_layer_norm.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/fused_layer_norm.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/fused_softmax.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/fused_softmax.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/fused_softmax.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/fused_softmax.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/gpt_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/gpt_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/gpt_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/language_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/language_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/language_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/language_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/module.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/module.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/module.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/module.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/multiple_choice.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/multiple_choice.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/multiple_choice.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/multiple_choice.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/realm_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/realm_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/realm_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/realm_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/rms_norm.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/rms_norm.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/rms_norm.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/rms_norm.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/rotary_pos_embedding.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/rotary_pos_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/rotary_pos_embedding.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/rotary_pos_embedding.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/t5_model.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/t5_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/t5_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/t5_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/transformer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/transformer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/transformer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/transformer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/classification.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/classification.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/classification.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/dino.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/dino.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/dino.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/dino.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/esvit_swin_backbone.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/esvit_swin_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/esvit_swin_backbone.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/esvit_swin_backbone.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/inpainting.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/inpainting.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/inpainting.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/inpainting.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/knn_monitor.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/knn_monitor.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/knn_monitor.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/knn_monitor.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/mit_backbone.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/mit_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/mit_backbone.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/mit_backbone.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/swin_backbone.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/swin_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/swin_backbone.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/swin_backbone.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/vit_backbone.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/vit_backbone.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/model/vision/vit_backbone.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/model/vision/vit_backbone.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/commons.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/commons.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/commons.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/commons.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_cross_entropy.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_cross_entropy.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_cross_entropy.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_data.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_data.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_initialize.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_initialize.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_initialize.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_initialize.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_layers.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_layers.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_layers.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_layers.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_random.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_random.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/mpu/tests/test_random.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/mpu/tests/test_random.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/clip_grads.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/clip_grads.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/clip_grads.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/clip_grads.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/distrib_optimizer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/distrib_optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/distrib_optimizer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/distrib_optimizer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/grad_scaler.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/grad_scaler.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/grad_scaler.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/grad_scaler.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/optimizer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/optimizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/optimizer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/optimizer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer_param_scheduler.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer_param_scheduler.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/optimizer_param_scheduler.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/optimizer_param_scheduler.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/p2p_communication.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/p2p_communication.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/p2p_communication.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/p2p_communication.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/static/index.html b/nlp/llm/llama2-7b_sft/pytorch/megatron/static/index.html
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/static/index.html
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/static/index.html
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/api.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/api.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/api.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/api.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/beam_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/beam_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/beam_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/beam_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/communication.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/communication.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/communication.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/communication.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/forward_step.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/forward_step.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/forward_step.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/forward_step.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/generation.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/generation.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/generation.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/generation.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/sampling.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/sampling.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/sampling.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/sampling.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/tokenization.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation/tokenization.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation/tokenization.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation_server.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation_server.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation_server.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation_server.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/text_generation_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/text_generation_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/theoretical_memory_usage.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/theoretical_memory_usage.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/theoretical_memory_usage.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/theoretical_memory_usage.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/timers.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/timers.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/timers.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/timers.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/bert_tokenization.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/bert_tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/bert_tokenization.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/bert_tokenization.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/gpt2_tokenization.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/gpt2_tokenization.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/gpt2_tokenization.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/gpt2_tokenization.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/tokenization_utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/tokenization_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/tokenization_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/tokenization_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/tokenizer.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/tokenizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/tokenizer/tokenizer.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/tokenizer/tokenizer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/training.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/training.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/training.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/training.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/utils.py b/nlp/llm/llama2-7b_sft/pytorch/megatron/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/megatron/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/megatron/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_bert.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_bert.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_bert.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_bert.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_gpt.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_gpt.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_gpt.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_gpt_megatron.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_gpt_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_gpt_megatron.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_gpt_megatron.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_ict.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_ict.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_ict.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_ict.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_retro.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_retro.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_retro.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_retro.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_t5.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_t5.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_t5.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_t5.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_vision_classify.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_vision_classify.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_vision_classify.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_vision_classify.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_vision_dino.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_vision_dino.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_vision_dino.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_vision_dino.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_vision_inpaint.py b/nlp/llm/llama2-7b_sft/pytorch/pretrain_vision_inpaint.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/pretrain_vision_inpaint.py
rename to nlp/llm/llama2-7b_sft/pytorch/pretrain_vision_inpaint.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/report_theoretical_memory.py b/nlp/llm/llama2-7b_sft/pytorch/report_theoretical_memory.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/report_theoretical_memory.py
rename to nlp/llm/llama2-7b_sft/pytorch/report_theoretical_memory.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/setup.py b/nlp/llm/llama2-7b_sft/pytorch/setup.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/setup.py
rename to nlp/llm/llama2-7b_sft/pytorch/setup.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/data_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/data_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/data_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/data_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/ensemble_classifier.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/ensemble_classifier.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/ensemble_classifier.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/ensemble_classifier.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_harness/download.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/eval_harness/download.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_harness/download.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/eval_harness/download.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_harness/evaluate.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/eval_harness/evaluate.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_harness/evaluate.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/eval_harness/evaluate.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_harness/report-to-csv.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/eval_harness/report-to-csv.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_harness/report-to-csv.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/eval_harness/report-to-csv.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/eval_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/eval_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/eval_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/finetune_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/finetune_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/finetune_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/finetune_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/cola.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/cola.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/cola.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/cola.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/data.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/data.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/finetune.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/finetune.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/finetune.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/finetune.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/mnli.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/mnli.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/mnli.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/mnli.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/mrpc.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/mrpc.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/mrpc.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/mrpc.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/qnli.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/qnli.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/qnli.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/qnli.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/qqp.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/qqp.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/qqp.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/qqp.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/rte.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/rte.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/rte.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/rte.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/sst2.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/sst2.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/sst2.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/sst2.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/stsb.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/glue/stsb.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/glue/stsb.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/glue/stsb.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/main.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/main.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/main.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/main.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/README.md b/nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/evaluate.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/evaluate.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/evaluate.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/evaluate.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/main.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/main.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/main.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/main.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/metrics.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/metrics.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/metrics.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/metrics.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/preprocessing.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/preprocessing.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/preprocessing.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/preprocessing.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/prompt.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/prompt.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/msdp/prompt.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/msdp/prompt.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/README.md b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/evaluate_orqa.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/evaluate_orqa.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/evaluate_orqa.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/evaluate_orqa.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/evaluate_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/evaluate_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/evaluate_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/evaluate_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/supervised/data.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/supervised/data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/supervised/data.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/supervised/data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/supervised/eval_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/supervised/eval_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/supervised/eval_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/supervised/eval_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/supervised/finetune.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/supervised/finetune.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/supervised/finetune.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/supervised/finetune.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/unsupervised/nq.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/unsupervised/nq.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/unsupervised/nq.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/unsupervised/nq.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/unsupervised/qa_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/unsupervised/qa_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/unsupervised/qa_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/unsupervised/qa_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/unsupervised/tokenizers.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/unsupervised/tokenizers.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/orqa/unsupervised/tokenizers.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/orqa/unsupervised/tokenizers.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/race/data.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/race/data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/race/data.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/race/data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/race/finetune.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/race/finetune.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/race/finetune.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/race/finetune.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/classification/classification.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/classification/classification.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/classification/classification.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/classification/classification.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/classification/eval_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/classification/eval_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/classification/eval_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/classification/eval_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/finetune_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/finetune_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/finetune_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/finetune_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/main.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/main.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/main.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/main.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/cityscapes.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/cityscapes.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/cityscapes.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/cityscapes.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/data.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/data.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/finetune_segformer.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/finetune_segformer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/finetune_segformer.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/finetune_segformer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/finetune_setr.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/finetune_setr.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/finetune_setr.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/finetune_setr.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/metrics.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/metrics.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/metrics.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/metrics.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/seg_heads.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/seg_heads.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/seg_heads.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/seg_heads.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/seg_models.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/seg_models.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/seg_models.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/seg_models.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/transforms.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/transforms.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/transforms.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/transforms.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/utils.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/vision/segmentation/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/vision/segmentation/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/zeroshot_gpt/datasets.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/zeroshot_gpt/datasets.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/zeroshot_gpt/datasets.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/zeroshot_gpt/datasets.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/zeroshot_gpt/detokenizer.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/zeroshot_gpt/detokenizer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/zeroshot_gpt/detokenizer.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/zeroshot_gpt/detokenizer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/zeroshot_gpt/evaluate.py b/nlp/llm/llama2-7b_sft/pytorch/tasks/zeroshot_gpt/evaluate.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tasks/zeroshot_gpt/evaluate.py
rename to nlp/llm/llama2-7b_sft/pytorch/tasks/zeroshot_gpt/evaluate.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/conftest.py b/nlp/llm/llama2-7b_sft/pytorch/tests/conftest.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/conftest.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/conftest.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/check_slurm_job_completion.py b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/check_slurm_job_completion.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/check_slurm_job_completion.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/check_slurm_job_completion.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/test_ci_pipeline.py b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/test_ci_pipeline.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/test_ci_pipeline.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/test_ci_pipeline.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/python_test_utils/test_resume_checkpoint_pipeline.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/shell_test_utils/jobwait.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/shell_test_utils/jobwait.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/shell_test_utils/jobwait.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/shell_test_utils/jobwait.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp1_pp2_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp1_pp2_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp1_pp2_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp1_pp2_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp1_pp4_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp1_pp4_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp1_pp4_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp1_pp4_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp2_pp2_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp2_pp2_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp2_pp2_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp2_pp2_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp4_pp1_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp4_pp1_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/bert/bert_tp4_pp1_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/bert/bert_tp4_pp1_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp2_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp2_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp2_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp2_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp4_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp4_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp4_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp1_pp4_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp2_pp2_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp2_pp2_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp2_pp2_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp2_pp2_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp4_pp1_1nodes_50steps.json b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp4_pp1_1nodes_50steps.json
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_results/gpt3/gpt3_tp4_pp1_1nodes_50steps.json
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_results/gpt3/gpt3_tp4_pp1_1nodes_50steps.json
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_resume_checkpoint_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_resume_checkpoint_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_resume_checkpoint_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_resume_checkpoint_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/bert/sbatch_bert_distributed_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/models/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/models/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/models/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/models/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/models/test_gpt_embedding.py b/nlp/llm/llama2-7b_sft/pytorch/tests/models/test_gpt_embedding.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/models/test_gpt_embedding.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/models/test_gpt_embedding.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/models/test_gpt_model.py b/nlp/llm/llama2-7b_sft/pytorch/tests/models/test_gpt_model.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/models/test_gpt_model.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/models/test_gpt_model.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/pipeline_parallel/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/pipeline_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/pipeline_parallel/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/pipeline_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/pipeline_parallel/test_schedules.py b/nlp/llm/llama2-7b_sft/pytorch/tests/pipeline_parallel/test_schedules.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/pipeline_parallel/test_schedules.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/pipeline_parallel/test_schedules.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/requirements.txt b/nlp/llm/llama2-7b_sft/pytorch/tests/requirements.txt
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/requirements.txt
rename to nlp/llm/llama2-7b_sft/pytorch/tests/requirements.txt
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/run_megatron.py b/nlp/llm/llama2-7b_sft/pytorch/tests/run_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/run_megatron.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/run_megatron.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/run_test_multi_node.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/run_test_multi_node.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/run_test_multi_node.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/run_test_multi_node.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/run_test_one_node.sh b/nlp/llm/llama2-7b_sft/pytorch/tests/run_test_one_node.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/run_test_one_node.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tests/run_test_one_node.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/tensor_parallel/__int__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/tensor_parallel/__int__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/tensor_parallel/__int__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/tensor_parallel/__int__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/test_megatron.py b/nlp/llm/llama2-7b_sft/pytorch/tests/test_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/test_megatron.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/test_megatron.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/tests.py b/nlp/llm/llama2-7b_sft/pytorch/tests/tests.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/tests.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/tests.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_core_attention.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_core_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_core_attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_core_attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_module.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_module.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_module.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_module.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_attention.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_attention.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_attention.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_attention.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_mlp.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_mlp.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_mlp.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_mlp.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_transformer_block.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_transformer_block.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_transformer_block.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_transformer_block.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_transformer_layer.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_transformer_layer.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_parallel_transformer_layer.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_parallel_transformer_layer.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_transformer_config.py b/nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_transformer_config.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/transformer/test_transformer_config.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/transformer/test_transformer_config.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_cross_entropy.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_cross_entropy.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_cross_entropy.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_cross_entropy.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_data.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_data.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_mappings.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_mappings.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_mappings.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_mappings.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_random.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_random.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_random.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_random.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_basic.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_basic.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_basic.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_basic.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_parallel_state.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_parallel_state.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_parallel_state.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_parallel_state.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_utilities.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_utilities.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_utilities.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_utilities.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_utils.py b/nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tests/unit_tests/test_utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tests/unit_tests/test_utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/dataset.py b/nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/embed.py b/nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/embed.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/embed.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/embed.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/external_libs.py b/nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/external_libs.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/external_libs.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/external_libs.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/huggingface.py b/nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/huggingface.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/huggingface.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/huggingface.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/utils.py b/nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/bert_embedding/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/bert_embedding/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/checkpoint_loader_megatron.py b/nlp/llm/llama2-7b_sft/pytorch/tools/checkpoint_loader_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/checkpoint_loader_megatron.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/checkpoint_loader_megatron.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/checkpoint_saver_megatron.py b/nlp/llm/llama2-7b_sft/pytorch/tools/checkpoint_saver_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/checkpoint_saver_megatron.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/checkpoint_saver_megatron.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/checkpoint_util.py b/nlp/llm/llama2-7b_sft/pytorch/tools/checkpoint_util.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/checkpoint_util.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/checkpoint_util.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/README.md b/nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/deepspeed_checkpoint.py b/nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/deepspeed_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/deepspeed_checkpoint.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/deepspeed_checkpoint.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_megatron.py b/nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/deepspeed_to_megatron.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_megatron.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/deepspeed_to_megatron.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_transformers.py b/nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/deepspeed_to_transformers.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/deepspeed_to_transformers.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/deepspeed_to_transformers.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/inspect_checkpoint.py b/nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/inspect_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/inspect_checkpoint.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/inspect_checkpoint.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py b/nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/convert_checkpoint/inspect_deepspeed_checkpoint.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/generate_samples_gpt.py b/nlp/llm/llama2-7b_sft/pytorch/tools/generate_samples_gpt.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/generate_samples_gpt.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/generate_samples_gpt.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/linter.py b/nlp/llm/llama2-7b_sft/pytorch/tools/linter.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/linter.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/linter.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/loader_llama2_hf.py b/nlp/llm/llama2-7b_sft/pytorch/tools/loader_llama2_hf.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/loader_llama2_hf.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/loader_llama2_hf.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/merge_datasets.py b/nlp/llm/llama2-7b_sft/pytorch/tools/merge_datasets.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/merge_datasets.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/merge_datasets.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/README.md b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/add_id.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/add_id.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/add_id.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/add_id.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/blacklist_urls.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/blacklist_urls.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/blacklist_urls.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/blacklist_urls.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/cleanup_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/cleanup_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/cleanup_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/cleanup_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/cleanup_fix_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/cleanup_fix_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/cleanup_fix_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/cleanup_fix_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/filter_ngrams.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/filter_ngrams.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/filter_ngrams.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/filter_ngrams.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/find_duplicates.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/find_duplicates.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/find_duplicates.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/find_duplicates.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/group_duplicate_url.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/group_duplicate_url.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/group_duplicate_url.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/group_duplicate_url.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/merge_jsons.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/merge_jsons.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/merge_jsons.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/merge_jsons.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/remove_group_duplicates.py b/nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/remove_group_duplicates.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/openwebtext/remove_group_duplicates.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/openwebtext/remove_group_duplicates.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/preprocess_data.py b/nlp/llm/llama2-7b_sft/pytorch/tools/preprocess_data.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/preprocess_data.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/preprocess_data.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/preprocess_data_nmt.py b/nlp/llm/llama2-7b_sft/pytorch/tools/preprocess_data_nmt.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/preprocess_data_nmt.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/preprocess_data_nmt.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/README.md b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/README.md
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/README.md
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/README.md
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/cli/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/cli/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/cli/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/cli/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/cli/__main__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/cli/__main__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/cli/__main__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/cli/__main__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/cli/cli.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/cli/cli.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/cli/cli.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/cli/cli.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/build.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/build.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/build.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/build.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/dataset.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/utils.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/db/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/db/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/get_dataset_configs.sh b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/get_dataset_configs.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/get_dataset_configs.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/get_dataset_configs.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/get_preprocess_cmd.sh b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/get_preprocess_cmd.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/get_preprocess_cmd.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/get_preprocess_cmd.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/preprocess_data.sh b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/preprocess_data.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/preprocess_data.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/preprocess_data.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/pretrain_model.sh b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/pretrain_model.sh
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/examples/pretrain_model.sh
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/examples/pretrain_model.sh
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/external_libs.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/external_libs.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/external_libs.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/external_libs.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/build.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/build.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/build.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/build.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/factory.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/factory.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/factory.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/factory.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/index.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/index.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/index.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/index.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/indexes/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/indexes/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/indexes/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/indexes/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/indexes/faiss_base.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/indexes/faiss_base.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/indexes/faiss_base.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/indexes/faiss_base.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/indexes/faiss_par_add.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/indexes/faiss_par_add.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/indexes/faiss_par_add.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/indexes/faiss_par_add.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/utils.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/index/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/index/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/main.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/main.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/main.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/main.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/__init__.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/__init__.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/__init__.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/__init__.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/chunk_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/chunk_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/chunk_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/chunk_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/query.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/query.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/query.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/query.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/retro_dataset.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/retro_dataset.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/retro_dataset.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/retro_dataset.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/utils.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/query/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/query/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/utils.py b/nlp/llm/llama2-7b_sft/pytorch/tools/retro/utils.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/retro/utils.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/retro/utils.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/run_text_generation_server.py b/nlp/llm/llama2-7b_sft/pytorch/tools/run_text_generation_server.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/run_text_generation_server.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/run_text_generation_server.py
diff --git a/nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/text_generation_cli.py b/nlp/llm/llama2-7b_sft/pytorch/tools/text_generation_cli.py
similarity index 100%
rename from nlp/llm/llama2-7b_sft/megatron-deepspeed/tools/text_generation_cli.py
rename to nlp/llm/llama2-7b_sft/pytorch/tools/text_generation_cli.py
diff --git a/nlp/llm/llama3_8b/megatron-deepspeed/README.md b/nlp/llm/llama3_8b/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama3_8b/megatron-deepspeed/README.md
rename to nlp/llm/llama3_8b/pytorch/README.md
diff --git a/nlp/llm/llama3_8b/colossalai/README.md b/nlp/llm/llama3_8b_sft/pytorch/README.md
similarity index 100%
rename from nlp/llm/llama3_8b/colossalai/README.md
rename to nlp/llm/llama3_8b_sft/pytorch/README.md
diff --git a/nlp/llm/mamba-2/megatron-lm/README.md b/nlp/llm/mamba-2/pytorch/README.md
similarity index 100%
rename from nlp/llm/mamba-2/megatron-lm/README.md
rename to nlp/llm/mamba-2/pytorch/README.md
diff --git a/nlp/llm/minicpm/deepspeed/=0.9.16 b/nlp/llm/minicpm/deepspeed/=0.9.16
deleted file mode 100644
index b2bc93ecc295abd8a611955470680c9e243ffe5e..0000000000000000000000000000000000000000
--- a/nlp/llm/minicpm/deepspeed/=0.9.16
+++ /dev/null
@@ -1,2 +0,0 @@
-Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
-Collecting timm
diff --git a/nlp/llm/minicpm/deepspeed/README.md b/nlp/llm/minicpm/deepspeed/README.md
deleted file mode 100644
index b35c29ef9bbdd0669bdf6138438f2ea83ce4c986..0000000000000000000000000000000000000000
--- a/nlp/llm/minicpm/deepspeed/README.md
+++ /dev/null
@@ -1,955 +0,0 @@
-<div align="center">
-<img src="./assets/minicpm_logo.png" width="500em" ></img> 
-</div>
-
-<h4 align="center">
-    <p>
-        <b>中文</b> | <a href="https://github.com/OpenBMB/MiniCPM/blob/main/README-en.md">English</a>
-    </p>
-</h4>
-
-
-<p align="center">
-<a href="https://openbmb.vercel.app/?category=Chinese+Blog" target="_blank">MiniCPM 技术博客</a> |
-<a href="https://modelbest.feishu.cn/wiki/D2tFw8Pcsi5CIzkaHNacLK64npg" target="_blank">MiniCPM 知识库</a> |
-<a href="https://arxiv.org/abs/2404.06395" target="_blank">MiniCPM 论文</a> |
-<a href="https://github.com/OpenBMB/MiniCPM-V/" target="_blank">MiniCPM-V 仓库</a> |
-加入我们的 <a href="https://discord.gg/3cGQn9b3YM" target="_blank">discord</a> 和 <a href="https://github.com/OpenBMB/MiniCPM/blob/main/assets/wechat.jpg" target="_blank">微信群</a>
- 
-</p>
-
-## 更新日志🔥
-
-- [2024.09.28] **[LLMxMapReduce](https://github.com/thunlp/LLMxMapReduce) 开源，支持MiniCPM3-4B，理论上支持无限长文本输入！**
-- [2024.09.18] **[SGLang](https://github.com/sgl-project/sglang) 已经支持 MiniCPM3-4B (推荐使用)！由于 SGLang v0.3 对 MiniCPM3 中使用的 MLA 结构进行了推理优化，吞吐量相比于 vLLM 提高 70%！**[[用法](#sglang推荐)]
-- [2024.09.16] [llama.cpp](https://github.com/ggerganov/llama.cpp/releases/tag/b3765) 已经官方支持 MiniCPM3-4B！[[GGUF模型](https://huggingface.co/openbmb/MiniCPM3-4B-GGUF)|[用法](#llamacpp)]
-- [2024.09.05] 发布 [**MiniCPM3-4B**](https://huggingface.co/openbmb/MiniCPM3-4B)！该模型的表现超越 Phi-3.5-mini-instruct 和 GPT-3.5-Turbo-0125，并且能够比肩 Llama3.1-8B-Instruct、Qwen2-7B-Instruct、GLM-4-9B-Chat 等多个 7B-9B 参数量的模型。
-- [2024.07.09] MiniCPM-2B 已经支持使用 [SGLang](#sglang-推理) 推理！
-- [2024.07.05] 发布 [MiniCPM-S-1B](https://huggingface.co/openbmb/MiniCPM-S-1B-sft)！该模型在保持下游任务性能无损的前提下，FFN 层实现了 87.89% 的平均稀疏度，将 FFN FLOPs 降低了 84%。
-- [2024.04.11] 发布 [MiniCPM-2B-128k](https://huggingface.co/openbmb/MiniCPM-2B-128k)、[MiniCPM-MoE-8x2B](https://huggingface.co/openbmb/MiniCPM-MoE-8x2B) 和 [MiniCPM-1B](https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16)！点击[这里](https://openbmb.vercel.app/?category=Chinese+Blog)查看技术博客。
-- [2024.03.16] MiniCPM-2B 的 30 余个中间检查点开放了！[HuggingFace链接](https://huggingface.co/openbmb/MiniCPM-2B-history)
-- [2024.02.01] 发布 [**MiniCPM-2B**](https://huggingface.co/openbmb/MiniCPM-2B-sft-bf16)！该模型在公开评测集上与 Mistral-7B 表现相近（中文、数学、代码能力更优），整体性能超越 Llama2-13B、MPT-30B、Falcon-40B 等模型。
-
-## 目录
-
-- [模型下载](#模型下载)
-- [MiniCPM 3.0](#minicpm-30)
-  - [评测结果](#评测结果)
-    - [综合评测](#综合评测)
-    - [工具调用能力](#工具调用能力)
-    - [长文本能力](#长文本能力)
-  - [模型推理](#模型推理)
-    - [HuggingFace](#huggingface)
-    - [vLLM](#vllm)
-    - [llama.cpp](#llamacpp)
-  - [模型微调](#模型微调)
-    - [LLaMA-Factory](#llama-factory)
-  - [进阶功能](#进阶功能)
-    - [工具调用](#工具调用)
-    - [代码解释器](#代码解释器)
-- [MiniCPM 2.0](#minicpm-20)
-- [MiniCPM 1.0](#minicpm-10)
-
-
-## 模型下载
- 
-  | HuggingFace | ModelScope |
-  |-------------|------------|
-  |[MiniCPM3-4B](https://huggingface.co/openbmb/MiniCPM3-4B)|[MiniCPM3-4B](https://www.modelscope.cn/models/OpenBMB/MiniCPM3-4B)|
-  |[MiniCPM-2B-sft](https://huggingface.co/openbmb/MiniCPM-2B-sft-bf16)|[MiniCPM-2B-sft](https://modelscope.cn/models/OpenBMB/miniCPM-bf16)|
-  |[MiniCPM-2B-dpo](https://huggingface.co/openbmb/MiniCPM-2B-dpo-bf16)|[MiniCPM-2B-dpo](https://modelscope.cn/models/OpenBMB/MiniCPM-2B-dpo-bf16/summary)|
-  |[MiniCPM-2B-128k](https://huggingface.co/openbmb/MiniCPM-2B-128k) |[MiniCPM-2B-128k](https://modelscope.cn/models/openbmb/MiniCPM-2B-128k/summary)| 
-  |[MiniCPM-MoE-8x2B](https://huggingface.co/openbmb/MiniCPM-MoE-8x2B) |[MiniCPM-MoE-8x2B](https://modelscope.cn/models/OpenBMB/MiniCPM-MoE-8x2B)| 
-  |[MiniCPM-1B](https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16) | [MiniCPM-1B](https://modelscope.cn/models/OpenBMB/MiniCPM-1B-sft-bf16) |
-  |[MiniCPM-S-1B](https://huggingface.co/openbmb/MiniCPM-S-1B-sft)|[MiniCPM-S-1B](https://modelscope.cn/models/OpenBMB/MiniCPM-S-1B-sft)|
-
-  注: 更多模型版本见[这里](https://huggingface.co/collections/openbmb/minicpm-2b-65d48bf958302b9fd25b698f)。
-
-
-## MiniCPM 3.0
-
-MiniCPM 3.0 是一个 4B 参数量的语言模型，相比 MiniCPM1.0/2.0，功能更加全面，综合能力大幅提升，多数评测集上的效果比肩甚至超越众多 7B-9B 模型。
-* **支持工具调用🛠️（Function Calling）和代码解释器💻（Code Interpreter）**：[Berkeley Function Calling Leaderboard (BFCL)](https://gorilla.cs.berkeley.edu/leaderboard.html) 上取得 9B 规模以下 SOTA，超越 GLM-4-9B-Chat、Qwen2-7B-Instruct。
-* **超强的推理能力🧮**：数学能力方面，[MathBench](https://open-compass.github.io/MathBench/) 上的效果超越 GPT-3.5-Turbo 以及多个 7B-9B 模型。在非常具有挑战性的 [LiveCodeBench](https://livecodebench.github.io/) 上，效果超越 Llama3.1-8B-Instruct。
-* **出色的中英文指令遵循能力🤖**：英文指令遵循 [IFEval](https://huggingface.co/datasets/google/IFEval)、中文指令遵循 [FollowBench-zh](https://huggingface.co/datasets/YuxinJiang/FollowBench) 效果超越 GLM-4-9B-Chat、Qwen2-7B-Instruct。
-* **长文本能力**：原生支持 32k 上下文长度，32k 长度内大海捞针全绿。提出 [LLMxMapReduce](https://github.com/thunlp/LLMxMapReduce) ，理论可处理的上下文长度达到 +∞，在综合性长文本评测基准 [InfiniteBench](https://github.com/OpenBMB/InfiniteBench) 平均得分超越GPT-4、KimiChat等标杆模型。
-* **RAG能力**：我们发布了 [MiniCPM RAG 套件](https://huggingface.co/collections/openbmb/minicpm-rag-suite-66d976b4204cd0a4f8beaabb)。基于 MiniCPM 系列模型的 [MiniCPM-Embedding](https://huggingface.co/openbmb/MiniCPM-Embedding)、[MiniCPM-Reranker](https://huggingface.co/openbmb/MiniCPM-Reranker) 在中文、中英跨语言检索测试中取得 SOTA 表现；针对 RAG 场景的 [MiniCPM3-RAG-LoRA](https://huggingface.co/openbmb/MiniCPM3-RAG-LoRA) 在开放域问答等多项任务上超越 Llama3-8B、Baichuan2-13B 等模型。
-
-### 评测结果
-
-#### 综合评测
-
-<table>
-    <tr>
-        <td>评测集</td>
-        <td>Qwen2-7B-Instruct</td>
-        <td>GLM-4-9B-Chat</td>
-        <td>Gemma2-9B-it</td>
-        <td>Llama3.1-8B-Instruct</td>
-        <td>GPT-3.5-Turbo-0125</td>
-        <td>Phi-3.5-mini-Instruct(3.8B)</td>
-        <td>MiniCPM3-4B </td>
-    </tr>
-    <tr>
-        <td colspan="15" align="left"><strong>英文能力</strong></td>
-    </tr>
-    <tr>
-        <td>MMLU</td>
-        <td>70.5</td>
-        <td>72.4</td>
-        <td>72.6</td>
-        <td>69.4</td>
-        <td>69.2</td>
-        <td>68.4</td>
-        <td>67.2 </td>
-    </tr>
-    <tr>
-        <td>BBH</td>
-        <td>64.9</td>
-        <td>76.3</td>
-        <td>65.2</td>
-        <td>67.8</td>
-        <td>70.3</td>
-        <td>68.6</td>
-        <td>70.2 </td>
-    </tr>
-    <tr>
-        <td>MT-Bench</td>
-        <td>8.41</td>
-        <td>8.35</td>
-        <td>7.88</td>
-        <td>8.28</td>
-        <td>8.17</td>
-        <td>8.60</td>
-        <td>8.41 </td>
-    </tr>
-    <tr>
-        <td>IFEVAL (Prompt Strict-Acc.)</td>
-        <td>51.0</td>
-        <td>64.5</td>
-        <td>71.9</td>
-        <td>71.5</td>
-        <td>58.8</td>
-        <td>49.4</td>
-        <td>68.4 </td>
-    </tr>
-    <tr>
-        <td colspan="15" align="left"><strong>中文能力</strong></td>
-    </tr>
-    <tr>
-        <td>CMMLU</td>
-        <td>80.9</td>
-        <td>71.5</td>
-        <td>59.5</td>
-        <td>55.8</td>
-        <td>54.5</td>
-        <td>46.9</td>
-        <td>73.3 </td>
-    </tr>
-    <tr>
-        <td>CEVAL</td>
-        <td>77.2</td>
-        <td>75.6</td>
-        <td>56.7</td>
-        <td>55.2</td>
-        <td>52.8</td>
-        <td>46.1</td>
-        <td>73.6 </td>
-    </tr>
-    <tr>
-        <td>AlignBench v1.1</td>
-        <td>7.10</td>
-        <td>6.61</td>
-        <td>7.10</td>
-        <td>5.68</td>
-        <td>5.82</td>
-        <td>5.73</td>
-        <td>6.74 </td>
-    </tr>
-    <tr>
-        <td>FollowBench-zh (SSR)</td>
-        <td>63.0</td>
-        <td>56.4</td>
-        <td>57.0</td>
-        <td>50.6</td>
-        <td>64.6</td>
-        <td>58.1</td>
-        <td>66.8 </td>
-    </tr>
-    <tr>
-        <td colspan="15" align="left"><strong>数学能力</strong></td>
-    </tr>
-    <tr>
-        <td>MATH</td>
-        <td>49.6</td>
-        <td>50.6</td>
-        <td>46.0</td>
-        <td>51.9</td>
-        <td>41.8</td>
-        <td>46.4</td>
-        <td>46.6 </td>
-    </tr>
-    <tr>
-        <td>GSM8K</td>
-        <td>82.3</td>
-        <td>79.6</td>
-        <td>79.7</td>
-        <td>84.5</td>
-        <td>76.4</td>
-        <td>82.7</td>
-        <td>81.1 </td>
-    </tr>
-    <tr>
-        <td>MathBench</td>
-        <td>63.4</td>
-        <td>59.4</td>
-        <td>45.8</td>
-        <td>54.3</td>
-        <td>48.9</td>
-        <td>54.9</td>
-        <td>65.6 </td>
-    </tr>
-    <tr>
-        <td colspan="15" align="left"><strong>代码能力</strong></td>
-    </tr>
-    <tr>
-        <td>HumanEval+</td>
-        <td>70.1</td>
-        <td>67.1</td>
-        <td>61.6</td>
-        <td>62.8</td>
-        <td>66.5</td>
-        <td>68.9</td>
-        <td>68.3 </td>
-    </tr>
-    <tr>
-        <td>MBPP+</td>
-        <td>57.1</td>
-        <td>62.2</td>
-        <td>64.3</td>
-        <td>55.3</td>
-        <td>71.4</td>
-        <td>55.8</td>
-        <td>63.2 </td>
-    </tr>
-    <tr>
-        <td>LiveCodeBench v3</td>
-        <td>22.2</td>
-        <td>20.2</td>
-        <td>19.2</td>
-        <td>20.4</td>
-        <td>24.0</td>
-        <td>19.6</td>
-        <td>22.6 </td>
-    </tr>
-    <tr>
-        <td colspan="15" align="left"><strong>工具调用能力</strong></td>
-    </tr>
-    <tr>
-        <td>BFCL v2</td>
-        <td>71.6</td>
-        <td>70.1</td>
-        <td>19.2</td>
-        <td>73.3</td>
-        <td>75.4</td>
-        <td>48.4</td>
-        <td>76.0 </td>
-    </tr>
-    <tr>
-        <td colspan="15" align="left"><strong>综合能力</strong></td>
-    </tr>
-    <tr>
-        <td>平均分</td>
-        <td>65.3</td>
-        <td>65.0</td>
-        <td>57.9</td>
-        <td>60.8</td>
-        <td>61.0</td>
-        <td>57.2</td>
-        <td><strong>66.3</strong></td>
-    </tr>
-</table>
-
-#### 工具调用能力
-
-我们在 [Berkeley Function Calling Leaderboard (BFCL)](https://gorilla.cs.berkeley.edu/leaderboard.html) 上测试了模型的工具调用能力，MiniCPM3-4B 在该榜单上的表现超越了多个 7B-9B 参数量的模型，优于 GPT-3.5-Turbo-0125。
-
-<table>
-    <tr>
-        <td>模型</td>
-        <td>总体准确率</td>
-        <td>AST Summary</td>
-        <td>Exec Summary</td>
-        <td>Irrelevance Detection</td>
-        <td>Relevance Detection </td>
-    </tr>
-    <tr>
-        <td>MiniCPM3-4B</td>
-        <td>76.03%</td>
-        <td>68.55%</td>
-        <td>85.54%</td>
-        <td>53.71%</td>
-        <td>90.24% </td>
-    </tr>
-    <tr>
-        <td>Llama3.1-8B-Instruct</td>
-        <td>73.28%</td>
-        <td>64.61%</td>
-        <td>86.48%</td>
-        <td>43.12%</td>
-        <td>85.37% </td>
-    </tr>
-    <tr>
-        <td>Qwen2-7B-Instruct</td>
-        <td>71.61%</td>
-        <td>65.71%</td>
-        <td>79.57%</td>
-        <td>44.70%</td>
-        <td>90.24% </td>
-    </tr>
-    <tr>
-        <td>GLM-4-9B-Chat</td>
-        <td>70.08%</td>
-        <td>60.69%</td>
-        <td>80.02%</td>
-        <td>55.02%</td>
-        <td>82.93% </td>
-    </tr>
-    <tr>
-        <td>Phi-3.5-mini-instruct</td>
-        <td>48.44%</td>
-        <td>38.89%</td>
-        <td>54.04%</td>
-        <td>46.78%</td>
-        <td>65.85% </td>
-    </tr>
-    <tr>
-        <td>Gemma2-9B-it</td>
-        <td>19.18%</td>
-        <td>5.41%</td>
-        <td>18.50%</td>
-        <td>88.88%</td>
-        <td>7.32%</td>
-    </tr>
-</table>
-
-#### 长文本能力
-
-在 32k 的上下文长度进行[大海捞针](https://github.com/gkamradt/LLMTest_NeedleInAHaystack)测试，结果如下图：
-
-![needle](assets/eval_needle.jpeg)
-
-同时我们提出[LLMxMapReduce](https://github.com/thunlp/LLMxMapReduce)，利用分治的策略，理论上可以处理无限长度的文本。我们在[InfiniteBench](https://github.com/OpenBMB/InfiniteBench)上测试了模型的长文本处理能力，在LLMxMapReduce框架的加持下，MiniCPM3-4B在这个榜单的平均得分能够超越 GPT-4、KimiChat 等标杆模型。
-
-|                               | Context length| Qwen2-70b | Kimi-Chat(2024.06) | GPT-4 (From InfiniteBench) | MiniCPM 3.0 x MR | Qwen2-70b x MR | Llama3-70bx MR |
-| ----------------------------- | ---------- | --------- | ------------------ | -------------------------- | --------------- | ------------ | ------------- |
-| Math.Find                     | 87.9k      | 59.71%    | 18.57%             | 60.00%                     | 83.43%          | 54.29%       | **91.43%**        |
-| Retrieve.KV                   | 89.9k      | 29.00%    | 69.20%             | 89.00%                     | 93.80%          | 98.80%       | **98.89%**        |
-| En.Dia                        | 103.6K     | 23.00%    | 23.00%             | 7.50%                      | 12.50%          | **46.50%**       | 17.50%        |
-| Code.Debug                    | 114.7k     | 45.43%    | 38.32%             | 54.31%                     | 25.63%          | 54.82%       | **62.94%**       |
-| Retrieve.Number               | 122.4k     | **100.00%**  | 97.45%             | **100.00%**                   | 99.32%          | **100.00%**     | 99.79%        |
-| Retrieve.PassKey              | 122.4k     | **100.00%**   | 99.32%             | **100.00%**                   | 98.81%          | **100.00%**     | **100.00%**      |
-| En.Sum                        | 171.5K     | 31.85%    | 29.94%             | 14.73%                     | 25.89%          | **32.39%**       | 30.63%        |
-| En.MC                         | 184.4k     | 81.66%    | 79.91%             | 68.12%                     | 66.38%          |**83.84%**      | 82.10%        |
-| En.QA        | 192.6k     | 21.97%    | 18.80%             | 22.44%                     | 28.39%          | 23.13%       | **34.70%**      |
-| Zh.QA        | 2068.6k    | 21.40%    | 19.84%             | **25.96%**                    | 23.66%          | 19.10%       | N/A           |
-| avg w/o Zh.QA | /          | 51.92%    | 52.96%             | 55.33%                     | 59.29%          | 64.98%       | **68.64%**        |
-| avg                           | /          | 48.86%    | 49.65%             | 52.39%                     | 55.55%          | **60.39%**       | N/A           |
-
-### 模型推理
-
-#### Huggingface
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-torch.manual_seed(0)
-
-path = 'openbmb/MiniCPM3-4B'
-tokenizer = AutoTokenizer.from_pretrained(path)
-model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='cuda', trust_remote_code=True)
-
-responds, history = model.chat(tokenizer, "请写一篇关于人工智能的文章，详细介绍人工智能的未来发展和隐患。", temperature=0.7, top_p=0.7)
-print(responds)
-```
-
-#### SGLang（推荐）
-* 安装
-
-参考 SGLang [官方仓库](https://github.com/sgl-project/sglang)，通过*源码*安装最新版本。
-
-* 启动推理服务
-```shell
-python -m sglang.launch_server --model openbmb/MiniCPM3-4B --trust-remote-code --port 30000 --chat-template chatml
-```
-
-* 使用示例
-```python
-from sglang import function, system, user, assistant, gen, set_default_backend, RuntimeEndpoint
-
-@function
-def multi_turn_question(s, question_1, question_2):
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=1024))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=1024))
-
-set_default_backend(RuntimeEndpoint("http://localhost:30000"))
-
-state = multi_turn_question.run(
-    question_1="介绍一下人工智能",
-    question_2="写一篇关于它的文章",
-)
-
-for m in state.messages():
-    print(m["role"], ":", m["content"])
-```
-
-#### vLLM
-* 安装 vllm
-  ```shell
-  pip install "vllm>=0.6.2"
-  ```
-* 推理
-  ```python
-  from transformers import AutoTokenizer
-  from vllm import LLM, SamplingParams
-
-  model_name = "openbmb/MiniCPM3-4B"
-  prompt = [{"role": "user", "content": "请写一篇关于人工智能的文章，详细介绍人工智能的未来发展和隐患。"}]
-
-  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-  input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
-
-  llm = LLM(model=model_name,
-      trust_remote_code=True,
-      tensor_parallel_size=1
-  )
-  sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=1024)
-
-  outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
-
-  print(outputs[0].outputs[0].text)
-  ```
-
-#### llama.cpp
-
-我们提供了 MiniCPM3 的 [GGUF 版本](https://huggingface.co/openbmb/MiniCPM3-4B-GGUF)，可以直接使用 llama.cpp 推理。
-
-* 安装 llama.cpp
-  ```shell
-    git clone https://github.com/ggerganov/llama.cpp
-    cd llama.cpp
-    make 
-  ```
-* 推理
-  ```shell
-  ./llama-cli -c 1024 -m minicpm3-4b-fp16.gguf -n 1024 --top-p 0.7 --temp 0.7 --prompt "<|im_start|>user\n请写一篇关于人工智能的文章，详细介绍人工智能的未来发展和隐患。<|im_end|>\n<|im_start|>assistant\n"
-  ```
-
-### 模型微调
-#### LLaMA-Factory
-目前模型微调支持 [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)，使用方法参考 [LLaMA-Factory 微调](https://modelbest.feishu.cn/docx/Z7USdW4lloZzkZxQ14icJ3senjb?from=from_copylink)。
-
-### 进阶功能
-
-对于以下进阶功能，我们的样例代码中使用 [vLLM](#vllm) 进行推理。
-
-#### 工具调用
-
-我们提供了使用 MiniCPM3 调用工具的示例代码：
-
-```bash
-cd demo/minicpm3/function_call
-python function_call.py
-```
-
-如果你想启动一个能够调用工具的推理服务，使用以下代码：
-
-```bash
-cd demo/minicpm3/function_call
-pip install -r requirements.txt
-python openai_api_server.py \
-    --model openbmb/MiniCPM3-4B \
-    --served-model-name MiniCPM3-4B \
-    --chat-template chatml.jinja \
-    --dtype auto \
-    --api-key token-abc123 \
-    --tensor-parallel-size 1 \
-    --trust-remote-code
-```
-
-下面是一个调用搜索工具回答问题的演示：
-
-![function_call](./assets/function_call.gif)
-
-#### 代码解释器
-
-我们提供了一个 MiniCPM3 使用代码解释器的示例代码：
-
-```bash
-cd demo/minicpm3/code_interpreter
-pip install -r requirements.txt
-python code_interpreter.py openbmb/MiniCPM3-4B
-```
-
-下面是一个使用代码解释器生成二维码的演示：
-
-![code_interpreter](./assets/code_interpreter.gif)
-
-## MiniCPM 2.0
-
-<details>
-<summary>查看 MiniCPM 2.0 的详细信息</summary>
-
-MiniCPM 2.0 系列模型对 MiniCPM 进行了多个维度的升级，包括以下模型版本：
-- MiniCPM-2B-128k：将 MiniCPM-2B 的上下文长度从 4k 扩展至 128k，在 InfiniteBench 测试集上优于 ChatGLM3-6B-128k、Yi-6B-200k 等更大参数量的模型。
-- MiniCPM-MoE-8x2B：基于 MiniCPM-2B 进行 MoE 扩展，综合表现相比于 MiniCPM-2B 平均提高 4.5 个百分点。
-- MiniCPM-1B：相比于 MiniCPM-2B 成本下降 60%，综合表现仍然优于 LLaMA2-13B。
-- MiniCPM-S-1B：在保持下游任务性能无损的前提下，FFN 层实现了 87.89% 的平均稀疏度，将 FFN FLOPs 降低了 84%。结合 PowerInfer 推理框架，解码速度提升约 2.8 倍。
-
-### 评测结果
-
-#### MiniCPM-2B-128k 模型评测
-| Model                               | avg   | avg w/o code&math | passkey | number_string | kv_retrieval | longbook_choice_eng | longbook_qa_chn | longbook_qa_eng | longbook_sum_eng | longdialogue_qa_eng | math_calc | math_find | code_debug | code_run |
-|-------------------------------------|-------|-------------------|---------|---------------|--------------|---------------------|-----------------|-----------------|------------------|---------------------|-----------|-----------|------------|----------|
-| LWM-Text-128k                       | 24.45 | 33.62             | 100     | 97.8          | 0.6          | 28.82               | 15.93           | 14.31           | 9.99             | 1.5                 | 0         | 3.43      | 20.05      | 1        |
-| Yarn-Mistral-7b-128k                | 19.84 | 27.36             | 92.71   |               | 0            | 27.95               | 15.49           | 9.55            | 9.06             | 7.5                 | 0         | 17.14     | 0.76       | 1.25     |
-| Mistral-7B-Instruct-v0.2(ABF 1000w) | 27.75 | 36.9              | 100     | 78.98         | 3.6          | 37.12               | 11.74           | 17.37           | 21.12            | 9.5                 | 0         | 29.43     | 17.51      | 0        |
-| Yi-6B-200k                          | 22.15 | 32.54             | 100     | 94.92         | 0            | 36.68               | 15.07           | 9.2             | 0.92             | 3.5                 | 0         | 4.29      | 0.51       | 0.75     |
-| chatglm3-6b-128k                    | 25.58 | 36.57             | 89.93   | 99.66         | 5.2          | 46.29               | 10.7            | 8.38            | 25.91            | 6.5                 | 0         | 8         | 5.33       | 1        |
-| MiniCPM-2.4B-128k                   | 27.32 | 37.68             | 98.31   | 99.83         | 9            | 29.69               | 23.06           | 16.33           | 15.73            | 9.5                 | 0         | 4.29      | 22.08      | 0        |
-
-#### MiniCPM-MoE-8x2B 模型评测
-<div align="left">
-
-<table style="margin: 0px auto;">
-<thead>
-  <tr>
-    <th align="left">Model</th>
-    <th nowrap="nowrap" >BBH</th>
-    <th nowrap="nowrap" >MMLU</th>
-    <th nowrap="nowrap" >CEval</th>
-    <th nowrap="nowrap" >CMMLU</th>
-    <th nowrap="nowrap" >HumanEval</th>
-    <th nowrap="nowrap" >MBPP&dagger;</th>
-    <th nowrap="nowrap" >GSM8K</th>
-    <th nowrap="nowrap" >MATH</th>
-  </tr>
-</thead>
-<tbody align="center">
-  <tr>
-    <td nowrap="nowrap" align="left">Llama2-34B*</td>
-    <td>44.1</td>
-    <td>62.6</td>
-    <td>-</td>
-    <td>-</td>
-    <td>22.6</td>
-    <td>33.0</td>
-    <td>42.2</td>
-    <td>6.24</td>
-  </tr>
-  <tr>
-    <td nowrap="nowrap" align="left">Mistral-7B-Instruct-v0.2</td>
-    <td>39.81</td>
-    <td>60.51</td>
-    <td>42.55</td>
-    <td>41.92</td>
-    <td>36.59</td>
-    <td>39.63</td>
-    <td>40.49</td>
-    <td>4.95</td>
-  </tr>
-  <tr>
-    <td nowrap="nowrap" align="left" >Gemma-7B*</td>
-    <td>55.1</td>
-    <td>64.3</td>
-    <td>-</td>
-    <td>-</td>
-    <td>32.3</td>
-    <td>44.4</td>
-    <td>46.4</td>
-    <td>24.3</td>
-  </tr>
-  <tr>
-    <td nowrap="nowrap" align="left" >Qwen1.5-7B*</td>
-    <td>40.2</td>
-    <td>61</td>
-    <td>74.1</td>
-    <td>73.1</td>
-    <td>36</td>
-    <td>37.4</td>
-    <td>62.5</td>
-    <td>20.3</td>
-  </tr>
-  <tr>
-    <td  nowrap="nowrap" align="left" >Deepseek-MoE(16B)*</td>
-    <td>-</td>
-    <td>45.0</td>
-    <td>40.6</td>
-    <td>42.5</td>
-    <td>26.8</td>
-    <td>39.2</td>
-    <td>18.8</td>
-    <td>4.3</td>
-  </tr>
-  <tr>
-    <td nowrap="nowrap" align="left" ><b>MiniCPM-2.4B</b></td>
-    <td>36.87</td>
-    <td>53.46</td>
-    <td>51.13</td>
-    <td>51.07</td>
-    <td>50.00</td>
-    <td>35.93</td>
-    <td>53.83</td>
-    <td>10.24</td>
-  </tr>
-  <tr>
-    <td nowrap="nowrap" align="left" ><b>MiniCPM-MoE-8x2B</b></td>
-    <td>39.22</td>
-    <td>58.90</td>
-    <td>58.11</td>
-    <td>58.80</td>
-    <td>55.49</td>
-    <td>41.68</td>
-    <td>61.56</td>
-    <td>10.52</td>
-  </tr>
-</tbody>
-</table>
-
-</div>
-
-注：* 表示结果取自技术报告。&dagger; 表示评测集为MBPP全集。
-
-#### MiniCPM-S-1B 评测结果
-
-- 代码生成：在 HumanEval（0-shot）和 MBPP（3-shot）上的平均 pass@1 得分。
-- 常识推理：在 PIQA、SIQA、HellaSwag、WinoGrande 和 COPA 上的平均 0-shot 准确率。
-- 阅读理解：在 BoolQ、LAMBADA 和 TyDi QA 上的平均 0-shot 准确率。
-
-其他测试集：我们报告在GSM8K（8-shot）、MMLU（5-shot）、BBH（3-shot）和 AGI-Eval（0-shot）上的平均准确率。
-
-|        Setting        | Average<br>Sparsity | Average<br>Performance | Code<br>Generation | Commonsense<br>Reasoning | Reading<br>Comprehension | GSM8K | MMLU  |  BBH  | AGI Eval |
-| :-------------------: | :----------------: | :----------------------: | :----------------------: | :---: | :---: | :---: | :---------: | :-----: | :-----------------: |
-| LLaMA2-7B    | - | 37.96 | 16.37 | 69.59 | 61.87 | 12.96 | 44.45 | 32.96 | 27.53 |
-| ReluLLaMA-7B | 66.98 | 37.62 | 15.85 | 69.64 | 70.54 |  5.84 | 38.64 | 35.07 | 27.73 |
-| **ProSparse-7B**\* | 88.11 | 38.31 | 19.47 | 66.29 | 63.33 | 12.74 | 45.21 | 33.59 | 27.55 |
-| **ProSparse-7B**   | **89.32** | **38.46** | 19.42 | 66.27 | 63.50 | 12.13 | 45.48 | 34.99 | 27.46 |
-| LLaMA2-13B | - | 44.06 | 20.19 | 72.58 | 71.55 | 22.21 | 54.69 | 37.89 | 29.33 |
-| ReluLLaMA-13B | 71.56 | 42.74 | 20.19 | 70.44 | 73.29 | 18.50 | 50.58 | 37.97 | 28.22 |
-| **ProSparse-13B**\* | 87.97 | **45.07** | 29.03 | 69.75 | 67.54 | 25.40 | 54.78 | 40.20 | 28.76 |
-| **ProSparse-13B**   | **88.80** | 44.90 | 28.42 | 69.76 | 66.91 | 26.31 | 54.35 | 39.90 | 28.67 |
-| MiniCPM-1B | - | 44.44 | 36.85 | 63.67 | 60.90 | 35.48 | 50.44 | 35.03 | 28.71 |
-| **MiniCPM-S-1B**\*  | 86.25 | **44.72** | 41.38 | 64.55 | 60.69 | 34.72 | 49.36 | 34.04 | 28.27 |
-| **MiniCPM-S-1B**    | **87.89** | **44.72** | 42.04 | 64.37 | 60.73 | 34.57 | 49.51 | 34.08 | 27.77 |
-
-注：
-1. ReluLLaMA-7B 和 ReluLLaMA-13B 的下载链接分别是 [7B](https://huggingface.co/SparseLLM/ReluLLaMA-7B) 和 [13B](https://huggingface.co/SparseLLM/ReluLLaMA-13B)。"ProSparse-7B\*"、"ProSparse-13B\*" 和 "MiniCPM-S-1B\*" 代表没有激活阈值偏移的 ProSparse 版本。
-2. 对于 PIQA、SIQA、HellaSwag、WinoGrande、COPA、BoolQ、LAMBADA、TyDi QA 和 AGI-Eval，我们根据各个选项的 PPL 来进行答案选择。对于 GSM8K、MMLU 和 BBH，我们直接生成答案。
-
-### 模型推理
-
-#### HuggingFace、vLLM推理
-
-参考 MiniCPM 1.0 中的[模型推理](#huggingface-推理)部分。
-
-#### Powerinfer 推理
-
-针对 MiniCPM-S-1B 模型，我们可以使用 Powerinfer 进行推理加速，使用方法如下：
-
-1. 保证cmake版本3.17以上，如果已经安装过，则跳过此步骤
-  ```bash
-    # 下载安装包
-    sudo wget https://cmake.org/files/v3.23/cmake-3.23.0.tar.gz
-    # 解压安装包
-    sudo tar -zxvf cmake-3.23.0.tar.gz
-    # 配置安装环境
-    cd cmake-3.23.0 && sudo ./configure
-    sudo make -j8
-    # 编译安装
-    sudo make install
-    # 查看安装后版本
-    cmake --version
-    # 返回版本号则安装成功
-    #cmake version 3.23.0
-  ```
-2. 安装powerinfer：
-```bash
-  git clone https://github.com/SJTU-IPADS/PowerInfer
-  cd PowerInfer
-  pip install -r requirements.txt # install Python helpers' dependencies
-```
-3. cpu版本powerinfer编译,如果你的机器只有cpu，或者只想使用cpu进行推理，则运行以下命令：
-```bash
-  cmake -S . -B build
-  cmake --build build --config Release
-```
-4. gpu版本powerinfer编译,如果你的机器有gpu，则可以运行以下命令：
-```bash
-  cmake -S . -B build -DLLAMA_CUBLAS=ON
-  cmake --build build --config Release
-```
-5. 获取稀疏模型
-```bash
-git clone https://huggingface.co/openbmb/MiniCPM-S-1B-sft-gguf
-#or
-git clone https://modelscope.cn/models/OpenBMB/MiniCPM-S-1B-sft-gguf
-```
-6. 模型推理：
-```bash
-cd PowerInfer
-# 以下是命令模版，output_token_count为最大输出tokens，thread_num 为线程数，prompt为输入prompt字符
-#./build/bin/main -m /PATH/TO/MODEL -n $output_token_count -t $thread_num -p $prompt
-# 以下是示例
-./build/bin/main -m /root/ld/ld_model_pretrain/1b-s-minicpm/MiniCPM-S-1B-sft.gguf -n 2048 -t 8 -p '<用户>hello,tell me a story please.<AI>'
-```
-</details>
-
-## MiniCPM 1.0
-
-<details>
-<summary>查看 MiniCPM 1.0 的详细信息</summary>
-
-MiniCPM-2B 语言模型有 24亿（2.4B）的非词嵌入参数量, 总计 2.7B 参数量。
-- 经过 SFT 后，MiniCPM-2B 在公开评测集上与 Mistral-7B 表现相近（中文、数学、代码能力更优），整体性能超越 Llama2-13B、MPT-30B、Falcon-40B 等模型。
-- 经过 DPO 后，MiniCPM-2B 在 MTBench 上也超越了 Llama2-70B-Chat、Vicuna-33B、Mistral-7B-Instruct-v0.1、Zephyr-7B-alpha 等众多代表性开源大模型。
-
-注意：为了保证在学术研究用途上模型的通用性，我们**未对 MiniCPM-2B 进行任何身份认同训练**。同时由于我们用 ShareGPT 开源语料作为部分训练数据，模型可能会输出类似 GPT 系列模型的身份认同信息。
-
-### 评测结果
-
-#### 评测设置
-
-* 由于大模型评测难以统一，且大量评测也没有公开的prompt和测试代码，对于具体评测方式，我们只能尽量做到适合各类模型。
-* 整体而言，我们测试时采用统一的prompt输入，并按照各模型对应的模板进行输入调整。
-* **评测脚本及prompt已开源在我们的Github仓库中，也欢迎更多开发者来不断改进我们的评测方式。**
-  * 文本评测部分，采用了我们的开源大模型能力评测框架[UltraEval](https://github.com/OpenBMB/UltraEval)。以下为开源模型复现流程：
-    * 安装UltraEval
-      ```shell
-      git clone https://github.com/OpenBMB/UltraEval.git
-      cd UltraEval
-      pip install -e .
-      ```
-    * 下载相关数据并解压处理
-      ```shell
-      wget -O RawData.zip "https://cloud.tsinghua.edu.cn/f/71b5232264ae4833a4d0/?dl=1"
-      unzip RawData.zip
-      python data_process.py
-      ```
-    * 执行评测脚本(提供了模板，可自定义)
-      ```shell
-      bash run_eval.sh
-      ```
-
-#### 部署模式
-
-* 因为MiniCPM采用Mup的结构，与现有模型在具体计算上有细微差别，我们是基于vllm=0.2.2版本进行了我们模型的实现。
-* **对于非MiniCPM模型，我们采用了vllm=0.2.7的最新版本进行推理。**
-
-#### 评测度量
-
-* 对于QA任务（选择题任务），我们选用两种方式进行测试：
-  * PPL：将选项作为题目生成的延续，并根据各个选项的PPL来进行答案选择；
-  * 第二种是直接生成答案选项。
-* 对于不同模型，这两种方式得到的结果差异较大。MiniCPM两种模式上的结果较为接近，而Mistral-7B-v0.1等模型在PPL上表现较好，直接生成上效果较差。
-* 在具体评测时，我们以两种评测方式得分的最高者为最终结果，以此保证对比的公平性(以下表格中*号表示采用PPL)。
-
-#### 文本模型评测
-
-**越级比较:**
-|模型|平均分|英文均分|中文均分|C-Eval|CMMLU|MMLU|HumanEval|MBPP|GSM8K|MATH|BBH|ARC-E|ARC-C|HellaSwag|
-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
-|Llama2-7B|35.40|36.21|31.765|32.42|31.11|44.32|12.2|27.17|13.57|1.8|33.23|75.25|42.75|75.62*|
-|Qwen-7B|49.46|47.19|59.655|58.96|60.35|57.65|17.07|42.15|41.24|5.34|37.75|83.42|64.76|75.32*|
-|Deepseek-7B|39.96|39.15|43.64|42.82|44.45|47.82|20.12|41.45|15.85|1.53|33.38|74.58*|42.15*|75.45*|
-|Mistral-7B|48.97|49.96|44.54|46.12|42.96|62.69|27.44|45.2|33.13|5.0|41.06|83.92|70.73|80.43*|
-|Llama2-13B|41.48|42.44|37.19|37.32|37.06|54.71|17.07|32.55|21.15|2.25|37.92|78.87*|58.19|79.23*|
-|MPT-30B|38.17|39.82|30.72|29.34|32.09|46.56|21.95|35.36|10.31|1.56|38.22|78.66*|46.08*|79.72*|
-|Falcon-40B|43.62|44.21|40.93|40.29|41.57|53.53|24.39|36.53|22.44|1.92|36.24|81.94*|57.68|83.26*|
-|MiniCPM-2B|52.33|52.6|51.1|51.13|51.07|53.46|50.00|47.31|53.83|10.24|36.87|85.44|68.00|68.25|
-
-**同级比较：**
-|模型|平均分|英文均分|中文均分|C-Eval|CMMLU|MMLU|HumanEval|MBPP|GSM8K|MATH|BBH|ARC-E|ARC-C|HellaSwag|
-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
-|TinyLlama-1.1B|25.36|25.55|24.525|25.02|24.03|24.3|6.71|19.91|2.27|0.74|28.78|60.77*|28.15*|58.33*|
-|Qwen-1.8B|34.72|31.87|47.57|49.81|45.32|43.37|7.93|17.80|19.26|2.42|29.07|63.97*|43.69|59.28*|
-|Gemini Nano-3B|-|-|-|-|-|-|-|27.2(report)|22.8(report)|-|42.4(report)|-|-|-|
-|StableLM-Zephyr-3B|43.46|46.31|30.62|30.34|30.89|45.9|35.37|31.85|52.54|12.49|37.68|73.78|55.38|71.87*|
-|Phi-2-2B|48.84|54.41|23.78|23.37|24.18|52.66|47.56|55.04|57.16|3.5|43.39|86.11|71.25|73.07*|
-|MiniCPM-2B|52.33|52.6|51.10|51.13|51.07|53.46|50.00|47.31|53.83|10.24|36.87|85.44|68.00|68.25|
-
-**Chat模型比较：**
-|模型|平均分|英文均分|中文均分|C-Eval|CMMLU|MMLU|HumanEval|MBPP|GSM8K|MATH|BBH|ARC-E|ARC-C|HellaSwag|
-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
-|ChatGLM2-6B|37.98|35.17|50.63|52.05|49.21|45.77|10.37|9.38|22.74|5.96|32.6|74.45|56.82|58.48*|
-|Mistral-7B-Instruct-v0.1|44.36|45.89|37.51|38.06|36.96|53.56|29.27|39.34|28.73|3.48|39.52|81.61|63.99|73.47*|
-|Mistral-7B-Instruct-v0.2|50.91|52.83|42.235|42.55|41.92|60.51|36.59|48.95|40.49|4.95|39.81|86.28|73.38|84.55*|
-|Qwen-7B-Chat|44.93|42.05|57.9|58.57|57.23|56.03|15.85|40.52|42.23|8.3|37.34|64.44*|39.25*|74.52*|
-|Yi-6B-Chat|50.46|45.89|70.995|70.88|71.11|62.95|14.02|28.34|36.54|3.88|37.43|84.89|70.39|74.6*|
-|Baichuan2-7B-Chat|44.68|42.74|53.39|53.28|53.5|53|21.34|32.32|25.25|6.32|37.46|79.63|60.15|69.23*|
-|Deepseek-7B-chat|49.34|49.56|48.335|46.95|49.72|51.67|40.85|48.48|48.52|4.26|35.7|76.85|63.05|76.68*|
-|Llama2-7B-Chat|38.16|39.17|33.59|34.54|32.64|47.64|14.02|27.4|21.15|2.08|35.54|74.28|54.78|75.65*|
-|MiniCPM-2B|52.33|52.6|51.10|51.13|51.07|53.46|50.00|47.31|53.83|10.24|36.87|85.44|68.00|68.25|
-
-**DPO后模型比较：**
-
-|模型|MT-bench|
-|---|---|
-|GPT-4-turbo|9.32|
-|GPT-3.5-turbo|8.39|
-|Mistral-8*7b-Instruct-v0.1|8.30|
-|Claude-2.1|8.18|
-|Zephyr-7B-beta|7.34|
-|**MiniCPM-2B**|**7.25**|
-|Vicuna-33B|7.12|
-|Zephyr-7B-alpha|6.88|
-|LLaMA-2-70B-chat|6.86|
-|Mistral-7B-Instruct-v0.1|6.84|
-|MPT-34B-instruct|6.39|
-
-
-### 快速上手 
-
-#### 在线体验
-
-- [Colab](https://colab.research.google.com/drive/1tJcfPyWGWA5HezO7GKLeyeIso0HyOc0l?usp=sharing)
-
-#### 基于Gradio的网页版Demo
-
-* 使用如下命令启动基于Gradio的网页版demo：
-
-```shell
-# generation powered by vllm
-python demo/minicpm/vllm_based_demo.py --model_path <vllmcpm_repo_path>
-# generation powered by huggingface
-python demo/minicpm/hf_based_demo.py --model_path <hf_repo_path>
-```
-
-#### HuggingFace 推理
-
-##### MiniCPM-2B
-
-安装`transformers>=4.36.0`以及`accelerate`后，运行以下代码：
-
-```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-torch.manual_seed(0)
-
-path = 'openbmb/MiniCPM-2B-dpo-bf16'
-tokenizer = AutoTokenizer.from_pretrained(path)
-model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map='cuda', trust_remote_code=True)
-
-responds, history = model.chat(tokenizer, "山东省最高的山是哪座山, 它比黄山高还是矮？差距多少？", temperature=0.5, top_p=0.8, repetition_penalty=1.02)
-print(responds)
-```
-
-##### MiniCPM-2B （Llama Format）
-
-我们将MiniCPM的模型权重转化成了Llama代码可以直接调用的[格式](https://huggingface.co/openbmb/MiniCPM-2B-sft-bf16-llama-format)，以便大家尝试:
-
-```python
-import torch
-from transformers import LlamaTokenizerFast, LlamaForCausalLM
-model_path = "openbmb/MiniCPM-2B-dpo-bf16-llama-format"
-tokenizer = LlamaTokenizerFast.from_pretrained(model_path)
-model = LlamaForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map='cuda', trust_remote_code=True)
-
-prompt="Now you act like a terminal situated within a beginner's C++ practice repository folder, please provide the output for the command: `ls -l`"
-input_ids = tokenizer.encode("<用户>{}<AI>".format(prompt), return_tensors='pt', add_special_tokens=True).cuda()
-responds = model.generate(input_ids, temperature=0.3, top_p=0.8, repetition_penalty=1.02, max_length=1024)
-responds = tokenizer.decode(responds[0], skip_special_tokens=True)
-print(responds)
-```
-
-#### vLLM 推理
-
-安装 [vLLM](https://github.com/vllm-project/vllm)。
-
-```shell
-pip install "vllm>=0.4.1"
-```
-
-具体推理代码见[这里](#vllm)。
-
-#### SGLang 推理
-
-安装 [SGLang](https://github.com/sgl-project/sglang)。
-
-* 首先需要启动一个服务:
-
-```bash
-python -m sglang.launch_server --model-path openbmb/MiniCPM-2B-dpo-fp16 --trust-remote-code --port 30000
-```
-
-* 下面是一个推理代码的样例:
-
-```python
-from sglang import function, gen, set_default_backend, RuntimeEndpoint
-
-@function
-def text_qa(s, question):
-    s += "<用户>" + question + "<AI>"
-    s += gen("answer", max_tokens=1024, temperature=0.7, top_p=0.7)
-
-set_default_backend(RuntimeEndpoint("http://localhost:30000"))
-
-state = text_qa.run(
-    question="What is the capital of China?",
-)
-
-print(state["answer"])
-```
-
-#### llama.cpp、Ollama、fastllm、mlx_lm推理
-MiniCPM支持[llama.cpp](https://github.com/ggerganov/llama.cpp/) 、[ollama](https://github.com/ollama/ollama)、[fastllm](https://github.com/ztxz16/fastllm)、[mlx_lm](https://github.com/ml-explore/mlx-examples)推理。感谢[@runfuture](https://github.com/runfuture)对llama.cpp和ollama的适配。
-
-请参考 MiniCPM 知识库中的[边端部署教程](https://modelbest.feishu.cn/wiki/VL5kw9DsEiRDmJkEyTUcydE0nie)。
-
-#### 模型量化
-
-请参考 MiniCPM 知识库中的[量化指南](https://modelbest.feishu.cn/wiki/EatbwdLuvitbbMk2X5wcX6h5n7c)。
-
-#### 模型微调
-
-- 一张 1080/2080 可实现高效参数微调：[代码](https://github.com/OpenBMB/MiniCPM/tree/main/finetune)
-- mlx 微调：[教程](https://modelbest.feishu.cn/wiki/AIU3wbREcirOm9kkvd7cxujFnMb#share-ASrDdvFAloHtycxfy85cLNhAnd3)
-- [xtuner](https://github.com/InternLM/xtuner): [MiniCPM高效率微调的不二选择](https://modelbest.feishu.cn/wiki/AIU3wbREcirOm9kkvd7cxujFnMb#AMdXdzz8qoadZhxU4EucELWznzd)
-- [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory.git)：[MiniCPM微调一键式解决方案](https://modelbest.feishu.cn/wiki/AIU3wbREcirOm9kkvd7cxujFnMb#BAWrdSjXuoFvX4xuIuzc8Amln5E)
-
-</details>
-
-
-## 开源协议
-
-#### 模型协议
-
-* 本仓库中代码依照 [Apache-2.0](https://github.com/OpenBMB/MiniCPM/blob/main/LICENSE) 协议开源
-* MiniCPM 模型权重的使用则需要遵循 [MiniCPM 模型商用许可协议](https://github.com/OpenBMB/MiniCPM/blob/main/MiniCPM%E6%A8%A1%E5%9E%8B%E5%95%86%E7%94%A8%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.md)。
-* MiniCPM 模型权重对学术研究完全开放，在填写[问卷](https://modelbest.feishu.cn/share/base/form/shrcnpV5ZT9EJ6xYjh3Kx0J6v8g)进行登记后亦允许免费商业使用。
-
-#### 声明
-
-* 作为一个语言模型，MiniCPM 通过学习大量的文本来生成内容，但它无法理解、表达个人观点或价值判断，它所输出的任何内容都不代表模型开发者的观点和立场。
-* 因此用户在使用 MiniCPM 生成的内容时，应自行负责对其进行评估和验证。
-* 如果由于使用 MiniCPM 开源模型而导致的任何问题，包括但不限于数据安全问题、公共舆论风险，或模型被误导、滥用、传播或不当利用所带来的任何风险和问题，我们将不承担任何责任。
-
-## 开发机构
-
-本项目由以下机构共同开发：
-
-- <img src="assets/modelbest.png" width="28px"> [面壁智能](https://modelbest.cn/)
-- <img src="assets/thunlp.png" width="28px"> [清华大学自然语言处理实验室](https://nlp.csai.tsinghua.edu.cn/)
-
-## 工作引用
-
-* 如果觉得MiniCPM有助于您的工作，请引用我们的[论文](https://arxiv.org/abs/2404.06395)
-
-```
-@article{hu2024minicpm,
-  title={MiniCPM: Unveiling the Potential of Small Language Models with Scalable Training Strategies},
-  author={Hu, Shengding and Tu, Yuge and Han, Xu and He, Chaoqun and Cui, Ganqu and Long, Xiang and Zheng, Zhi and Fang, Yewei and Huang, Yuxiang and Zhao, Weilin and others},
-  journal={arXiv preprint arXiv:2404.06395},
-  year={2024}
-}
-```
diff --git a/nlp/llm/minicpm/deepspeed/.gitignore b/nlp/llm/minicpm/pytorch/.gitignore
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/.gitignore
rename to nlp/llm/minicpm/pytorch/.gitignore
diff --git a/nlp/llm/minicpm/deepspeed/LICENSE b/nlp/llm/minicpm/pytorch/LICENSE
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/LICENSE
rename to nlp/llm/minicpm/pytorch/LICENSE
diff --git a/nlp/llm/minicpm/deepspeed/MiniCPM Model License.md b/nlp/llm/minicpm/pytorch/MiniCPM Model License.md
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/MiniCPM Model License.md
rename to nlp/llm/minicpm/pytorch/MiniCPM Model License.md
diff --git "a/nlp/llm/minicpm/deepspeed/MiniCPM\346\250\241\345\236\213\345\225\206\347\224\250\350\256\270\345\217\257\345\215\217\350\256\256.md" "b/nlp/llm/minicpm/pytorch/MiniCPM\346\250\241\345\236\213\345\225\206\347\224\250\350\256\270\345\217\257\345\215\217\350\256\256.md"
similarity index 100%
rename from "nlp/llm/minicpm/deepspeed/MiniCPM\346\250\241\345\236\213\345\225\206\347\224\250\350\256\270\345\217\257\345\215\217\350\256\256.md"
rename to "nlp/llm/minicpm/pytorch/MiniCPM\346\250\241\345\236\213\345\225\206\347\224\250\350\256\270\345\217\257\345\215\217\350\256\256.md"
diff --git a/nlp/llm/minicpm/deepspeed/README-en.md b/nlp/llm/minicpm/pytorch/README-en.md
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/README-en.md
rename to nlp/llm/minicpm/pytorch/README-en.md
diff --git a/nlp/llm/minicpm/deepspeed/finetune/README.md b/nlp/llm/minicpm/pytorch/README.md
similarity index 36%
rename from nlp/llm/minicpm/deepspeed/finetune/README.md
rename to nlp/llm/minicpm/pytorch/README.md
index ad88672c5e4cd69bf99f87c51e3dc3b36ae56616..b1214587f6a62a9a8ff53df5c7321fe44c384dcb 100644
--- a/nlp/llm/minicpm/deepspeed/finetune/README.md
+++ b/nlp/llm/minicpm/pytorch/README.md
@@ -1,24 +1,33 @@
-# MiniCPM
+# MiniCPM (DeepSpeed)
 
 ## Model description
 
-MiniCPM is a series of on-device large language models, with the core language model, MiniCPM-2B, possessing 2.4 billion non-embedding parameters. On comprehensive benchmarks, it performs similarly to Mistral-7B (with superior capabilities in Chinese, mathematics, and code), while exhibiting overall performance surpassing models like Llama2-13B, MPT-30B, and Falcon-40B. Furthermore, on the MT-Bench, currently the closest benchmark to user experience, MiniCPM-2B outperforms many representative open-source large language models, including Llama2-70B-Chat, Vicuna-33B, Mistral-7B-Instruct-v0.1, and Zephyr-7B-alpha.
+MiniCPM is a series of on-device large language models, with the core language model, MiniCPM-2B, possessing 2.4 billion
+non-embedding parameters. On comprehensive benchmarks, it performs similarly to Mistral-7B (with superior capabilities
+in Chinese, mathematics, and code), while exhibiting overall performance surpassing models like Llama2-13B, MPT-30B, and
+Falcon-40B. Furthermore, on the MT-Bench, currently the closest benchmark to user experience, MiniCPM-2B outperforms
+many representative open-source large language models, including Llama2-70B-Chat, Vicuna-33B, Mistral-7B-Instruct-v0.1,
+and Zephyr-7B-alpha.
 
 ## Step 1: Installation
+
 ```bash
-cd /model/to/minicpm/deepspeed
+cd /path/to/minicpm/pytorch
 pip3 install -r requirements.txt
 cd finetune
 pip3 install -r requirements.txt
 ```
 
-## Step 2 : Training
+## Step 2: Training
+
 ### SFT
+
 ```bash
-cd finetune
 bash sft_finetune.sh
 ```
+
 ### LoRA
+
 ```bash
 bash lora_finetune.sh
 ```
@@ -26,4 +35,3 @@ bash lora_finetune.sh
 ## Reference
 
 - [MiniCPM](https://github.com/OpenBMB/MiniCPM/tree/main)
-
diff --git a/nlp/llm/minicpm/deepspeed/assets/eval_needle.jpeg b/nlp/llm/minicpm/pytorch/assets/eval_needle.jpeg
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/assets/eval_needle.jpeg
rename to nlp/llm/minicpm/pytorch/assets/eval_needle.jpeg
diff --git a/nlp/llm/minicpm/deepspeed/assets/minicpm_logo.png b/nlp/llm/minicpm/pytorch/assets/minicpm_logo.png
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/assets/minicpm_logo.png
rename to nlp/llm/minicpm/pytorch/assets/minicpm_logo.png
diff --git a/nlp/llm/minicpm/deepspeed/assets/modelbest.png b/nlp/llm/minicpm/pytorch/assets/modelbest.png
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/assets/modelbest.png
rename to nlp/llm/minicpm/pytorch/assets/modelbest.png
diff --git a/nlp/llm/minicpm/deepspeed/assets/thunlp.png b/nlp/llm/minicpm/pytorch/assets/thunlp.png
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/assets/thunlp.png
rename to nlp/llm/minicpm/pytorch/assets/thunlp.png
diff --git a/nlp/llm/minicpm/deepspeed/assets/wechat.jpg b/nlp/llm/minicpm/pytorch/assets/wechat.jpg
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/assets/wechat.jpg
rename to nlp/llm/minicpm/pytorch/assets/wechat.jpg
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm/hf_based_demo.py b/nlp/llm/minicpm/pytorch/demo/minicpm/hf_based_demo.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm/hf_based_demo.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm/hf_based_demo.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm/langchain_demo.py b/nlp/llm/minicpm/pytorch/demo/minicpm/langchain_demo.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm/langchain_demo.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm/langchain_demo.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm/mlx_based_demo.py b/nlp/llm/minicpm/pytorch/demo/minicpm/mlx_based_demo.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm/mlx_based_demo.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm/mlx_based_demo.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm/vllm_based_demo.py b/nlp/llm/minicpm/pytorch/demo/minicpm/vllm_based_demo.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm/vllm_based_demo.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm/vllm_based_demo.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/code_interpreter/code_interpreter.py b/nlp/llm/minicpm/pytorch/demo/minicpm3/code_interpreter/code_interpreter.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/code_interpreter/code_interpreter.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/code_interpreter/code_interpreter.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/code_interpreter/requirements.txt b/nlp/llm/minicpm/pytorch/demo/minicpm3/code_interpreter/requirements.txt
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/code_interpreter/requirements.txt
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/code_interpreter/requirements.txt
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/README.md b/nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/README.md
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/README.md
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/README.md
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/function_calling.py b/nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/function_calling.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/function_calling.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/function_calling.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/minicpm_chat_template_with_tool.jinja b/nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/minicpm_chat_template_with_tool.jinja
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/minicpm_chat_template_with_tool.jinja
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/minicpm_chat_template_with_tool.jinja
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/minicpm_tool_parser.py b/nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/minicpm_tool_parser.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/minicpm_tool_parser.py
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/minicpm_tool_parser.py
diff --git a/nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/requirements.txt b/nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/requirements.txt
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/demo/minicpm3/function_call/requirements.txt
rename to nlp/llm/minicpm/pytorch/demo/minicpm3/function_call/requirements.txt
diff --git a/nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero2.json b/nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero2.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero2.json
rename to nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero2.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero2_offload.json b/nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero2_offload.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero2_offload.json
rename to nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero2_offload.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero3.json b/nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero3.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero3.json
rename to nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero3.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero3_offload.json b/nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero3_offload.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/configs/ds_config_zero3_offload.json
rename to nlp/llm/minicpm/pytorch/finetune/configs/ds_config_zero3_offload.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/data/kto_en_demo.json b/nlp/llm/minicpm/pytorch/finetune/data/kto_en_demo.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/data/kto_en_demo.json
rename to nlp/llm/minicpm/pytorch/finetune/data/kto_en_demo.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/finetune.py b/nlp/llm/minicpm/pytorch/finetune/finetune.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/finetune.py
rename to nlp/llm/minicpm/pytorch/finetune/finetune.py
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/README.md b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/README.md
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/README.md
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/README.md
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/llama_factory_data/dpo_en_demo.json b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/llama_factory_data/dpo_en_demo.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/llama_factory_data/dpo_en_demo.json
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/llama_factory_data/dpo_en_demo.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/llama_factory_data/kto_en_demo.json b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/llama_factory_data/kto_en_demo.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/llama_factory_data/kto_en_demo.json
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/llama_factory_data/kto_en_demo.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/llama_factory_data/sft_zh_demo.json b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/llama_factory_data/sft_zh_demo.json
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/llama_factory_data/sft_zh_demo.json
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/llama_factory_data/sft_zh_demo.json
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/minicpm_dpo.yaml b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/minicpm_dpo.yaml
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/minicpm_dpo.yaml
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/minicpm_dpo.yaml
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/minicpm_kto.yaml b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/minicpm_kto.yaml
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/minicpm_kto.yaml
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/minicpm_kto.yaml
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/minicpm_sft.yaml b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/minicpm_sft.yaml
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/minicpm_sft.yaml
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/minicpm_sft.yaml
diff --git a/nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/single_node.sh b/nlp/llm/minicpm/pytorch/finetune/llama_factory_example/single_node.sh
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/llama_factory_example/single_node.sh
rename to nlp/llm/minicpm/pytorch/finetune/llama_factory_example/single_node.sh
diff --git a/nlp/llm/minicpm/deepspeed/finetune/lora_finetune.sh b/nlp/llm/minicpm/pytorch/finetune/lora_finetune.sh
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/lora_finetune.sh
rename to nlp/llm/minicpm/pytorch/finetune/lora_finetune.sh
diff --git a/nlp/llm/minicpm/deepspeed/finetune/lora_finetune_ocnli.sh b/nlp/llm/minicpm/pytorch/finetune/lora_finetune_ocnli.sh
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/lora_finetune_ocnli.sh
rename to nlp/llm/minicpm/pytorch/finetune/lora_finetune_ocnli.sh
diff --git a/nlp/llm/minicpm/deepspeed/finetune/mlx_finetune.py b/nlp/llm/minicpm/pytorch/finetune/mlx_finetune.py
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/mlx_finetune.py
rename to nlp/llm/minicpm/pytorch/finetune/mlx_finetune.py
diff --git a/nlp/llm/minicpm/deepspeed/finetune/requirements.txt b/nlp/llm/minicpm/pytorch/finetune/requirements.txt
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/requirements.txt
rename to nlp/llm/minicpm/pytorch/finetune/requirements.txt
diff --git a/nlp/llm/minicpm/deepspeed/finetune/requirements_mlx.txt b/nlp/llm/minicpm/pytorch/finetune/requirements_mlx.txt
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/requirements_mlx.txt
rename to nlp/llm/minicpm/pytorch/finetune/requirements_mlx.txt
diff --git a/nlp/llm/minicpm/deepspeed/finetune/sft_finetune.sh b/nlp/llm/minicpm/pytorch/finetune/sft_finetune.sh
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/finetune/sft_finetune.sh
rename to nlp/llm/minicpm/pytorch/finetune/sft_finetune.sh
diff --git a/nlp/llm/minicpm/deepspeed/requirements.txt b/nlp/llm/minicpm/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/minicpm/deepspeed/requirements.txt
rename to nlp/llm/minicpm/pytorch/requirements.txt
diff --git a/nlp/llm/mixtral/megatron-lm/README.md b/nlp/llm/mixtral/pytorch/README.md
similarity index 100%
rename from nlp/llm/mixtral/megatron-lm/README.md
rename to nlp/llm/mixtral/pytorch/README.md
diff --git a/nlp/llm/qwen-7b/firefly/README.md b/nlp/llm/qwen-7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/README.md
rename to nlp/llm/qwen-7b/pytorch/README.md
diff --git a/nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z2_config.json b/nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z2_config.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z2_config.json
rename to nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z2_config.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z2_config_bf16.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z2_config_bf16.json
rename to nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z2_config_bf16.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z2_config_offload.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z2_config_offload.json
rename to nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z2_config_offload.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z3_config.json b/nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z3_config.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z3_config.json
rename to nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z3_config.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z3_config_bf16.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z3_config_bf16.json
rename to nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z3_config_bf16.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z3_config_offload.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/ds_config/ds_z3_config_offload.json
rename to nlp/llm/qwen-7b/pytorch/configs/ds_config/ds_z3_config_offload.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/qwen-7b-sft-full.json b/nlp/llm/qwen-7b/pytorch/configs/qwen-7b-sft-full.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/qwen-7b-sft-full.json
rename to nlp/llm/qwen-7b/pytorch/configs/qwen-7b-sft-full.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/qwen-7b-sft-lora.json b/nlp/llm/qwen-7b/pytorch/configs/qwen-7b-sft-lora.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/qwen-7b-sft-lora.json
rename to nlp/llm/qwen-7b/pytorch/configs/qwen-7b-sft-lora.json
diff --git a/nlp/llm/qwen-7b/firefly/configs/qwen-7b-sft-ptuning_v2.json b/nlp/llm/qwen-7b/pytorch/configs/qwen-7b-sft-ptuning_v2.json
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/configs/qwen-7b-sft-ptuning_v2.json
rename to nlp/llm/qwen-7b/pytorch/configs/qwen-7b-sft-ptuning_v2.json
diff --git a/nlp/llm/qwen-7b/firefly/get_Qwen-7B.py b/nlp/llm/qwen-7b/pytorch/get_Qwen-7B.py
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/get_Qwen-7B.py
rename to nlp/llm/qwen-7b/pytorch/get_Qwen-7B.py
diff --git a/nlp/llm/qwen-7b/firefly/main.py b/nlp/llm/qwen-7b/pytorch/main.py
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/main.py
rename to nlp/llm/qwen-7b/pytorch/main.py
diff --git a/nlp/llm/qwen-7b/firefly/requirements.txt b/nlp/llm/qwen-7b/pytorch/requirements.txt
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/requirements.txt
rename to nlp/llm/qwen-7b/pytorch/requirements.txt
diff --git a/nlp/llm/qwen-7b/firefly/train.sh b/nlp/llm/qwen-7b/pytorch/train.sh
similarity index 100%
rename from nlp/llm/qwen-7b/firefly/train.sh
rename to nlp/llm/qwen-7b/pytorch/train.sh
diff --git a/nlp/llm/qwen1.5-14b/firefly/README.md b/nlp/llm/qwen1.5-14b/pytorch/README.md
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/README.md
rename to nlp/llm/qwen1.5-14b/pytorch/README.md
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z2_config.json b/nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z2_config.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z2_config.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z2_config.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z2_config_bf16.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z2_config_bf16.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z2_config_bf16.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z2_config_offload.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z2_config_offload.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z2_config_offload.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z3_config.json b/nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z3_config.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z3_config.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z3_config.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z3_config_bf16.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z3_config_bf16.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z3_config_bf16.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z3_config_offload.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/ds_config/ds_z3_config_offload.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/ds_config/ds_z3_config_offload.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/configs/qwen-14b-sft-full.json b/nlp/llm/qwen1.5-14b/pytorch/configs/qwen-14b-sft-full.json
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/configs/qwen-14b-sft-full.json
rename to nlp/llm/qwen1.5-14b/pytorch/configs/qwen-14b-sft-full.json
diff --git a/nlp/llm/qwen1.5-14b/firefly/get_Qwen1.5.py b/nlp/llm/qwen1.5-14b/pytorch/get_Qwen1.5.py
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/get_Qwen1.5.py
rename to nlp/llm/qwen1.5-14b/pytorch/get_Qwen1.5.py
diff --git a/nlp/llm/qwen1.5-14b/firefly/main.py b/nlp/llm/qwen1.5-14b/pytorch/main.py
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/main.py
rename to nlp/llm/qwen1.5-14b/pytorch/main.py
diff --git a/nlp/llm/qwen1.5-14b/firefly/train.sh b/nlp/llm/qwen1.5-14b/pytorch/train.sh
similarity index 100%
rename from nlp/llm/qwen1.5-14b/firefly/train.sh
rename to nlp/llm/qwen1.5-14b/pytorch/train.sh
diff --git a/nlp/llm/qwen1.5-7b/firefly/README.md b/nlp/llm/qwen1.5-7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/README.md
rename to nlp/llm/qwen1.5-7b/pytorch/README.md
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z2_config.json b/nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z2_config.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z2_config.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z2_config.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z2_config_bf16.json b/nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z2_config_bf16.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z2_config_bf16.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z2_config_bf16.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z2_config_offload.json b/nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z2_config_offload.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z2_config_offload.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z2_config_offload.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z3_config.json b/nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z3_config.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z3_config.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z3_config.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z3_config_bf16.json b/nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z3_config_bf16.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z3_config_bf16.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z3_config_bf16.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z3_config_offload.json b/nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z3_config_offload.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/ds_config/ds_z3_config_offload.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/ds_config/ds_z3_config_offload.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-full.json b/nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-full.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-full.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-full.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-lora.json b/nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-lora.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-lora.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-lora.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-ptuning.json b/nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-ptuning.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-ptuning.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-ptuning.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-qlora.json b/nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-qlora.json
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/configs/qwen-7b-sft-qlora.json
rename to nlp/llm/qwen1.5-7b/pytorch/configs/qwen-7b-sft-qlora.json
diff --git a/nlp/llm/qwen1.5-7b/firefly/get_Qwen1.5.py b/nlp/llm/qwen1.5-7b/pytorch/get_Qwen1.5.py
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/get_Qwen1.5.py
rename to nlp/llm/qwen1.5-7b/pytorch/get_Qwen1.5.py
diff --git a/nlp/llm/qwen1.5-7b/firefly/main.py b/nlp/llm/qwen1.5-7b/pytorch/main.py
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/main.py
rename to nlp/llm/qwen1.5-7b/pytorch/main.py
diff --git a/nlp/llm/qwen1.5-7b/firefly/train.sh b/nlp/llm/qwen1.5-7b/pytorch/train.sh
similarity index 100%
rename from nlp/llm/qwen1.5-7b/firefly/train.sh
rename to nlp/llm/qwen1.5-7b/pytorch/train.sh
diff --git a/nlp/llm/qwen2.5-7b/llama-factory/README.md b/nlp/llm/qwen2.5-7b/pytorch/README.md
similarity index 100%
rename from nlp/llm/qwen2.5-7b/llama-factory/README.md
rename to nlp/llm/qwen2.5-7b/pytorch/README.md
diff --git a/nlp/llm/qwen2.5-7b/llama-factory/qwen2_5-7b_full_sft.yaml b/nlp/llm/qwen2.5-7b/pytorch/qwen2_5-7b_full_sft.yaml
similarity index 100%
rename from nlp/llm/qwen2.5-7b/llama-factory/qwen2_5-7b_full_sft.yaml
rename to nlp/llm/qwen2.5-7b/pytorch/qwen2_5-7b_full_sft.yaml
diff --git a/nlp/llm/qwen2.5-7b/llama-factory/qwen2_5-7b_lora_sft.yaml b/nlp/llm/qwen2.5-7b/pytorch/qwen2_5-7b_lora_sft.yaml
similarity index 100%
rename from nlp/llm/qwen2.5-7b/llama-factory/qwen2_5-7b_lora_sft.yaml
rename to nlp/llm/qwen2.5-7b/pytorch/qwen2_5-7b_lora_sft.yaml
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/configs/default_config.yaml b/toolbox/diffusers/configs/default_config.yaml
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/configs/default_config.yaml
rename to toolbox/diffusers/configs/default_config.yaml
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/configs/single_config.yaml b/toolbox/diffusers/configs/single_config.yaml
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/configs/single_config.yaml
rename to toolbox/diffusers/configs/single_config.yaml
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/configs/zero2_config.yaml b/toolbox/diffusers/configs/zero2_config.yaml
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/configs/zero2_config.yaml
rename to toolbox/diffusers/configs/zero2_config.yaml
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/requirements.txt b/toolbox/diffusers/requirements.txt
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/requirements.txt
rename to toolbox/diffusers/requirements.txt
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/run_sd3_dreambooth.sh b/toolbox/diffusers/run_sd3_dreambooth.sh
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/run_sd3_dreambooth.sh
rename to toolbox/diffusers/run_sd3_dreambooth.sh
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/run_sd_1.5_multi.sh b/toolbox/diffusers/run_sd_1.5_multi.sh
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/run_sd_1.5_multi.sh
rename to toolbox/diffusers/run_sd_1.5_multi.sh
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/run_sd_1.5_single.sh b/toolbox/diffusers/run_sd_1.5_single.sh
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/run_sd_1.5_single.sh
rename to toolbox/diffusers/run_sd_1.5_single.sh
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/run_sd_2.1_multi.sh b/toolbox/diffusers/run_sd_2.1_multi.sh
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/run_sd_2.1_multi.sh
rename to toolbox/diffusers/run_sd_2.1_multi.sh
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/run_sd_2.1_single.sh b/toolbox/diffusers/run_sd_2.1_single.sh
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/run_sd_2.1_single.sh
rename to toolbox/diffusers/run_sd_2.1_single.sh
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/run_sd_xl.sh b/toolbox/diffusers/run_sd_xl.sh
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/run_sd_xl.sh
rename to toolbox/diffusers/run_sd_xl.sh
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/train_dreambooth_sd3.py b/toolbox/diffusers/train_dreambooth_sd3.py
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/train_dreambooth_sd3.py
rename to toolbox/diffusers/train_dreambooth_sd3.py
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/train_text_to_image.py b/toolbox/diffusers/train_text_to_image.py
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/train_text_to_image.py
rename to toolbox/diffusers/train_text_to_image.py
diff --git a/multimodal/diffusion/stable-diffusion/diffusers/train_text_to_image_sdxl.py b/toolbox/diffusers/train_text_to_image_sdxl.py
similarity index 100%
rename from multimodal/diffusion/stable-diffusion/diffusers/train_text_to_image_sdxl.py
rename to toolbox/diffusers/train_text_to_image_sdxl.py