From 1018eb6ed63412cefbc5e65ecd91ddb2c5e03a9e Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Wed, 11 Dec 2024 03:48:09 +0000 Subject: [PATCH 1/2] add 24.12 models to readme model list Signed-off-by: mingjiang.li --- README.md | 73 ++++-------------- .../kan/pytorch}/README.md | 0 .../kan/pytorch}/__init__.py | 0 .../.ipynb_checkpoints/KANLayer-checkpoint.py | 0 .../.ipynb_checkpoints/LBFGS-checkpoint.py | 0 .../kan/.ipynb_checkpoints/MLP-checkpoint.py | 0 .../.ipynb_checkpoints/MultKAN-checkpoint.py | 0 .../Symbolic_KANLayer-checkpoint.py | 0 .../.ipynb_checkpoints/__init__-checkpoint.py | 0 .../.ipynb_checkpoints/compiler-checkpoint.py | 0 .../experiment-checkpoint.py | 0 .../.ipynb_checkpoints/feynman-checkpoint.py | 0 .../hypothesis-checkpoint.py | 0 .../.ipynb_checkpoints/spline-checkpoint.py | 0 .../.ipynb_checkpoints/utils-checkpoint.py | 0 .../kan/pytorch}/kan/KANLayer.py | 0 .../kan/pytorch}/kan/LBFGS.py | 0 .../kan/pytorch}/kan/MLP.py | 0 .../kan/pytorch}/kan/MultKAN.py | 0 .../kan/pytorch}/kan/Symbolic_KANLayer.py | 0 .../kan/pytorch}/kan/__init__.py | 0 .../pytorch}/kan/assets/img/mult_symbol.png | Bin .../pytorch}/kan/assets/img/sum_symbol.png | Bin .../kan/pytorch}/kan/compiler.py | 0 .../kan/pytorch}/kan/experiment.py | 0 .../kan/experiments/experiment1.ipynb | 0 .../kan/pytorch}/kan/feynman.py | 0 .../kan/pytorch}/kan/hypothesis.py | 0 .../kan/pytorch}/kan/spline.py | 0 .../kan/pytorch}/kan/utils.py | 0 .../kan/pytorch}/requirements.txt | 0 .../kan/pytorch}/run_train.sh | 0 .../kan/pytorch}/train_kan.py | 0 .../{ColossalAI => colossalai}/README.md | 12 ++- .../{ColossalAI => colossalai}/benchmark.py | 0 .../{ColossalAI => colossalai}/data_utils.py | 0 .../deepseek_moe_7b_pretrain.sh | 0 .../{ColossalAI => colossalai}/model_utils.py | 0 .../performance_evaluator.py | 0 .../applications/Colossal-LLaMA/README.md | 41 +++++----- .../Colossal-LLaMA/colossal_llama/__init__.py | 0 .../colossal_llama/dataset/__init__.py | 0 
.../colossal_llama/dataset/conversation.py | 0 .../colossal_llama/dataset/dummy_dataset.py | 0 .../colossal_llama/dataset/loader.py | 0 .../dataset/spliced_and_tokenized_dataset.py | 0 .../colossal_llama/model/init_model.py | 0 .../tokenizer/init_tokenizer.py | 0 .../colossal_llama/utils/__init__.py | 0 .../colossal_llama/utils/ckpt_io.py | 0 .../colossal_llama/utils/froze.py | 0 .../colossal_llama/utils/neftune_patch.py | 0 .../colossal_llama/utils/stream_chat_patch.py | 0 .../colossal_llama/utils/utils.py | 0 .../Colossal-LLaMA/dataset/convert_data.py | 0 .../dataset/prepare_pretrain_dataset.py | 0 .../dataset/prepare_sft_dataset.py | 0 .../Colossal-LLaMA/get_Meta_LLaMA_8B.py | 0 .../Colossal-LLaMA/performance_evaluator.py | 0 .../Colossal-LLaMA/prepare_sft_dataset.sh | 0 .../Colossal-LLaMA/requirements.txt | 0 .../Colossal-LLaMA/run_llama3_8b_sft_3d.sh | 0 .../applications/Colossal-LLaMA/setup.py | 0 .../Colossal-LLaMA/train.example.sh | 0 .../applications/Colossal-LLaMA/train.py | 0 .../Colossal-LLaMA/train_sft.example.sh | 0 .../applications/Colossal-LLaMA/version.txt | 0 .../megatron-deepspeed/README.md | 0 68 files changed, 43 insertions(+), 83 deletions(-) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/README.md (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/__init__.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/KANLayer-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/LBFGS-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/MLP-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/MultKAN-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py (100%) rename {dl/kan => 
methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/__init__-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/compiler-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/experiment-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/feynman-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/hypothesis-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/spline-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/utils-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/KANLayer.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/LBFGS.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/MLP.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/MultKAN.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/Symbolic_KANLayer.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/__init__.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/assets/img/mult_symbol.png (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/assets/img/sum_symbol.png (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/compiler.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/experiment.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/experiments/experiment1.ipynb (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/feynman.py 
(100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/hypothesis.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/spline.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/utils.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/requirements.txt (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/run_train.sh (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/train_kan.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/README.md (80%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/benchmark.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/data_utils.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/deepseek_moe_7b_pretrain.sh (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/model_utils.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/performance_evaluator.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/README.md (45%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/__init__.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => 
colossalai}/applications/Colossal-LLaMA/colossal_llama/model/init_model.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/froze.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/utils.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/dataset/convert_data.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/performance_evaluator.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/prepare_sft_dataset.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/requirements.txt (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/setup.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => 
colossalai}/applications/Colossal-LLaMA/train.example.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/train.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/train_sft.example.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/version.txt (100%) rename nlp/llm/{llama3-8b => llama3_8b}/megatron-deepspeed/README.md (100%) diff --git a/README.md b/README.md index 58a4fe20..61bc70bb 100644 --- a/README.md +++ b/README.md @@ -4,62 +4,6 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 ## 模型列表 -- Computer Vision - - - [Classification](#classification) - - [Face Detection](#face-detection) - - [Face Recognition](#face-recognition) - - [Instance Segmentation](#instance-segmentation) - - [Knowledge Distillation](#knowledge-distillation) - - [Network Pruning](#network-pruning) - - [Object Detection](#object-detection) - - [3D Object Detection](#3d-object-detection) - - [OCR](#ocr) - - [Point Cloud](#point-cloud) - - [Pose Estimation](#pose-estimation) - - [Self-Supervised Learning](#self-supervised-learning) - - [Semantic Segmentation](#semantic-segmentation) - - [Super Resolution](#super-resolution) - - [Tracking](#tracking) - - [Traffic Forecast](#traffic-forecast) - -- Graph Neural Network (GNN) - - - [Graph Attention](#graph-attention) - - [Node Classification](#node-classification) - - [Text Classification](#text-classification) - -- High Performance Computing (HPC) - - - [Molecular Dynamics](#molecular-dynamics) - -- [Multimodal](#multimodal) - -- Natural Language Processing (NLP) - - - [Cloze Test](#cloze-test) - - [Dialogue Generation](#dialogue-generation) - - [Language Modeling](#language-modeling) - - [Large Language Model (LLM)](#large-language-model-llm) - - [Text Correction](#text-correction) - - [Translation](#translation) - -- Recommendation - - - [Collaborative Filtering](#collaborative-filtering) - - [Click Through Rate](#click-through-rate) - -- 
[Reinforcement Learning](#reinforcement-learning) - -- Speech - - - [Speech Recognition](#speech-recognition) - - [Speech Synthesis](#speech-synthesis) - -- [3D Reconstruction](#3d-reconstruction) - --------- - ### Computer Vision #### Classification @@ -197,6 +141,7 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [Faster R-CNN](cv/detection/fasterrcnn/pytorch/README.md) | PyTorch | COCO [FCOS](cv/detection/fcos/paddlepaddle/README.md) | PaddlePaddle | COCO [FCOS](cv/detection/fcos/pytorch/README.md) | PyTorch | COCO +[Mamba-YOLO](cv/detection/mamba_yolo/pytorch/README.md) | PyTorch | COCO [Mask R-CNN](cv/detection/maskrcnn/pytorch/README.md) | PyTorch | COCO [Mask R-CNN](cv/detection/maskrcnn/paddlepaddle/README.md) | PaddlePaddle | COCO [OC_SORT](cv/detection/oc_sort/paddlepaddle/README.md) | PaddlePaddle | MOT17 @@ -388,6 +333,14 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 -------- | ------ | ---- [Water/se_e2_a](hpc/molecular_dynamics/water_se_e2_a/tensorflow/README.md) | TensorFlow (DeePMD-kit) | data_water +### Methodology + +#### Kolmogorov-Arnold Networks + +模型名称 | 框架 | 数据集 +-------- | ------ | ---- +[KAN](methodology/kolmogorov_arnold_networks/kan/pytorch/README.md) | PyTorch | - + ### Multimodal 模型名称 | 框架 | 数据集 @@ -396,7 +349,7 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [CLIP](multimodal/Language-Image_Pre-Training/clip/pytorch/README.md) | PyTorch | CIFAR100 [ControlNet](multimodal/diffusion/ControlNet/README.md) | PyTorch | Fill50K [DDPM](multimodal/diffusion/ddpm/README.md) | PyTorch | CIFAR-10 -[LLaVA](multimodal/llava/pytorch/README.md) | PyTorch | LLaVA-Pretrain +[LLaVA 1.5](multimodal/llava/pytorch/README.md) | PyTorch | LLaVA-Pretrain [L-Verse](multimodal/Language-Image_Pre-Training/L-Verse/pytorch/README.md) | PyTorch | ImageNet [Stable Diffusion 1.4](multimodal/diffusion/stable-diffusion/training/README.md) | PyTorch | pokemon-images [Stable Diffusion 1.5](multimodal/diffusion/stable-diffusion/sd_1.5/README.md) | PyTorch | pokemon-images @@ -445,6 +398,7 @@ 
DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [ChatGLM-6B](nlp/llm/chatglm-6b/deepspeed/README.md) | PyTorch (DeepSpeed) | ADGEN & chatglm-6b [ChatGLM2-6B SFT](nlp/llm/ChatGLM2-6b-sft/README.md) | PyTorch (DeepSpeed) | ADGEN & chatglm2-6b [ChatGLM3-6B](nlp/llm/chatglm3-6b/deepspeed/finetune_demo/README.md) | PyTorch (DeepSpeed) | ADGEN & chatglm3-6b +[DeepSeekMoE 7B](nlp/llm/deepseek_moe_7b/colossalai/README.md) | PyTorch (ColossalAI) | deepseek-moe-16b-base [Llama-7B](nlp/llm/llama-7b/colossalai/README.md) | PyTorch (Colossal-AI) | llama-7b-hf [Llama2-7B](nlp/llm/llama2-7b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus [Llama2-7B Reward Model Finetuning](nlp/llm/llama2-7b_reward_sft/deepspeed/README.md) | PyTorch (DeepSpeed) | Dahoas/rm-static @@ -452,11 +406,12 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [Llama2-7B SFT](nlp/llm/llama2-7b_sft/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | gpt_small-117M [Llama2-13B](nlp/llm/llama2-13b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus [Llama2-34B](nlp/llm/llama2-34b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus -[Llama3-8B](nlp/llm/llama3-8b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus +[Llama3-8B](nlp/llm/llama3_8b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus +[Llama3-8B SFT](nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md) | PyTorch (ColossalAI) | school_math_0.25M [QWen-7B](nlp/llm/qwen-7b/firefly/README.md) | PyTorch (Firefly) | qwen-7b [QWen1.5-7B](nlp/llm/qwen1.5-7b/firefly/README.md) | PyTorch (Firefly) | school_math [QWen1.5-14B](nlp/llm/qwen1.5-14b/firefly/README.md) | PyTorch (Firefly) | school_math -[QWen2.5-7B](nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md) | PyTorch (LLaMA-Factory) | qwen2.5-7b +[Qwen2.5-7B SFT](nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md) | PyTorch (LLaMA-Factory) | qwen2.5-7b #### Text Correction diff --git a/dl/kan/README.md 
b/methodology/kolmogorov_arnold_networks/kan/pytorch/README.md similarity index 100% rename from dl/kan/README.md rename to methodology/kolmogorov_arnold_networks/kan/pytorch/README.md diff --git a/dl/kan/__init__.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/__init__.py similarity index 100% rename from dl/kan/__init__.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/__init__.py diff --git a/dl/kan/kan/.ipynb_checkpoints/KANLayer-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/KANLayer-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/KANLayer-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/KANLayer-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/LBFGS-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/LBFGS-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/LBFGS-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/LBFGS-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/MLP-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MLP-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/MLP-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MLP-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/MultKAN-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MultKAN-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/MultKAN-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MultKAN-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py 
similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/__init__-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/__init__-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/__init__-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/__init__-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/compiler-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/compiler-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/compiler-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/compiler-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/experiment-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/experiment-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/experiment-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/experiment-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/feynman-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/feynman-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/feynman-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/feynman-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/hypothesis-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/hypothesis-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/hypothesis-checkpoint.py rename to 
methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/hypothesis-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/spline-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/spline-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/spline-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/spline-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/utils-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/utils-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/utils-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/utils-checkpoint.py diff --git a/dl/kan/kan/KANLayer.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/KANLayer.py similarity index 100% rename from dl/kan/kan/KANLayer.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/KANLayer.py diff --git a/dl/kan/kan/LBFGS.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/LBFGS.py similarity index 100% rename from dl/kan/kan/LBFGS.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/LBFGS.py diff --git a/dl/kan/kan/MLP.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MLP.py similarity index 100% rename from dl/kan/kan/MLP.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MLP.py diff --git a/dl/kan/kan/MultKAN.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MultKAN.py similarity index 100% rename from dl/kan/kan/MultKAN.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MultKAN.py diff --git a/dl/kan/kan/Symbolic_KANLayer.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/Symbolic_KANLayer.py similarity index 100% rename from dl/kan/kan/Symbolic_KANLayer.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/Symbolic_KANLayer.py diff 
--git a/dl/kan/kan/__init__.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/__init__.py similarity index 100% rename from dl/kan/kan/__init__.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/__init__.py diff --git a/dl/kan/kan/assets/img/mult_symbol.png b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/mult_symbol.png similarity index 100% rename from dl/kan/kan/assets/img/mult_symbol.png rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/mult_symbol.png diff --git a/dl/kan/kan/assets/img/sum_symbol.png b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/sum_symbol.png similarity index 100% rename from dl/kan/kan/assets/img/sum_symbol.png rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/sum_symbol.png diff --git a/dl/kan/kan/compiler.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/compiler.py similarity index 100% rename from dl/kan/kan/compiler.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/compiler.py diff --git a/dl/kan/kan/experiment.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiment.py similarity index 100% rename from dl/kan/kan/experiment.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiment.py diff --git a/dl/kan/kan/experiments/experiment1.ipynb b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiments/experiment1.ipynb similarity index 100% rename from dl/kan/kan/experiments/experiment1.ipynb rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiments/experiment1.ipynb diff --git a/dl/kan/kan/feynman.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/feynman.py similarity index 100% rename from dl/kan/kan/feynman.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/feynman.py diff --git a/dl/kan/kan/hypothesis.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/hypothesis.py similarity index 100% rename 
from dl/kan/kan/hypothesis.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/hypothesis.py diff --git a/dl/kan/kan/spline.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/spline.py similarity index 100% rename from dl/kan/kan/spline.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/spline.py diff --git a/dl/kan/kan/utils.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/utils.py similarity index 100% rename from dl/kan/kan/utils.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/utils.py diff --git a/dl/kan/requirements.txt b/methodology/kolmogorov_arnold_networks/kan/pytorch/requirements.txt similarity index 100% rename from dl/kan/requirements.txt rename to methodology/kolmogorov_arnold_networks/kan/pytorch/requirements.txt diff --git a/dl/kan/run_train.sh b/methodology/kolmogorov_arnold_networks/kan/pytorch/run_train.sh similarity index 100% rename from dl/kan/run_train.sh rename to methodology/kolmogorov_arnold_networks/kan/pytorch/run_train.sh diff --git a/dl/kan/train_kan.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/train_kan.py similarity index 100% rename from dl/kan/train_kan.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/train_kan.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/README.md b/nlp/llm/deepseek_moe_7b/colossalai/README.md similarity index 80% rename from nlp/llm/deepseek_moe_7b/ColossalAI/README.md rename to nlp/llm/deepseek_moe_7b/colossalai/README.md index 1cbc5ba4..4a02e88a 100644 --- a/nlp/llm/deepseek_moe_7b/ColossalAI/README.md +++ b/nlp/llm/deepseek_moe_7b/colossalai/README.md @@ -1,8 +1,10 @@ -# Colossal-AI LLaMA-7B +# DeepSeekMoE 7B (ColossalAI) ## Model description + +DeepSeekMoE 7B is a variant of the 16B model. + DeepSeekMoE 16B is a Mixture-of-Experts (MoE) language model with 16.4B parameters. 
It employs an innovative MoE architecture, which involves two principal strategies: fine-grained expert segmentation and shared experts isolation. -DeepSeekMoE 7B is a variant of the 16B model. ## Step 1: Install @@ -11,14 +13,16 @@ Firstly, you should ensure that ColossalAI is installed in the environment. Gene ## Step 2: Prepare model and config Get "deepseek-moe-16b-base" models and config file from huggingface or other place, and mv it to "/home/model_zoos/nlp/deepseek-moe-16b-base". -One recommended link: "https://huggingface.co/deepseek-ai/deepseek-moe-16b-base/tree/main". +One recommended link: "<https://huggingface.co/deepseek-ai/deepseek-moe-16b-base/tree/main>". ## Step 3: Training + ```bash -$ bash deepseek_7b_pretrain.sh +bash deepseek_moe_7b_pretrain.sh ``` ## Results + | Model | Training speed | |--------------------|--------------------| | deepseek-moe-7b | 6.85 samples/sec | diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/benchmark.py b/nlp/llm/deepseek_moe_7b/colossalai/benchmark.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/benchmark.py rename to nlp/llm/deepseek_moe_7b/colossalai/benchmark.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/data_utils.py b/nlp/llm/deepseek_moe_7b/colossalai/data_utils.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/data_utils.py rename to nlp/llm/deepseek_moe_7b/colossalai/data_utils.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/deepseek_moe_7b_pretrain.sh b/nlp/llm/deepseek_moe_7b/colossalai/deepseek_moe_7b_pretrain.sh similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/deepseek_moe_7b_pretrain.sh rename to nlp/llm/deepseek_moe_7b/colossalai/deepseek_moe_7b_pretrain.sh diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/model_utils.py b/nlp/llm/deepseek_moe_7b/colossalai/model_utils.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/model_utils.py rename to nlp/llm/deepseek_moe_7b/colossalai/model_utils.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/performance_evaluator.py 
b/nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/performance_evaluator.py rename to nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/README.md b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md similarity index 45% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/README.md rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md index 26198825..7fdca2d6 100644 --- a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/README.md +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md @@ -1,44 +1,45 @@ -# Llama3-8B (ColossalAI) +# Llama3-8B SFT (ColossalAI) ## Model description + The Llama 3 Herd of models natively supports multilinguality, coding, reasoning, and tool usage. Our largest model is dense Transformer with 405B parameters, processing information in a context window of up to 128K tokens, Llama 3 8B is the smallest model of Llama 3 Herd of models. ## Step 1: Installation Firstly, you should ensure that the corresponding version of ColossalAI has been installed in the iluvatar environment. Then install applications as follows: -```bash -$ cd ColossalAI/applications/Colossal-LLaMA -$ pip3 install -e . +```sh +cd colossalai/applications/Colossal-LLaMA +pip3 install -e . 
``` ## Step 2: Preparing datasets and checkpoints -```bash -$ pip3 install modelscope -$ python3 ./get_Meta_LLaMA_8B.py -$ mkdir -p /home/model_zoos/nlp -$ mv ~/.cache/modelscope/hub/LLM-Research/Meta-Llama-3-8B /home/model_zoos/nlp +```sh +pip3 install modelscope +python3 ./get_Meta_LLaMA_8B.py +mkdir -p /home/model_zoos/nlp +mv ~/.cache/modelscope/hub/LLM-Research/Meta-Llama-3-8B /home/model_zoos/nlp -$ wget http://files.deepspark.org.cn:880/deepspark/tokenizer.model -$ cp tokenizer.model /home/model_zoos/nlp/Meta-Llama-3-8B +wget http://files.deepspark.org.cn:880/deepspark/tokenizer.model +cp tokenizer.model /home/model_zoos/nlp/Meta-Llama-3-8B -$ wget http://files.deepspark.org.cn:880/deepspark/school_math_0.25M.jsonl -$ mv school_math_0.25M.jsonl dataset/school_math -$ bash ./prepare_sft_dataset.sh llama3 +wget http://files.deepspark.org.cn:880/deepspark/school_math_0.25M.jsonl +mv school_math_0.25M.jsonl dataset/school_math +bash ./prepare_sft_dataset.sh llama3 ``` ## Step 3: Training -```bash -$ bash run_llama3_8b_sft_3d.sh + +```sh +bash run_llama3_8b_sft_3d.sh ``` ## Results -| No. 
| model | peft | num_gpus |train_samples_per_second | -| ---- | --------- | ----------- | ------------------ | ---------------------- | -| 1 | llama3-8b | Full sft | 16 | 1.53 | - +| model | peft | num_gpus |train_samples_per_second | +| --------- | ----------- | ------------------ | ---------------------- | +| llama3-8b | Full sft | 16 | 1.53 | ## Reference diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/__init__.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py rename to 
nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/model/init_model.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/model/init_model.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py diff --git 
a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/froze.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/froze.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py similarity index 100% rename from 
nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/utils.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/utils.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/convert_data.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/convert_data.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/convert_data.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/convert_data.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py 
b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/performance_evaluator.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/performance_evaluator.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/performance_evaluator.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/performance_evaluator.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/prepare_sft_dataset.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/prepare_sft_dataset.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/prepare_sft_dataset.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/prepare_sft_dataset.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/requirements.txt b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/requirements.txt similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/requirements.txt rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/requirements.txt diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/setup.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py similarity index 100% rename from 
nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/setup.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.example.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.example.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train_sft.example.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train_sft.example.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/version.txt b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/version.txt similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/version.txt rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/version.txt diff --git a/nlp/llm/llama3-8b/megatron-deepspeed/README.md b/nlp/llm/llama3_8b/megatron-deepspeed/README.md similarity index 100% rename from nlp/llm/llama3-8b/megatron-deepspeed/README.md rename to nlp/llm/llama3_8b/megatron-deepspeed/README.md -- Gitee From 6fdfaa95172435df625417eb71e0c921875114e6 Mon Sep 17 00:00:00 2001 From: deepspark-bot Date: Wed, 11 Dec 2024 11:50:16 +0800 Subject: [PATCH 2/2] auto add license --- 
nlp/llm/deepseek_moe_7b/colossalai/data_utils.py | 15 +++++++++++++++ nlp/llm/deepseek_moe_7b/colossalai/model_utils.py | 15 +++++++++++++++ .../colossalai/performance_evaluator.py | 15 +++++++++++++++ .../Colossal-LLaMA/colossal_llama/__init__.py | 15 +++++++++++++++ .../colossal_llama/dataset/__init__.py | 15 +++++++++++++++ .../colossal_llama/dataset/conversation.py | 15 +++++++++++++++ .../colossal_llama/dataset/dummy_dataset.py | 15 +++++++++++++++ .../colossal_llama/model/init_model.py | 15 +++++++++++++++ .../colossal_llama/tokenizer/init_tokenizer.py | 15 +++++++++++++++ .../colossal_llama/utils/__init__.py | 15 +++++++++++++++ .../colossal_llama/utils/ckpt_io.py | 15 +++++++++++++++ .../Colossal-LLaMA/colossal_llama/utils/froze.py | 15 +++++++++++++++ .../colossal_llama/utils/neftune_patch.py | 15 +++++++++++++++ .../colossal_llama/utils/stream_chat_patch.py | 15 +++++++++++++++ .../Colossal-LLaMA/colossal_llama/utils/utils.py | 15 +++++++++++++++ .../applications/Colossal-LLaMA/setup.py | 15 +++++++++++++++ .../applications/Colossal-LLaMA/train.example.sh | 15 +++++++++++++++ .../Colossal-LLaMA/train_sft.example.sh | 15 +++++++++++++++ 18 files changed, 270 insertions(+) diff --git a/nlp/llm/deepseek_moe_7b/colossalai/data_utils.py b/nlp/llm/deepseek_moe_7b/colossalai/data_utils.py index 6b9e8ef2..77488729 100644 --- a/nlp/llm/deepseek_moe_7b/colossalai/data_utils.py +++ b/nlp/llm/deepseek_moe_7b/colossalai/data_utils.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import random from typing import Iterator, Optional diff --git a/nlp/llm/deepseek_moe_7b/colossalai/model_utils.py b/nlp/llm/deepseek_moe_7b/colossalai/model_utils.py index 63569bc6..0de9a8a3 100644 --- a/nlp/llm/deepseek_moe_7b/colossalai/model_utils.py +++ b/nlp/llm/deepseek_moe_7b/colossalai/model_utils.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from contextlib import contextmanager import torch diff --git a/nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py b/nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py index 65c7e49a..878c9699 100644 --- a/nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py +++ b/nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from time import time from typing import Optional diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py index 56fafa58..51970121 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py @@ -1,2 +1,17 @@ #!/usr/bin/env python3 +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # -*- coding: utf-8 -*- diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py index 56fafa58..51970121 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py @@ -1,2 +1,17 @@ #!/usr/bin/env python3 +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # -*- coding: utf-8 -*- diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py index 8ec9c848..6acd1a67 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Copyright 2023 lm-sys@FastChat # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py index 3175159f..56707eb6 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import torch from torch.utils.data import Dataset diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py index f61291f3..016dd016 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py @@ -1,4 +1,19 @@ #!/usr/bin/env python3 +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # -*- coding: utf-8 -*- """ diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py index 43913550..dd8616ab 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py @@ -1,4 +1,19 @@ #!/usr/bin/env python +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # -*- encoding: utf-8 -*- """ diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py index 56fafa58..51970121 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py @@ -1,2 +1,17 @@ #!/usr/bin/env python3 +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # -*- coding: utf-8 -*- diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py index 05342ce4..db3c2cac 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py @@ -1,4 +1,19 @@ #!/usr/bin/env python3 +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # -*- coding: utf-8 -*- """ diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py index 82677160..aeba84e0 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py @@ -1,4 +1,19 @@ #!/usr/bin/env python3 +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # -*- coding: utf-8 -*- from transformers.models.llama import LlamaForCausalLM diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py index 21d769f3..988382bb 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # Copyright 2023 The Hugging Face team # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py index 44fa3678..e941a9e3 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from copy import deepcopy from typing import Any, Callable, Dict, List, Optional, Tuple diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py index f24ab72c..463407ff 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Utils for Colossal-LLaMA """ diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py index c9ba3169..152bbbd6 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py @@ -1,3 +1,18 @@ +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from setuptools import find_packages, setup diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh index b795e8bc..f6d4ac09 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh @@ -1,4 +1,19 @@ #!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + set_n_least_used_CUDA_VISIBLE_DEVICES() { local n=${1:-"9999"} echo "GPU Memory Usage:" diff --git a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh index d87f9ef8..091e2ec0 100644 --- a/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh @@ -1,4 +1,19 @@ #!/bin/bash +# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # NCCL IB environment variables export NCCL_IB_HCA=mlx5_1:1,mlx5_2:1,mlx5_3:1,mlx5_4:1 -- Gitee