From 1018eb6ed63412cefbc5e65ecd91ddb2c5e03a9e Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Wed, 11 Dec 2024 03:48:09 +0000 Subject: [PATCH 1/3] add 24.12 models to readme model list Signed-off-by: mingjiang.li --- README.md | 73 ++++-------------- .../kan/pytorch}/README.md | 0 .../kan/pytorch}/__init__.py | 0 .../.ipynb_checkpoints/KANLayer-checkpoint.py | 0 .../.ipynb_checkpoints/LBFGS-checkpoint.py | 0 .../kan/.ipynb_checkpoints/MLP-checkpoint.py | 0 .../.ipynb_checkpoints/MultKAN-checkpoint.py | 0 .../Symbolic_KANLayer-checkpoint.py | 0 .../.ipynb_checkpoints/__init__-checkpoint.py | 0 .../.ipynb_checkpoints/compiler-checkpoint.py | 0 .../experiment-checkpoint.py | 0 .../.ipynb_checkpoints/feynman-checkpoint.py | 0 .../hypothesis-checkpoint.py | 0 .../.ipynb_checkpoints/spline-checkpoint.py | 0 .../.ipynb_checkpoints/utils-checkpoint.py | 0 .../kan/pytorch}/kan/KANLayer.py | 0 .../kan/pytorch}/kan/LBFGS.py | 0 .../kan/pytorch}/kan/MLP.py | 0 .../kan/pytorch}/kan/MultKAN.py | 0 .../kan/pytorch}/kan/Symbolic_KANLayer.py | 0 .../kan/pytorch}/kan/__init__.py | 0 .../pytorch}/kan/assets/img/mult_symbol.png | Bin .../pytorch}/kan/assets/img/sum_symbol.png | Bin .../kan/pytorch}/kan/compiler.py | 0 .../kan/pytorch}/kan/experiment.py | 0 .../kan/experiments/experiment1.ipynb | 0 .../kan/pytorch}/kan/feynman.py | 0 .../kan/pytorch}/kan/hypothesis.py | 0 .../kan/pytorch}/kan/spline.py | 0 .../kan/pytorch}/kan/utils.py | 0 .../kan/pytorch}/requirements.txt | 0 .../kan/pytorch}/run_train.sh | 0 .../kan/pytorch}/train_kan.py | 0 .../{ColossalAI => colossalai}/README.md | 12 ++- .../{ColossalAI => colossalai}/benchmark.py | 0 .../{ColossalAI => colossalai}/data_utils.py | 0 .../deepseek_moe_7b_pretrain.sh | 0 .../{ColossalAI => colossalai}/model_utils.py | 0 .../performance_evaluator.py | 0 .../applications/Colossal-LLaMA/README.md | 41 +++++----- .../Colossal-LLaMA/colossal_llama/__init__.py | 0 .../colossal_llama/dataset/__init__.py | 0 
.../colossal_llama/dataset/conversation.py | 0 .../colossal_llama/dataset/dummy_dataset.py | 0 .../colossal_llama/dataset/loader.py | 0 .../dataset/spliced_and_tokenized_dataset.py | 0 .../colossal_llama/model/init_model.py | 0 .../tokenizer/init_tokenizer.py | 0 .../colossal_llama/utils/__init__.py | 0 .../colossal_llama/utils/ckpt_io.py | 0 .../colossal_llama/utils/froze.py | 0 .../colossal_llama/utils/neftune_patch.py | 0 .../colossal_llama/utils/stream_chat_patch.py | 0 .../colossal_llama/utils/utils.py | 0 .../Colossal-LLaMA/dataset/convert_data.py | 0 .../dataset/prepare_pretrain_dataset.py | 0 .../dataset/prepare_sft_dataset.py | 0 .../Colossal-LLaMA/get_Meta_LLaMA_8B.py | 0 .../Colossal-LLaMA/performance_evaluator.py | 0 .../Colossal-LLaMA/prepare_sft_dataset.sh | 0 .../Colossal-LLaMA/requirements.txt | 0 .../Colossal-LLaMA/run_llama3_8b_sft_3d.sh | 0 .../applications/Colossal-LLaMA/setup.py | 0 .../Colossal-LLaMA/train.example.sh | 0 .../applications/Colossal-LLaMA/train.py | 0 .../Colossal-LLaMA/train_sft.example.sh | 0 .../applications/Colossal-LLaMA/version.txt | 0 .../megatron-deepspeed/README.md | 0 68 files changed, 43 insertions(+), 83 deletions(-) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/README.md (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/__init__.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/KANLayer-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/LBFGS-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/MLP-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/MultKAN-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py (100%) rename {dl/kan => 
methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/__init__-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/compiler-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/experiment-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/feynman-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/hypothesis-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/spline-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/.ipynb_checkpoints/utils-checkpoint.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/KANLayer.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/LBFGS.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/MLP.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/MultKAN.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/Symbolic_KANLayer.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/__init__.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/assets/img/mult_symbol.png (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/assets/img/sum_symbol.png (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/compiler.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/experiment.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/experiments/experiment1.ipynb (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/feynman.py 
(100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/hypothesis.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/spline.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/kan/utils.py (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/requirements.txt (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/run_train.sh (100%) rename {dl/kan => methodology/kolmogorov_arnold_networks/kan/pytorch}/train_kan.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/README.md (80%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/benchmark.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/data_utils.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/deepseek_moe_7b_pretrain.sh (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/model_utils.py (100%) rename nlp/llm/deepseek_moe_7b/{ColossalAI => colossalai}/performance_evaluator.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/README.md (45%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/__init__.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => 
colossalai}/applications/Colossal-LLaMA/colossal_llama/model/init_model.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/froze.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/colossal_llama/utils/utils.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/dataset/convert_data.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/performance_evaluator.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/prepare_sft_dataset.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/requirements.txt (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/setup.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => 
colossalai}/applications/Colossal-LLaMA/train.example.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/train.py (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/train_sft.example.sh (100%) rename nlp/llm/llama3_8b/{ColossalAI => colossalai}/applications/Colossal-LLaMA/version.txt (100%) rename nlp/llm/{llama3-8b => llama3_8b}/megatron-deepspeed/README.md (100%) diff --git a/README.md b/README.md index 58a4fe20d..61bc70bbb 100644 --- a/README.md +++ b/README.md @@ -4,62 +4,6 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 ## 模型列表 -- Computer Vision - - - [Classification](#classification) - - [Face Detection](#face-detection) - - [Face Recognition](#face-recognition) - - [Instance Segmentation](#instance-segmentation) - - [Knowledge Distillation](#knowledge-distillation) - - [Network Pruning](#network-pruning) - - [Object Detection](#object-detection) - - [3D Object Detection](#3d-object-detection) - - [OCR](#ocr) - - [Point Cloud](#point-cloud) - - [Pose Estimation](#pose-estimation) - - [Self-Supervised Learning](#self-supervised-learning) - - [Semantic Segmentation](#semantic-segmentation) - - [Super Resolution](#super-resolution) - - [Tracking](#tracking) - - [Traffic Forecast](#traffic-forecast) - -- Graph Neural Network (GNN) - - - [Graph Attention](#graph-attention) - - [Node Classification](#node-classification) - - [Text Classification](#text-classification) - -- High Performance Computing (HPC) - - - [Molecular Dynamics](#molecular-dynamics) - -- [Multimodal](#multimodal) - -- Natural Language Processing (NLP) - - - [Cloze Test](#cloze-test) - - [Dialogue Generation](#dialogue-generation) - - [Language Modeling](#language-modeling) - - [Large Language Model (LLM)](#large-language-model-llm) - - [Text Correction](#text-correction) - - [Translation](#translation) - -- Recommendation - - - [Collaborative Filtering](#collaborative-filtering) - - [Click Through Rate](#click-through-rate) - -- 
[Reinforcement Learning](#reinforcement-learning) - -- Speech - - - [Speech Recognition](#speech-recognition) - - [Speech Synthesis](#speech-synthesis) - -- [3D Reconstruction](#3d-reconstruction) - --------- - ### Computer Vision #### Classification @@ -197,6 +141,7 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [Faster R-CNN](cv/detection/fasterrcnn/pytorch/README.md) | PyTorch | COCO [FCOS](cv/detection/fcos/paddlepaddle/README.md) | PaddlePaddle | COCO [FCOS](cv/detection/fcos/pytorch/README.md) | PyTorch | COCO +[Mamba-YOLO](cv/detection/mamba_yolo/pytorch/README.md) | PyTorch | COCO [Mask R-CNN](cv/detection/maskrcnn/pytorch/README.md) | PyTorch | COCO [Mask R-CNN](cv/detection/maskrcnn/paddlepaddle/README.md) | PaddlePaddle | COCO [OC_SORT](cv/detection/oc_sort/paddlepaddle/README.md) | PaddlePaddle | MOT17 @@ -388,6 +333,14 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 -------- | ------ | ---- [Water/se_e2_a](hpc/molecular_dynamics/water_se_e2_a/tensorflow/README.md) | TensorFlow (DeePMD-kit) | data_water +### Methodology + +#### Kolmogorov-Arnold Networks + +模型名称 | 框架 | 数据集 +-------- | ------ | ---- +[KAN](methodology/kolmogorov_arnold_networks/kan/pytorch/README.md) | PyTorch | - + ### Multimodal 模型名称 | 框架 | 数据集 @@ -396,7 +349,7 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [CLIP](multimodal/Language-Image_Pre-Training/clip/pytorch/README.md) | PyTorch | CIFAR100 [ControlNet](multimodal/diffusion/ControlNet/README.md) | PyTorch | Fill50K [DDPM](multimodal/diffusion/ddpm/README.md) | PyTorch | CIFAR-10 -[LLaVA](multimodal/llava/pytorch/README.md) | PyTorch | LLaVA-Pretrain +[LLaVA 1.5](multimodal/llava/pytorch/README.md) | PyTorch | LLaVA-Pretrain [L-Verse](multimodal/Language-Image_Pre-Training/L-Verse/pytorch/README.md) | PyTorch | ImageNet [Stable Diffusion 1.4](multimodal/diffusion/stable-diffusion/training/README.md) | PyTorch | pokemon-images [Stable Diffusion 1.5](multimodal/diffusion/stable-diffusion/sd_1.5/README.md) | PyTorch | pokemon-images @@ -445,6 +398,7 @@ 
DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [ChatGLM-6B](nlp/llm/chatglm-6b/deepspeed/README.md) | PyTorch (DeepSpeed) | ADGEN & chatglm-6b [ChatGLM2-6B SFT](nlp/llm/ChatGLM2-6b-sft/README.md) | PyTorch (DeepSpeed) | ADGEN & chatglm2-6b [ChatGLM3-6B](nlp/llm/chatglm3-6b/deepspeed/finetune_demo/README.md) | PyTorch (DeepSpeed) | ADGEN & chatglm3-6b +[DeepSeekMoE 7B](nlp/llm/deepseek_moe_7b/colossalai/README.md) | PyTorch (ColossalAI) | deepseek-moe-16b-base [Llama-7B](nlp/llm/llama-7b/colossalai/README.md) | PyTorch (Colossal-AI) | llama-7b-hf [Llama2-7B](nlp/llm/llama2-7b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus [Llama2-7B Reward Model Finetuning](nlp/llm/llama2-7b_reward_sft/deepspeed/README.md) | PyTorch (DeepSpeed) | Dahoas/rm-static @@ -452,11 +406,12 @@ DeepSparkHub甄选上百个应用算法和模型,覆盖AI和通用计算各领 [Llama2-7B SFT](nlp/llm/llama2-7b_sft/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | gpt_small-117M [Llama2-13B](nlp/llm/llama2-13b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus [Llama2-34B](nlp/llm/llama2-34b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus -[Llama3-8B](nlp/llm/llama3-8b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus +[Llama3-8B](nlp/llm/llama3_8b/megatron-deepspeed/README.md) | PyTorch (Megatron-DeepSpeed) | Bookcorpus +[Llama3-8B SFT](nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md) | PyTorch (ColossalAI) | school_math_0.25M [QWen-7B](nlp/llm/qwen-7b/firefly/README.md) | PyTorch (Firefly) | qwen-7b [QWen1.5-7B](nlp/llm/qwen1.5-7b/firefly/README.md) | PyTorch (Firefly) | school_math [QWen1.5-14B](nlp/llm/qwen1.5-14b/firefly/README.md) | PyTorch (Firefly) | school_math -[QWen2.5-7B](nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md) | PyTorch (LLaMA-Factory) | qwen2.5-7b +[Qwen2.5-7B SFT](nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md) | PyTorch (LLaMA-Factory) | qwen2.5-7b #### Text Correction diff --git a/dl/kan/README.md
b/methodology/kolmogorov_arnold_networks/kan/pytorch/README.md similarity index 100% rename from dl/kan/README.md rename to methodology/kolmogorov_arnold_networks/kan/pytorch/README.md diff --git a/dl/kan/__init__.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/__init__.py similarity index 100% rename from dl/kan/__init__.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/__init__.py diff --git a/dl/kan/kan/.ipynb_checkpoints/KANLayer-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/KANLayer-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/KANLayer-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/KANLayer-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/LBFGS-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/LBFGS-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/LBFGS-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/LBFGS-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/MLP-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MLP-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/MLP-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MLP-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/MultKAN-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MultKAN-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/MultKAN-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/MultKAN-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py 
similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/Symbolic_KANLayer-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/__init__-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/__init__-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/__init__-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/__init__-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/compiler-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/compiler-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/compiler-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/compiler-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/experiment-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/experiment-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/experiment-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/experiment-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/feynman-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/feynman-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/feynman-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/feynman-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/hypothesis-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/hypothesis-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/hypothesis-checkpoint.py rename to 
methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/hypothesis-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/spline-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/spline-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/spline-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/spline-checkpoint.py diff --git a/dl/kan/kan/.ipynb_checkpoints/utils-checkpoint.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/utils-checkpoint.py similarity index 100% rename from dl/kan/kan/.ipynb_checkpoints/utils-checkpoint.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/.ipynb_checkpoints/utils-checkpoint.py diff --git a/dl/kan/kan/KANLayer.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/KANLayer.py similarity index 100% rename from dl/kan/kan/KANLayer.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/KANLayer.py diff --git a/dl/kan/kan/LBFGS.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/LBFGS.py similarity index 100% rename from dl/kan/kan/LBFGS.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/LBFGS.py diff --git a/dl/kan/kan/MLP.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MLP.py similarity index 100% rename from dl/kan/kan/MLP.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MLP.py diff --git a/dl/kan/kan/MultKAN.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MultKAN.py similarity index 100% rename from dl/kan/kan/MultKAN.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/MultKAN.py diff --git a/dl/kan/kan/Symbolic_KANLayer.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/Symbolic_KANLayer.py similarity index 100% rename from dl/kan/kan/Symbolic_KANLayer.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/Symbolic_KANLayer.py diff 
--git a/dl/kan/kan/__init__.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/__init__.py similarity index 100% rename from dl/kan/kan/__init__.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/__init__.py diff --git a/dl/kan/kan/assets/img/mult_symbol.png b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/mult_symbol.png similarity index 100% rename from dl/kan/kan/assets/img/mult_symbol.png rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/mult_symbol.png diff --git a/dl/kan/kan/assets/img/sum_symbol.png b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/sum_symbol.png similarity index 100% rename from dl/kan/kan/assets/img/sum_symbol.png rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/assets/img/sum_symbol.png diff --git a/dl/kan/kan/compiler.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/compiler.py similarity index 100% rename from dl/kan/kan/compiler.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/compiler.py diff --git a/dl/kan/kan/experiment.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiment.py similarity index 100% rename from dl/kan/kan/experiment.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiment.py diff --git a/dl/kan/kan/experiments/experiment1.ipynb b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiments/experiment1.ipynb similarity index 100% rename from dl/kan/kan/experiments/experiment1.ipynb rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/experiments/experiment1.ipynb diff --git a/dl/kan/kan/feynman.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/feynman.py similarity index 100% rename from dl/kan/kan/feynman.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/feynman.py diff --git a/dl/kan/kan/hypothesis.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/hypothesis.py similarity index 100% rename 
from dl/kan/kan/hypothesis.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/hypothesis.py diff --git a/dl/kan/kan/spline.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/spline.py similarity index 100% rename from dl/kan/kan/spline.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/spline.py diff --git a/dl/kan/kan/utils.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/kan/utils.py similarity index 100% rename from dl/kan/kan/utils.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/kan/utils.py diff --git a/dl/kan/requirements.txt b/methodology/kolmogorov_arnold_networks/kan/pytorch/requirements.txt similarity index 100% rename from dl/kan/requirements.txt rename to methodology/kolmogorov_arnold_networks/kan/pytorch/requirements.txt diff --git a/dl/kan/run_train.sh b/methodology/kolmogorov_arnold_networks/kan/pytorch/run_train.sh similarity index 100% rename from dl/kan/run_train.sh rename to methodology/kolmogorov_arnold_networks/kan/pytorch/run_train.sh diff --git a/dl/kan/train_kan.py b/methodology/kolmogorov_arnold_networks/kan/pytorch/train_kan.py similarity index 100% rename from dl/kan/train_kan.py rename to methodology/kolmogorov_arnold_networks/kan/pytorch/train_kan.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/README.md b/nlp/llm/deepseek_moe_7b/colossalai/README.md similarity index 80% rename from nlp/llm/deepseek_moe_7b/ColossalAI/README.md rename to nlp/llm/deepseek_moe_7b/colossalai/README.md index 1cbc5ba47..4a02e88a1 100644 --- a/nlp/llm/deepseek_moe_7b/ColossalAI/README.md +++ b/nlp/llm/deepseek_moe_7b/colossalai/README.md @@ -1,8 +1,10 @@ -# Colossal-AI LLaMA-7B +# DeepSeekMoE 7B (ColossalAI) ## Model description + +DeepSeekMoE 7B is a variant of the 16B model. + DeepSeekMoE 16B is a Mixture-of-Experts (MoE) language model with 16.4B parameters. 
It employs an innovative MoE architecture, which involves two principal strategies: fine-grained expert segmentation and shared experts isolation. -DeepSeekMoE 7B is a variant of the 16B model. ## Step 1: Install @@ -11,14 +13,16 @@ Firstly, you should ensure that ColossalAI is installed in the environment. Gene ## Step 2: Prepare model and config Get "deepseek-moe-16b-base" models and config file from huggingface or other place, and mv it to "/home/model_zoos/nlp/deepseek-moe-16b-base". -One recommended link: "https://huggingface.co/deepseek-ai/deepseek-moe-16b-base/tree/main". +One recommended link: [deepseek-ai/deepseek-moe-16b-base](https://huggingface.co/deepseek-ai/deepseek-moe-16b-base/tree/main). ## Step 3: Training + ```bash -$ bash deepseek_7b_pretrain.sh +bash deepseek_moe_7b_pretrain.sh ``` ## Results + | Model | Training speed | |--------------------|--------------------| | deepseek-moe-7b | 6.85 samples/sec | diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/benchmark.py b/nlp/llm/deepseek_moe_7b/colossalai/benchmark.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/benchmark.py rename to nlp/llm/deepseek_moe_7b/colossalai/benchmark.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/data_utils.py b/nlp/llm/deepseek_moe_7b/colossalai/data_utils.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/data_utils.py rename to nlp/llm/deepseek_moe_7b/colossalai/data_utils.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/deepseek_moe_7b_pretrain.sh b/nlp/llm/deepseek_moe_7b/colossalai/deepseek_moe_7b_pretrain.sh similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/deepseek_moe_7b_pretrain.sh rename to nlp/llm/deepseek_moe_7b/colossalai/deepseek_moe_7b_pretrain.sh diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/model_utils.py b/nlp/llm/deepseek_moe_7b/colossalai/model_utils.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/model_utils.py rename to nlp/llm/deepseek_moe_7b/colossalai/model_utils.py diff --git a/nlp/llm/deepseek_moe_7b/ColossalAI/performance_evaluator.py
b/nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py similarity index 100% rename from nlp/llm/deepseek_moe_7b/ColossalAI/performance_evaluator.py rename to nlp/llm/deepseek_moe_7b/colossalai/performance_evaluator.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/README.md b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md similarity index 45% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/README.md rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md index 26198825d..7fdca2d6b 100644 --- a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/README.md +++ b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/README.md @@ -1,44 +1,45 @@ -# Llama3-8B (ColossalAI) +# Llama3-8B SFT (ColossalAI) ## Model description + The Llama 3 Herd of models natively supports multilinguality, coding, reasoning, and tool usage. Our largest model is dense Transformer with 405B parameters, processing information in a context window of up to 128K tokens, Llama 3 8B is the smallest model of Llama 3 Herd of models. ## Step 1: Installation Firstly, you should ensure that the corresponding version of ColossalAI has been installed in the iluvatar environment. Then install applications as follows: -```bash -$ cd ColossalAI/applications/Colossal-LLaMA -$ pip3 install -e . +```sh +cd colossalai/applications/Colossal-LLaMA +pip3 install -e .
``` ## Step 2: Preparing datasets and checkpoints -```bash -$ pip3 install modelscope -$ python3 ./get_Meta_LLaMA_8B.py -$ mkdir -p /home/model_zoos/nlp -$ mv ~/.cache/modelscope/hub/LLM-Research/Meta-Llama-3-8B /home/model_zoos/nlp +```sh +pip3 install modelscope +python3 ./get_Meta_LLaMA_8B.py +mkdir -p /home/model_zoos/nlp +mv ~/.cache/modelscope/hub/LLM-Research/Meta-Llama-3-8B /home/model_zoos/nlp -$ wget http://files.deepspark.org.cn:880/deepspark/tokenizer.model -$ cp tokenizer.model /home/model_zoos/nlp/Meta-Llama-3-8B +wget http://files.deepspark.org.cn:880/deepspark/tokenizer.model +cp tokenizer.model /home/model_zoos/nlp/Meta-Llama-3-8B -$ wget http://files.deepspark.org.cn:880/deepspark/school_math_0.25M.jsonl -$ mv school_math_0.25M.jsonl dataset/school_math -$ bash ./prepare_sft_dataset.sh llama3 +wget http://files.deepspark.org.cn:880/deepspark/school_math_0.25M.jsonl +mv school_math_0.25M.jsonl dataset/school_math +bash ./prepare_sft_dataset.sh llama3 ``` ## Step 3: Training -```bash -$ bash run_llama3_8b_sft_3d.sh + +```sh +bash run_llama3_8b_sft_3d.sh ``` ## Results -| No. 
| model | peft | num_gpus |train_samples_per_second | -| ---- | --------- | ----------- | ------------------ | ---------------------- | -| 1 | llama3-8b | Full sft | 16 | 1.53 | - +| model | peft | num_gpus |train_samples_per_second | +| --------- | ----------- | ------------------ | ---------------------- | +| llama3-8b | Full sft | 16 | 1.53 | ## Reference diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/__init__.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/__init__.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/__init__.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/conversation.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py rename to 
nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/dummy_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/loader.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/dataset/spliced_and_tokenized_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/model/init_model.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/model/init_model.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/model/init_model.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/tokenizer/init_tokenizer.py diff --git 
a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/__init__.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/ckpt_io.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/froze.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/froze.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/froze.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/neftune_patch.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py similarity index 100% rename from 
nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/stream_chat_patch.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/utils.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/colossal_llama/utils/utils.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/colossal_llama/utils/utils.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/convert_data.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/convert_data.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/convert_data.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/convert_data.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_pretrain_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/dataset/prepare_sft_dataset.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py 
b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/get_Meta_LLaMA_8B.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/performance_evaluator.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/performance_evaluator.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/performance_evaluator.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/performance_evaluator.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/prepare_sft_dataset.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/prepare_sft_dataset.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/prepare_sft_dataset.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/prepare_sft_dataset.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/requirements.txt b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/requirements.txt similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/requirements.txt rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/requirements.txt diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/run_llama3_8b_sft_3d.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/setup.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py similarity index 100% rename from 
nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/setup.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/setup.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.example.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.example.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.example.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.py b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.py similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train.py rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train.py diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train_sft.example.sh b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/train_sft.example.sh rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/train_sft.example.sh diff --git a/nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/version.txt b/nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/version.txt similarity index 100% rename from nlp/llm/llama3_8b/ColossalAI/applications/Colossal-LLaMA/version.txt rename to nlp/llm/llama3_8b/colossalai/applications/Colossal-LLaMA/version.txt diff --git a/nlp/llm/llama3-8b/megatron-deepspeed/README.md b/nlp/llm/llama3_8b/megatron-deepspeed/README.md similarity index 100% rename from nlp/llm/llama3-8b/megatron-deepspeed/README.md rename to nlp/llm/llama3_8b/megatron-deepspeed/README.md -- Gitee From 398210e86f4a7e46d5517c03c672c79f15594b54 Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Wed, 11 Dec 2024 05:20:27 +0000 Subject: [PATCH 2/3] update kan markdown format Signed-off-by: mingjiang.li --- 
.../kolmogorov_arnold_networks/kan/pytorch/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/methodology/kolmogorov_arnold_networks/kan/pytorch/README.md b/methodology/kolmogorov_arnold_networks/kan/pytorch/README.md index fd10c9351..ec6491e9e 100644 --- a/methodology/kolmogorov_arnold_networks/kan/pytorch/README.md +++ b/methodology/kolmogorov_arnold_networks/kan/pytorch/README.md @@ -1,23 +1,22 @@ # KAN ## Model description -Kolmogorov-Arnold Networks (KANs) are promising alternatives of Multi-Layer Perceptrons (MLPs). KANs have strong mathematical foundations just like MLPs: MLPs are based on the universal approximation theorem, while KANs are based on Kolmogorov-Arnold representation theorem. KANs and MLPs are dual: KANs have activation functions on edges, while MLPs have activation functions on nodes. This simple change makes KANs better (sometimes much better!) than MLPs in terms of both model accuracy and interpretability. +Kolmogorov-Arnold Networks (KANs) are promising alternatives to Multi-Layer Perceptrons (MLPs). KANs have strong mathematical foundations just like MLPs: MLPs are based on the universal approximation theorem, while KANs are based on the Kolmogorov-Arnold representation theorem. KANs and MLPs are dual: KANs have activation functions on edges, while MLPs have activation functions on nodes. This simple change makes KANs better (sometimes much better!) than MLPs in terms of both model accuracy and interpretability. 
## Run -```shell -$ pip3 install -r requirements.txt -$ bash ./run_train.sh +```sh +pip3 install -r requirements.txt +bash ./run_train.sh ``` ## Result + | Model | Training speed | |-------------|------------------| | KAN | 6490 samples/sec | - ## Reference - [pykan](https://github.com/KindXiaoming/pykan/tree/master) - -- Gitee From 5ded1f4d385b782a6165d4bf2edccbc56d60185e Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Wed, 11 Dec 2024 05:47:52 +0000 Subject: [PATCH 3/3] format qwen 2.5 7b llama factory readme Signed-off-by: mingjiang.li --- nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md b/nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md index 9331c4b86..65c87312c 100644 --- a/nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md +++ b/nlp/llm/qwen2.5-7b/LLaMA-Factory/README.md @@ -1,4 +1,4 @@ -# Qwen2.5-7B +# Qwen2.5-7B SFT (LLaMA-Factory) ## Model description -- Gitee