From 87477cdd502e44773b9c1384f2b5902b4ffc4c38 Mon Sep 17 00:00:00 2001 From: glhyy Date: Mon, 23 Sep 2024 01:43:08 +0000 Subject: [PATCH 01/32] =?UTF-8?q?!1709=201.2=E5=88=86=E6=94=AF=E6=9B=B4?= =?UTF-8?q?=E6=96=B0llama2-dpo=E8=84=9A=E6=9C=AC=20Merge=20pull=20request?= =?UTF-8?q?=20!1709=20from=20glhyy/1.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/mcore/llama2/dpo_llama2_7b_ptd.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/mcore/llama2/dpo_llama2_7b_ptd.sh b/examples/mcore/llama2/dpo_llama2_7b_ptd.sh index 2073b8c2a..28d0ac354 100644 --- a/examples/mcore/llama2/dpo_llama2_7b_ptd.sh +++ b/examples/mcore/llama2/dpo_llama2_7b_ptd.sh @@ -12,7 +12,7 @@ WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES)) CKPT_SAVE_DIR="your model save ckpt path" DATA_PATH="your data path" -TOKENIZER_MODEL="your tokenizer path" +TOKENIZER_PATH="your tokenizer path" CKPT_LOAD_DIR="your model ckpt path" TP=2 PP=4 @@ -34,8 +34,8 @@ GPT_ARGS=" --hidden-size 4096 \ --ffn-hidden-size 11008 \ --num-attention-heads 32 \ - --tokenizer-type Llama2Tokenizer \ - --tokenizer-model ${TOKENIZER_MODEL} \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path ${TOKENIZER_PATH} \ --seq-length 4096 \ --max-position-embeddings 4096 \ --micro-batch-size 1 \ -- Gitee From 22a6468913eedd55e13643397bfb3b2e4f7ff819 Mon Sep 17 00:00:00 2001 From: LeiZhenzhen Date: Mon, 23 Sep 2024 01:44:05 +0000 Subject: [PATCH 02/32] !1703 update release version Merge pull request !1703 from LeiZhenzhen/1.2 --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 32418ae7c..270d762f7 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ ModelLink已发布版本维护策略: | bk_origin_23 | Demo | EOL | 2023 | 生命周期终止 | 2024/6/30 | | 1.0 | 常规版本 | 维护 | 2024/03/30 | 预计2024/9/30起无维护 | | | 1.1 | 常规版本 | 维护 | 2024/06/30 | 预计2024/12/30起无维护 | | - +| 1.2 | 常规版本 | 维护 | 2024/09/30 | 预计2025/03/30起无维护 | | --- ## 配套版本与支持模型 @@ -64,11 +64,11 @@ ModelLink已发布版本维护策略: | 软件 | [版本](https://www.hiascend.com/zh/) | | :-----------------------: |:----------------------------------:| | Python | 3.8 | -| Driver | 在研版本 | -| Firmware | 在研版本 | -| CANN | 在研版本 | +| Driver | Ascend HDK 24.1.RC3 | +| Firmware | Ascend HDK 24.1.RC3 | +| CANN | CANN 8.0.RC3 | | Torch | 2.1.0、2.2.0 | -| Torch_npu | 在研版本 | +| Torch_npu | release v6.0.RC3 | 【预训练集群性能与线性度】 -- Gitee From ec8b5d4e725e315f43967f669bb0abfc7edf67ba Mon Sep 17 00:00:00 2001 From: zhangjianxiang Date: Mon, 23 Sep 2024 10:23:20 +0000 Subject: [PATCH 03/32] =?UTF-8?q?!1717=20=E4=BF=AE=E6=94=B9torch=E5=8F=8At?= =?UTF-8?q?orchvision=E7=9A=84=E5=AE=89=E8=A3=85=E8=AF=B4=E6=98=8E=20Merge?= =?UTF-8?q?=20pull=20request=20!1717=20from=20zhangjianxiang/1.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/README.md | 6 +++++- requirements.txt | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/README.md b/examples/README.md index 768d8caab..9273b6343 100644 --- a/examples/README.md +++ b/examples/README.md @@ -71,9 +71,13 @@ conda create -n test python=3.8 conda activate test - # 安装 torch 和 torch_npu,注意要选择对应python版本、x86或arm的torch、torch_npu及apex包 + # 安装所需版本的 torch 和 torch_npu,注意要选择对应python版本、x86或arm的torch、torch_npu及apex包 + # 以安装 torch-2.1.0 和 torch_npu-2.1.0为例 pip install torch-2.1.0-cp38-cp38m-manylinux2014_aarch64.whl pip install torch_npu-2.1.0*-cp38-cp38m-linux_aarch64.whl + + # 安装对应版本的torchvision + pip install 
torchvision==0.16.0 # apex for Ascend 参考 https://gitee.com/ascend/apex pip install apex-0.1_ascend*-cp38-cp38m-linux_aarch64.whl diff --git a/requirements.txt b/requirements.txt index 4e4afe71d..1888f81e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,8 +10,6 @@ datasets pybind11 accelerate six -torch==2.1.0 -torchvision==0.16.0 protobuf peft==0.7.1 tiktoken \ No newline at end of file -- Gitee From 1cec0431e40810d4b3fce02812aa24f2360a23da Mon Sep 17 00:00:00 2001 From: RuanZhiXiang Date: Tue, 24 Sep 2024 03:42:26 +0000 Subject: [PATCH 04/32] !1714 [1.2] adaptation for latest mindspeed core060 Merge pull request !1714 from RuanZhiXiang/1.2-adapt-mindspeed-core --- examples/README.md | 4 ++-- modellink/arguments.py | 12 ++++++++++++ .../common/embeddings/rotary_pos_embedding.py | 6 +++++- modellink/core/transformer/attention.py | 1 - modellink/patchs/megatron_patch.py | 6 ++---- .../embeddings/test_rotary_pos_embedding.py | 18 ++++++++++-------- 6 files changed, 31 insertions(+), 16 deletions(-) diff --git a/examples/README.md b/examples/README.md index 9273b6343..709bce526 100644 --- a/examples/README.md +++ b/examples/README.md @@ -88,8 +88,8 @@ # 安装加速库 git clone https://gitee.com/ascend/MindSpeed.git cd MindSpeed - # checkout commit from MindSpeed core_r0.6.0 - git checkout e6ea2117 + # checkout commit from MindSpeed core_r0.6.0 in 0923 + git checkout 4ea42a23 pip install -r requirements.txt pip3 install -e . cd .. diff --git a/modellink/arguments.py b/modellink/arguments.py index 1194ba3c0..4afd80bf4 100644 --- a/modellink/arguments.py +++ b/modellink/arguments.py @@ -487,6 +487,8 @@ def _add_training_args(parser): group.add_argument('--swap-attention', action='store_true', default=False, help='switch to open swap-attention feature.' 'The default is False.') + group.add_argument('--swap-modules', type=str, default=None, + help='Swap modules for model. 
Should be used together with "--swap-attention."') return parser @@ -587,6 +589,12 @@ def _validate_recompute_args(args): if args.recompute_granularity == "selective": raise AssertionError('--recompute-activation-function is not compatible with selective recomputation.') + if args.swap_attention and args.swap_modules is None: + if args.use_mcore_models: + args.swap_modules = "input_layernorm,self_attention,pre_cross_attn_layernorm" + else: + args.swap_modules = "input_norm,self_attention,post_attention_norm" + def _validate_high_availability(args): if args.enable_optimizer_state_local_copy and not args.enable_high_availability: @@ -792,6 +800,10 @@ def _add_dummy_args(args): args.recompute_in_bubble = False args.recompute_in_advance = False + args.moe_alltoall_overlap_comm = False + args.moe_allgather_overlap_comm = False + args.noop_layers = None + def validate_args_decorator(megatron_validate_args): @wraps(megatron_validate_args) diff --git a/modellink/core/models/common/embeddings/rotary_pos_embedding.py b/modellink/core/models/common/embeddings/rotary_pos_embedding.py index b94b718c6..5836616cf 100644 --- a/modellink/core/models/common/embeddings/rotary_pos_embedding.py +++ b/modellink/core/models/common/embeddings/rotary_pos_embedding.py @@ -62,8 +62,12 @@ def apply_yarn_scaling(freqs: torch.Tensor): def rotary_embedding_init_wrapper(fn): @wraps(fn) def wrapper(self, *args, **kwargs): - fn(self, *args, **kwargs) _args = get_args() + if _args.rotary_base: + kwargs["rotary_base"] = _args.rotary_base + + fn(self, *args, **kwargs) + if hasattr(_args, "rope_scaling_type") and _args.rope_scaling_type == "llama3": self.inv_freq = apply_llama3_scaling(self.inv_freq) elif hasattr(_args, "rope_scaling_type") and _args.rope_scaling_type == "yarn": diff --git a/modellink/core/transformer/attention.py b/modellink/core/transformer/attention.py index 62009e3f8..96a98f90d 100644 --- a/modellink/core/transformer/attention.py +++ b/modellink/core/transformer/attention.py @@ -5,7 +5,6 @@ from functools import wraps from megatron.training import get_args from megatron.core import mpu -from megatron.core.models.common.embeddings.rotary_pos_embedding import apply_rotary_pos_emb from mindspeed.core.context_parallel.ulysses_context_parallel import UlyssesContextAttention from mindspeed.core.parallel_state import get_context_parallel_group_for_hybrid_ulysses diff --git a/modellink/patchs/megatron_patch.py b/modellink/patchs/megatron_patch.py index 35a676252..3beeba61e 100644 --- a/modellink/patchs/megatron_patch.py +++ b/modellink/patchs/megatron_patch.py @@ -128,7 +128,6 @@ def patch_fusions(): def patch_core_models(args): from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec from mindspeed.core.models.common.embeddings.rotary_pos_embedding import get_pos_emb_on_this_cp_rank - from mindspeed.core.fusions.rotary_pos_embedding import rotary_embedding_init_wrapper from ..utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank, get_device_wrapper from ..core import rotary_embedding_forward, apply_rotary_pos_emb_bshd from ..core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec_wrapper @@ -136,6 +135,7 @@ def patch_core_models(args): dot_product_attention_forward_wrapper from ..core.transformer.attention import attention_init_wrapper from ..core.models.gpt.gpt_model import gpt_model_init_wrapper + from ..core import rotary_embedding_init_wrapper # Embedding 
PatchManager.register_patch('megatron.core.models.common.embeddings.rotary_pos_embedding.get_pos_emb_on_this_cp_rank', get_pos_emb_on_this_cp_rank) @@ -169,7 +169,7 @@ def patch_core_models(args): def patch_core_transformers(args): from mindspeed.core.transformer.moe.router import aux_loss_load_balancing - from ..core import (PTNorm, topk_router_forward, topk_router_routing, z_loss_func, rotary_embedding_init_wrapper) + from ..core import (PTNorm, topk_router_forward, topk_router_routing, z_loss_func) from mindspeed.core.transformer.moe.token_dispatcher import allgather_token_permutation, allgather_token_unpermutation from mindspeed.core.transformer.moe.grouped_gemm_util import Ops, grouped_gemm_is_available, get_device_capability @@ -179,8 +179,6 @@ def patch_core_transformers(args): from ..core.transformer.transformer_layer import transformer_layer_init_wrapper PatchManager.register_patch('torch.cuda.get_device_capability', get_device_capability) - PatchManager.register_patch('megatron.core.models.common.embeddings.rotary_pos_embedding.RotaryEmbedding.__init__', - rotary_embedding_init_wrapper) PatchManager.register_patch('megatron.core.transformer.transformer_block.TENorm', PTNorm) PatchManager.register_patch('megatron.core.transformer.moe.router.TopKRouter.routing', topk_router_routing) PatchManager.register_patch('megatron.core.transformer.moe.router.TopKRouter.forward', topk_router_forward) diff --git a/tests/ut/model_module/embeddings/test_rotary_pos_embedding.py b/tests/ut/model_module/embeddings/test_rotary_pos_embedding.py index d801d4a30..4226c5fed 100644 --- a/tests/ut/model_module/embeddings/test_rotary_pos_embedding.py +++ b/tests/ut/model_module/embeddings/test_rotary_pos_embedding.py @@ -17,7 +17,6 @@ from types import SimpleNamespace from pathlib import Path import pytest import torch -import mindspeed import modellink from tests.test_tools.dist_test import create_testconfig from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding @@ -27,13 +26,16 @@ class TestRotaryPosEmbedding: test_config = create_testconfig(Path(__file__).with_suffix(".json")) @pytest.fixture - def mock_dependency(self, request, monkeypatch): - monkeypatch.setattr(modellink.core.models.common.embeddings.rotary_pos_embedding, "get_args", - lambda : SimpleNamespace(use_glm_rope=request.getfixturevalue("chatglm"), - rope_scaling_type = None, - )) - monkeypatch.setattr(mindspeed.core.fusions.rotary_pos_embedding, "get_args", - lambda : SimpleNamespace(rotary_base = request.getfixturevalue("rotary_base"))) + def mock_dependency(self, request): + # init test name space + def get_test_namespace(): + test_name_space = SimpleNamespace() + test_name_space.use_glm_rope = request.getfixturevalue("chatglm") + test_name_space.rope_scaling_type = None + test_name_space.rotary_base = request.getfixturevalue("rotary_base") + return test_name_space + # set up name space function + setattr(modellink.core.models.common.embeddings.rotary_pos_embedding, "get_args", get_test_namespace) @pytest.mark.parametrize("rotary_param, chatglm, rotary_base, seq, expected", test_config["test_rotary_pos_embedding"]) def test_rotary_pos_embedding(self, mock_dependency, rotary_param, chatglm, rotary_base, seq, expected): -- Gitee From e6a18342b24c21758ea90aa723c07e5e1b8cb0f7 Mon Sep 17 00:00:00 2001 From: guoxinjie Date: Tue, 24 Sep 2024 12:23:07 +0000 Subject: [PATCH 05/32] =?UTF-8?q?!1724=201.2=20=E5=88=86=E6=94=AF=E8=A1=A5?= =?UTF-8?q?=E5=85=85=E5=85=AC=E7=BD=91=E5=9C=B0=E5=9D=80=E5=A3=B0=E6=98=8E?= 
=?UTF-8?q?=20Merge=20pull=20request=20!1724=20from=20guoxinjie/1.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- SECURITYNOTE.md | 10 ++++++---- examples/chatglm3/data_convert_chatglm3_instruction.sh | 2 +- examples/gpt3/data_convert_gpt_pretrain.sh | 7 +------ examples/llama2/data_convert_llama2_instruction.sh | 2 +- examples/llama3/data_convert_llama3_instruction.sh | 2 +- .../mcore/baichuan2/data_convert_baichuan2_pretrain.sh | 2 +- .../mcore/deepseek2/data_convert_deepseek2_pretrain.sh | 2 +- .../deepseek2_coder/data_convert_deepseek2_pretrain.sh | 2 +- .../mcore/llama2/data_convert_llama2_instruction.sh | 2 +- .../llama2/data_convert_llama2_instruction_pack.sh | 2 +- .../mcore/mistral/data_convert_mistral_instruction.sh | 2 +- examples/qwen/data_convert_qwen_instruction.sh | 2 +- 12 files changed, 17 insertions(+), 20 deletions(-) diff --git a/SECURITYNOTE.md b/SECURITYNOTE.md index a8476ca67..708462fcc 100644 --- a/SECURITYNOTE.md +++ b/SECURITYNOTE.md @@ -51,10 +51,12 @@ | 类型 | 开源代码地址 | 文件名 | 公网IP地址/公网URL地址/域名/邮箱地址 | 用途说明 | |--------|----------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----------| -| 开源代码引入 | 不涉及 | modellink/model/language_model.py:85 | https://github.com/kingoflolz/mesh-transformer-jax/ | 详情地址 | -| 开源代码引入 | 涉及 | tests/pipeline/common.py:6 | https://github.com/microsoft/DeepSpeed/blob/master/tests/unit/common.py | 源代码地址 | -| 开源代码引入 | 涉及 | tests/pipeline/conftest.py:6 | https://github.com/microsoft/DeepSpeed/blob/master/tests/conftest.py | 源代码地址 | - +| 自研 | 不涉及 | modellink/model/language_model.py:85 | https://github.com/kingoflolz/mesh-transformer-jax/ | 详情地址 | +| 自研 | 涉及 | tests/pipeline/common.py:6 | https://github.com/microsoft/DeepSpeed/blob/master/tests/unit/common.py | 源代码地址 | +| 自研 | 涉及 | tests/pipeline/conftest.py:6 | https://github.com/microsoft/DeepSpeed/blob/master/tests/conftest.py | 源代码地址 | +| 自研 | 不涉及 | examples/mcore/gemma/data_convert_gemma_pretrain.sh:5 | https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered/resolve/main/wikipedia-cn-20230720-filtered.json?download=true | 数据下载地址 | +| 自研 | 涉及 | modellink/tasks/rl/dpo.py:178 | https://github.com/huggingface/trl/blob/main/trl/trainer/dpo_trainer.py | 源代码地址 | +| 自研 | 不涉及 | modellink/core/transformer/moe/moe_utils.py:135 | https://arxiv.org/abs/2101.03961 | 论文地址 | ## 公开接口声明 ModelLink 暂时未发布wheel包,无正式对外公开接口,所有功能均通过shell脚本调用。5个入口脚本分别为[pretrain_gpt.py](https://gitee.com/ascend/ModelLink/blob/master/pretrain_gpt.py), [inference.py](https://gitee.com/ascend/ModelLink/blob/master/inference.py), [evaluation.py](https://gitee.com/ascend/ModelLink/blob/master/evaluation.py), [preprocess_data.py](https://gitee.com/ascend/ModelLink/blob/master/preprocess_data.py) 和 [convert_ckpt.py](https://gitee.com/ascend/ModelLink/blob/master/convert_ckpt.py)。 diff --git a/examples/chatglm3/data_convert_chatglm3_instruction.sh b/examples/chatglm3/data_convert_chatglm3_instruction.sh index 1dda86447..ae3d2d7fb 100644 --- a/examples/chatglm3/data_convert_chatglm3_instruction.sh +++ b/examples/chatglm3/data_convert_chatglm3_instruction.sh @@ -1,4 +1,4 @@ -# Alpaca数据集下载链接: https://huggingface.co/datasets/tatsu-lab/alpaca +# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Alpaca 数据集 # 请按照您的真实环境修改 set_env.sh 路径 
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 mkdir ./finetune_dataset
diff --git a/examples/gpt3/data_convert_gpt_pretrain.sh b/examples/gpt3/data_convert_gpt_pretrain.sh
index 63ac1635a..06e59d9b0 100644
--- a/examples/gpt3/data_convert_gpt_pretrain.sh
+++ b/examples/gpt3/data_convert_gpt_pretrain.sh
@@ -1,13 +1,8 @@
+# 请根据 examples/README.md 下 “社区BUG列表” 章节下载 gpt2-vocab.json,gpt2-merges.txt 文件
 # 请按照您的真实环境修改 set_env.sh 路径
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 mkdir ./dataset
-# 下载 vocab file 和 merge table
-# cd vocab_file
-# wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json
-# wget https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt
-# cd ..
-
 # 处理成训练数据
 python ./preprocess_data.py \
    --input ./dataset/ \
diff --git a/examples/llama2/data_convert_llama2_instruction.sh b/examples/llama2/data_convert_llama2_instruction.sh
index 3902a1671..e752bf0e4 100644
--- a/examples/llama2/data_convert_llama2_instruction.sh
+++ b/examples/llama2/data_convert_llama2_instruction.sh
@@ -1,4 +1,4 @@
-# Sharegpt数据集下载链接: https://huggingface.co/datasets/shibing624/roleplay-zh-sharegpt-gpt4-data
+# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 ShareGPT 数据集
 # 请按照您的真实环境修改 set_env.sh 路径
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 mkdir ./finetune_dataset
diff --git a/examples/llama3/data_convert_llama3_instruction.sh b/examples/llama3/data_convert_llama3_instruction.sh
index e7cdfeb53..ebaad7195 100644
--- a/examples/llama3/data_convert_llama3_instruction.sh
+++ b/examples/llama3/data_convert_llama3_instruction.sh
@@ -1,4 +1,4 @@
-# Alpaca数据集下载链接: https://huggingface.co/datasets/tatsu-lab/alpaca
+# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Alpaca 数据集
 # 请按照您的真实环境修改 set_env.sh 路径
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 mkdir ./finetune_dataset
diff --git a/examples/mcore/baichuan2/data_convert_baichuan2_pretrain.sh b/examples/mcore/baichuan2/data_convert_baichuan2_pretrain.sh
index 456d68b01..4205d6b42 100644
--- a/examples/mcore/baichuan2/data_convert_baichuan2_pretrain.sh
+++ b/examples/mcore/baichuan2/data_convert_baichuan2_pretrain.sh
@@ -1,8 +1,8 @@
+# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Alpaca 数据集
 # 请按照您的真实环境修改 set_env.sh 路径
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 mkdir ./dataset
-# 数据集下载地址 https://huggingface.co/datasets/tatsu-lab/alpaca/resolve/main/data/train-00000-of-00001-a09b74b3ef9c3b56.parquet
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \
     --tokenizer-name-or-path ./model_from_hf/Baichuan-hf/ \
diff --git a/examples/mcore/deepseek2/data_convert_deepseek2_pretrain.sh b/examples/mcore/deepseek2/data_convert_deepseek2_pretrain.sh
index 5a8b10aa0..bdfdf674e 100644
--- a/examples/mcore/deepseek2/data_convert_deepseek2_pretrain.sh
+++ b/examples/mcore/deepseek2/data_convert_deepseek2_pretrain.sh
@@ -1,8 +1,8 @@
+# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Enwiki 数据集(一般取第一条即可)
 # 请按照您的真实环境修改 set_env.sh 路径
 source /usr/local/Ascend/ascend-toolkit/set_env.sh
 mkdir ./dataset
-# 数据集下载地址 https://huggingface.co/datasets/lsb/enwiki20230101/blob/main/data/train-00000-of-00042-d964455e17e96d5a.parquet
 python ./preprocess_data.py \
     --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \
     --tokenizer-name-or-path ./model_from_hf/deepseek2-hf/ \
diff --git a/examples/mcore/deepseek2_coder/data_convert_deepseek2_pretrain.sh b/examples/mcore/deepseek2_coder/data_convert_deepseek2_pretrain.sh
index 051054eee..fbc5863c4 100644
---
a/examples/mcore/deepseek2_coder/data_convert_deepseek2_pretrain.sh +++ b/examples/mcore/deepseek2_coder/data_convert_deepseek2_pretrain.sh @@ -1,8 +1,8 @@ +# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Enwiki 数据集(一般取第一条即可) # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./dataset -# 数据集下载地址 https://huggingface.co/datasets/lsb/enwiki20230101/blob/main/data/train-00000-of-00042-d964455e17e96d5a.parquet python ./preprocess_data.py \ --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \ --tokenizer-name-or-path ./model_from_hf/deepseek2-coder-hf/ \ diff --git a/examples/mcore/llama2/data_convert_llama2_instruction.sh b/examples/mcore/llama2/data_convert_llama2_instruction.sh index 410e2097e..0afa0e5bd 100644 --- a/examples/mcore/llama2/data_convert_llama2_instruction.sh +++ b/examples/mcore/llama2/data_convert_llama2_instruction.sh @@ -1,4 +1,4 @@ -# Alpaca数据集下载链接: https://huggingface.co/datasets/tatsu-lab/alpaca +# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Alpaca 数据集 # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset diff --git a/examples/mcore/llama2/data_convert_llama2_instruction_pack.sh b/examples/mcore/llama2/data_convert_llama2_instruction_pack.sh index 5a9b2904e..3edef8468 100644 --- a/examples/mcore/llama2/data_convert_llama2_instruction_pack.sh +++ b/examples/mcore/llama2/data_convert_llama2_instruction_pack.sh @@ -1,4 +1,4 @@ -# Alpaca数据集下载链接: https://huggingface.co/datasets/tatsu-lab/alpaca +# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Alpaca 数据集 # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset diff --git a/examples/mcore/mistral/data_convert_mistral_instruction.sh b/examples/mcore/mistral/data_convert_mistral_instruction.sh index 6787f4c41..6e419ca25 100644 --- a/examples/mcore/mistral/data_convert_mistral_instruction.sh +++ b/examples/mcore/mistral/data_convert_mistral_instruction.sh @@ -1,4 +1,4 @@ -# Alpaca数据集下载链接: https://huggingface.co/datasets/tatsu-lab/alpaca +# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 Alpaca 数据集 # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset diff --git a/examples/qwen/data_convert_qwen_instruction.sh b/examples/qwen/data_convert_qwen_instruction.sh index ded77c63b..b156b2bba 100644 --- a/examples/qwen/data_convert_qwen_instruction.sh +++ b/examples/qwen/data_convert_qwen_instruction.sh @@ -1,4 +1,4 @@ -# Sharegpt数据集下载链接: https://huggingface.co/datasets/shibing624/roleplay-zh-sharegpt-gpt4-data +# 请根据 examples/README.md 下 “数据集准备及处理” 章节下载 ShareGPT 数据集 # 请按照您的真实环境修改 set_env.sh 路径 source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset -- Gitee From d5d295c570c7b8fe2431c9fcda70c76ba80c3cff Mon Sep 17 00:00:00 2001 From: RuanZhiXiang Date: Wed, 25 Sep 2024 08:48:33 +0000 Subject: [PATCH 06/32] !1715 [1.2] add swap-attention ST and bugfix validation of enable-recompute-pp-rank Merge pull request !1715 from RuanZhiXiang/1.2-vpp-pp2-enable --- modellink/arguments.py | 5 +- tests/README.md | 12 +- .../llama2_tp2_pp4_vpp2_swap.json | 78 +++++++++++ .../shell_scripts/llama2_tp2_pp4_vpp2_swap.sh | 127 ++++++++++++++++++ 4 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json create mode 100644 tests/st/shell_scripts/llama2_tp2_pp4_vpp2_swap.sh diff --git a/modellink/arguments.py b/modellink/arguments.py index 4afd80bf4..8ed1979b3 100644 --- a/modellink/arguments.py +++ 
b/modellink/arguments.py @@ -579,7 +579,10 @@ def _validate_recompute_args(args): validate re-computation arguments. """ enable_pp_vpp = args.num_layers_per_virtual_pipeline_stage - enable_recomputation = args.recompute_granularity is not None and args.recompute_method == 'block' + enable_vanilla_recomputation = args.recompute_granularity is not None and args.recompute_method == 'block' + enable_swap = args.swap_attention + enable_recompute_activation = args.recompute_activation_function + enable_recomputation = enable_vanilla_recomputation or enable_swap or enable_recompute_activation if args.enable_recompute_layers_per_pp_rank and not (enable_pp_vpp and enable_recomputation): raise AssertionError("enable-recompute-layers-per-pp-rank should be works with pipeline and virtual pipeline, when enabling re-computation.") diff --git a/tests/README.md b/tests/README.md index 2fb717aba..dfccd1f23 100644 --- a/tests/README.md +++ b/tests/README.md @@ -13,8 +13,8 @@ Mem. - ST - Pretrain + ST + Pretrain Mcore TP,PP,VPP,重计算,enable_recompute_layers_per_pp_rank llama2_tp2_pp4_vpp2.sh @@ -30,6 +30,14 @@ Y Y + + Mcore + swap_attention,recompute_activation_function,enable_recompute_layers_per_pp_rank,reuse_fp32_param + llama2_tp2_pp4_vpp2_swap.sh + Y + Y + Y + Mcore glm_rope, rotary_percent diff --git a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json new file mode 100644 index 000000000..30552503d --- /dev/null +++ b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json @@ -0,0 +1,78 @@ +{ + "lm loss": [ + 1.47164, + 1.46151, + 1.47107, + 1.442299, + 1.427286, + 1.406914, + 1.388342, + 1.362971, + 1.361994, + 1.286056, + 1.288965, + 1.297596, + 1.289588, + 1.286705, + 1.269324 + ], + "throughput": [ + 50.4, + 106.9, + 106.5, + 106.0, + 106.3, + 105.7, + 106.5, + 106.4, + 106.1, + 106.8, + 106.2, + 106.3, + 106.6, + 106.4, + 106.5 + ], + "memo info": [ + { + "rank": 0, + "allocated memory": 13485.05908203125, + "max allocated memory": 13735.0615234375 + }, + { + "rank": 1, + "allocated memory": 13485.05908203125, + "max allocated memory": 13735.0615234375 + }, + { + "rank": 2, + "allocated memory": 12517.05908203125, + "max allocated memory": 12689.0615234375 + }, + { + "rank": 3, + "allocated memory": 12517.05908203125, + "max allocated memory": 12689.0615234375 + }, + { + "rank": 4, + "allocated memory": 12517.05908203125, + "max allocated memory": 12689.0615234375 + }, + { + "rank": 5, + "allocated memory": 12517.05908203125, + "max allocated memory": 12689.0615234375 + }, + { + "rank": 6, + "allocated memory": 13517.12451171875, + "max allocated memory": 13767.1416015625 + }, + { + "rank": 7, + "allocated memory": 13517.12451171875, + "max allocated memory": 13767.1416015625 + } + ] +} \ No newline at end of file diff --git a/tests/st/shell_scripts/llama2_tp2_pp4_vpp2_swap.sh b/tests/st/shell_scripts/llama2_tp2_pp4_vpp2_swap.sh new file mode 100644 index 000000000..a2d725817 --- /dev/null +++ b/tests/st/shell_scripts/llama2_tp2_pp4_vpp2_swap.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# A test case for swap attention, re-compute activation function and reuse fp32 param. 
+ +export CUDA_DEVICE_MAX_CONNECTIONS=1 +export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True + +NPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6079 +NNODES=1 +NODE_RANK=0 +WORLD_SIZE=$(($NPUS_PER_NODE*$NNODES)) + +basepath=$(cd `dirname $0`; cd ../../../; pwd) + +CKPT_SAVE_DIR=/data/ckpt +CKPT_LOAD_DIR=/data/ci/llama-2-7b-mg-tp2-pp4-mcore-vpp2-test +DATA_PATH=/data/pretrain_dataset/alpaca_text_document +TOKENIZER_MODEL=/data/llama-2-7b-hf/tokenizer.model +TP=2 +PP=4 +VPP=2 + +DISTRIBUTED_ARGS=( + --nproc_per_node $NPUS_PER_NODE + --nnodes $NNODES + --node_rank $NODE_RANK + --master_addr $MASTER_ADDR + --master_port $MASTER_PORT +) + + +ACCELERATE_ARGS=( + --recompute-activation-function + --recompute-num-layers 1 + --swap-attention + --reuse-fp32-param + --enable-recompute-layers-per-pp-rank +) + + +DIST_ALGO=( + --tensor-model-parallel-size ${TP} + --pipeline-model-parallel-size ${PP} + --num-layers-per-virtual-pipeline-stage ${VPP} + --sequence-parallel +) + + +MODEL_ARGS=( + --use-mcore-models + --transformer-impl local + --num-layers 32 + --hidden-size 4096 + --ffn-hidden-size 11008 + --num-attention-heads 32 + --seq-length 4096 + --max-position-embeddings 4096 +) + +TRAINING_ARGS=( + --tokenizer-type Llama2Tokenizer + --tokenizer-model ${TOKENIZER_MODEL} + --micro-batch-size 1 + --global-batch-size 32 + --make-vocab-size-divisible-by 1 + --lr 1.25e-6 + --train-iters 15 + --lr-decay-style cosine + --untie-embeddings-and-output-weights + --disable-bias-linear + --attention-dropout 0.0 + --init-method-std 0.01 + --hidden-dropout 0.0 + --position-embedding-type rope + --normalization RMSNorm + --use-fused-rmsnorm + --swiglu + --use-flash-attn + --no-masked-softmax-fusion + --attention-softmax-in-fp32 + --min-lr 1.25e-7 + --weight-decay 1e-1 + --lr-warmup-fraction 0.01 + --clip-grad 1.0 + --adam-beta1 0.9 + --initial-loss-scale 65536 + --adam-beta2 0.95 + --no-gradient-accumulation-fusion + --no-load-optim + --no-load-rng + --use-fused-swiglu + --use-fused-rotary-pos-emb + --overlap-grad-reduce + --bf16 + --use-distributed-optimizer +) + +DATA_ARGS=( + --data-path $DATA_PATH + --split 949,50,1 +) + +OUTPUT_ARGS=( + --log-interval 1 + --save-interval 10000 + --eval-interval 1000 + --eval-iters 1 + --no-load-optim + --no-load-rng + --no-save-optim + --no-save-rng + --load ${CKPT_LOAD_DIR} + --save ${CKPT_SAVE_DIR} +) + + +torchrun ${DISTRIBUTED_ARGS[@]} $basepath/pretrain_gpt.py \ + ${DIST_ALGO[@]} \ + ${MODEL_ARGS[@]} \ + ${TRAINING_ARGS[@]} \ + ${ACCELERATE_ARGS[@]} \ + ${DATA_ARGS[@]} \ + ${OUTPUT_ARGS[@]} \ + --finetune \ + --log-throughput \ + --distributed-backend nccl -- Gitee From 0a8a2f2bcc3bb1d8e4303958f2a3047886a944ef Mon Sep 17 00:00:00 2001 From: wucong Date: Wed, 9 Oct 2024 12:19:57 +0000 Subject: [PATCH 07/32] =?UTF-8?q?!1751=20=E6=B7=BB=E5=8A=A0readme=E5=BE=AE?= =?UTF-8?q?=E8=B0=83=E6=80=A7=E8=83=BD=E6=95=B0=E6=8D=AE=20Merge=20pull=20?= =?UTF-8?q?request=20!1751=20from=20wucong/adddata1=5F2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 52 +++++++++---------- .../data_convert_chatglm3_instruction.sh | 2 +- .../llama3/data_convert_llama3_instruction.sh | 2 +- .../llama2/data_convert_llama2_instruction.sh | 2 +- 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 270d762f7..0dca4d8ca 100644 --- a/README.md +++ b/README.md @@ -1244,19 +1244,15 @@ ModelLink预训练支持张量并行、流水线并行等多种加速算法和 ModelLink支持指令微调,方案与DeepSpeed统一,在微调效果保持一致的前提下,ModelLink可以表现出优异性能 【与DeepSpeed微调Loss对比】 - - - - - 
- - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + +
Llama2-7b模型与DeepSpeed微调5个epoch后的loss对比图 - -

-
Qwen-7b模型与DeepSpeed微调4个epoch后的loss对比图 - + + + + - - +
Llama2-7b模型与DeepSpeed微调5个epoch后的loss对比图 +

+
+Qwen-7b模型与DeepSpeed微调4个epoch后的loss对比图

-
@@ -1358,22 +1354,22 @@ My soul is full and my heart does soep.
llama2-7b llama2------
qwen-7bqwen------dynamic45.7dynamic40.4dynamic46.5
llama2-13bllama2dynamic28.4dynamic17.8dynamic24.9
diff --git a/examples/chatglm3/data_convert_chatglm3_instruction.sh b/examples/chatglm3/data_convert_chatglm3_instruction.sh index ae3d2d7fb..37e9805a3 100644 --- a/examples/chatglm3/data_convert_chatglm3_instruction.sh +++ b/examples/chatglm3/data_convert_chatglm3_instruction.sh @@ -4,7 +4,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset python ./preprocess_data.py \ - --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \ + --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \ --tokenizer-name-or-path ./model_from_hf/Chatglm3-hf/ \ --output-prefix ./finetune_dataset/alpaca \ --workers 4 \ diff --git a/examples/llama3/data_convert_llama3_instruction.sh b/examples/llama3/data_convert_llama3_instruction.sh index ebaad7195..faa10de76 100644 --- a/examples/llama3/data_convert_llama3_instruction.sh +++ b/examples/llama3/data_convert_llama3_instruction.sh @@ -4,7 +4,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset python ./preprocess_data.py \ - --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \ + --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \ --tokenizer-name-or-path ./model_from_hf/Llama3-hf/ \ --output-prefix ./finetune_dataset/alpaca \ --workers 4 \ diff --git a/examples/mcore/llama2/data_convert_llama2_instruction.sh b/examples/mcore/llama2/data_convert_llama2_instruction.sh index 0afa0e5bd..97f2a3a0e 100644 --- a/examples/mcore/llama2/data_convert_llama2_instruction.sh +++ b/examples/mcore/llama2/data_convert_llama2_instruction.sh @@ -4,7 +4,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset python ./preprocess_data.py \ - --input ./dataset/train-00000-of-00042-d964455e17e96d5a.parquet \ + --input ./dataset/train-00000-of-00001-a09b74b3ef9c3b56.parquet \ --tokenizer-name-or-path ./model_from_hf/Llama2-hf/ \ --output-prefix ./finetune_dataset/alpaca \ --workers 4 \ -- Gitee From 41d4c508dea223c0c30dbdabfca7af0d4c00aea5 Mon Sep 17 00:00:00 2001 From: fengliangjun Date: Fri, 11 Oct 2024 06:22:13 +0000 Subject: [PATCH 08/32] update tests/README.md. 
Signed-off-by: fengliangjun --- tests/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/README.md b/tests/README.md index dfccd1f23..5e0a26986 100644 --- a/tests/README.md +++ b/tests/README.md @@ -164,12 +164,14 @@ + ring_attn test_ringattn_context_parallel.py Y + ulysses test_ulysses_context_parallel.py Y -- Gitee From bb23fc6553016716c90f01281fb69edc2088578b Mon Sep 17 00:00:00 2001 From: RuanZhiXiang Date: Sat, 12 Oct 2024 06:14:24 +0000 Subject: [PATCH 09/32] =?UTF-8?q?!1775=20[1.0.RC3]=20docs:=20=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E5=95=86=E5=88=86=E5=88=86=E6=94=AF=E5=90=8D=20Merge?= =?UTF-8?q?=20pull=20request=20!1775=20from=20RuanZhiXiang/change-name-1.0?= =?UTF-8?q?.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0dca4d8ca..1d0cc3b70 100644 --- a/README.md +++ b/README.md @@ -52,9 +52,9 @@ ModelLink已发布版本维护策略: | **ModelLink版本** | **维护策略** | **当前状态** | **发布时间** | **后续状态** | **EOL日期** | |-----------------|-----------|--------|------------|-----------------------|-----------| | bk_origin_23 | Demo | EOL | 2023 | 生命周期终止 | 2024/6/30 | -| 1.0 | 常规版本 | 维护 | 2024/03/30 | 预计2024/9/30起无维护 | | -| 1.1 | 常规版本 | 维护 | 2024/06/30 | 预计2024/12/30起无维护 | | -| 1.2 | 常规版本 | 维护 | 2024/09/30 | 预计2025/03/30起无维护 | | +| 1.0.RC1 | 常规版本 | 维护 | 2024/03/30 | 预计2024/9/30起无维护 | | +| 1.0.RC2 | 常规版本 | 维护 | 2024/06/30 | 预计2024/12/30起无维护 | | +| 1.0.RC3 | 常规版本 | 维护 | 2024/09/30 | 预计2025/03/30起无维护 | | --- ## 配套版本与支持模型 -- Gitee From 7d7cc09332de02a5c554e87827b669bba552919e Mon Sep 17 00:00:00 2001 From: glhyy Date: Mon, 14 Oct 2024 11:12:39 +0000 Subject: [PATCH 10/32] =?UTF-8?q?!1769=20fix=EF=BC=9A=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E6=9D=83=E9=87=8D=E8=BD=AC=E6=8D=A2fc1=E9=87=8D=E5=A4=8D?= =?UTF-8?q?=E8=8E=B7=E5=8F=96bug,=201.0.RC3=20Merge=20pull=20request=20!17?= =?UTF-8?q?69=20from=20glhyy/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/bloom/ckpt_convert_bloom_hf2legacy.sh | 4 ++-- modellink/tasks/checkpoint/loader_hf.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/bloom/ckpt_convert_bloom_hf2legacy.sh b/examples/bloom/ckpt_convert_bloom_hf2legacy.sh index 3b50abe7c..4e84654bb 100644 --- a/examples/bloom/ckpt_convert_bloom_hf2legacy.sh +++ b/examples/bloom/ckpt_convert_bloom_hf2legacy.sh @@ -8,8 +8,8 @@ python convert_ckpt.py \ --save-model-type mg \ --target-tensor-parallel-size 8 \ --target-pipeline-parallel-size 1 \ - --load-dir --load-dir ./model_from_hf/Bloom-hf/ \ - --save-dir --save-dir ./model_weights/Bloom-legacy/ \ + --load-dir ./model_from_hf/Bloom-hf/ \ + --save-dir ./model_weights/Bloom-legacy/ \ --tokenizer-model None \ --model-type-hf bloom \ --add-qkv-bias \ diff --git a/modellink/tasks/checkpoint/loader_hf.py b/modellink/tasks/checkpoint/loader_hf.py index 1a6bde86f..9e47d84bb 100644 --- a/modellink/tasks/checkpoint/loader_hf.py +++ b/modellink/tasks/checkpoint/loader_hf.py @@ -203,8 +203,6 @@ def _get_message_layer_mlp(message, model, layer_idx, md=None, tp_size=1, is_moe else: mlp_l0_bias.append(model.get_layers_mlp_linear_fc1_bias(layer_idx=layer_idx, **kwargs)) - if md.linear_bias: - mlp_l0_bias.append(model.get_layers_mlp_linear_fc1_bias(layer_idx=layer_idx, **kwargs)) # Handle gated linear units. if md.swiglu: # Concat all the first halves ('W's) and all the second halves ('V's). 
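The deleted duplicate `mlp_l0_bias.append(...)` matters because the gated-linear-unit handling directly below concatenates the collected shards; with each rank's fc1 bias appended twice, the merged tensor comes out at double the expected size. A minimal sketch of that concat step (hypothetical shapes and helper name, not the loader's actual API):

```python
import torch


def merge_swiglu_fc1(shards):
    """Merge per-TP-rank fc1 shards; each shard stacks [W; V] along dim 0.

    Concat all the first halves ('W's), then all the second halves ('V's).
    """
    halves = [torch.chunk(s, 2, dim=0) for s in shards]
    ws = [h[0] for h in halves]
    vs = [h[1] for h in halves]
    return torch.cat(ws + vs, dim=0)


tp_shards = [torch.randn(4, 1) for _ in range(2)]  # 2 TP ranks, 2 rows each for W and V
ok = merge_swiglu_fc1(tp_shards)        # [8, 1], the expected merged size
bad = merge_swiglu_fc1(tp_shards * 2)   # doubled list, as with the duplicate append: [16, 1]
assert ok.shape[0] == 8 and bad.shape[0] == 16
```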
-- Gitee From 31100602dc2b420e9c323963fbaaf2e212d5eaf0 Mon Sep 17 00:00:00 2001 From: wucong Date: Wed, 16 Oct 2024 09:29:19 +0000 Subject: [PATCH 11/32] =?UTF-8?q?!1787=20=E4=BF=AE=E6=94=B9readme->RC3=20M?= =?UTF-8?q?erge=20pull=20request=20!1787=20from=20wucong/fixReadRc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- examples/README.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1d0cc3b70..0f348b3d3 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ ModelLink已发布版本维护策略: | Driver | Ascend HDK 24.1.RC3 | | Firmware | Ascend HDK 24.1.RC3 | | CANN | CANN 8.0.RC3 | -| Torch | 2.1.0、2.2.0 | +| Torch | 2.1.0 | | Torch_npu | release v6.0.RC3 | 【预训练集群性能与线性度】 diff --git a/examples/README.md b/examples/README.md index 709bce526..6e7715746 100644 --- a/examples/README.md +++ b/examples/README.md @@ -45,7 +45,7 @@ | Driver | 在研版本 | | Firmware | 在研版本 | | CANN | 在研版本 | -| Torch | 2.1.0、2.2.0 | +| Torch | 2.1.0 | | Torch_npu | 在研版本 | @@ -53,6 +53,7 @@ ```shell git clone https://gitee.com/ascend/ModelLink.git + git checkout 1.0.RC3 git clone https://github.com/NVIDIA/Megatron-LM.git cd Megatron-LM git checkout core_r0.6.0 -- Gitee From c7a76cbb75e3828d68ff9a3beeaef7967a80186e Mon Sep 17 00:00:00 2001 From: wucong Date: Tue, 29 Oct 2024 11:08:23 +0000 Subject: [PATCH 12/32] =?UTF-8?q?!1823=20=E4=BF=AE=E5=A4=8Dreadme=E8=B5=84?= =?UTF-8?q?=E6=96=99=E9=94=99=E8=AF=AF=20->=20RC3=20Merge=20pull=20request?= =?UTF-8?q?=20!1823=20from=20wucong/fix=5Fdts1=5F0rc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- examples/README.md | 21 +++++++++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0f348b3d3..6bcd775bc 100644 --- a/README.md +++ b/README.md @@ -917,7 +917,7 @@ ModelLink支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重 专家并行 - --expert-model-parallel-size + --target-expert-model-parallel-size @@ -1029,7 +1029,7 @@ ModelLink支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重 专家并行 - --expert-model-parallel-size + --target-expert-model-parallel-size 流水并行动态划分 diff --git a/examples/README.md b/examples/README.md index 6e7715746..9bda280c6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -197,10 +197,10 @@ bash examples/llama2/ckpt_convert_llama2_hf2legacy.sh ModelLink Huggingface到Megatron-Mcore权重转换脚本命名风格及启动方法为: ```shell -# 命名及启动:bash examples/model_name/ckpt_convert_xxx_hf2mcore.sh +# 命名及启动:bash examples/mcore/model_name/ckpt_convert_xxx_hf2mcore.sh # 需要配置并行参数以及权重词表加载保存等路径 -bash examples/llama2/ckpt_convert_llama2_hf2mcore.sh +bash examples/mcore/llama2/ckpt_convert_llama2_hf2mcore.sh ``` ##### 2.2 Megatron-LM权重转换到Huggingface格式 @@ -233,10 +233,10 @@ bash examples/llama2/ckpt_convert_llama2_legacy2hf.sh ModelLink Megatron-Mcore到Huggingface的权重转换脚本命名风格及启动方法为: ```shell -# 命名及启动:bash examples/model_name/ckpt_convert_xxx_mcore2hf.sh +# 命名及启动:bash examples/mcore/model_name/ckpt_convert_xxx_mcore2hf.sh # 需要配置并行参数以及权重词表加载保存等路径 -bash examples/llama2/ckpt_convert_llama2_mcore2hf.sh +bash examples/mcore/llama2/ckpt_convert_llama2_mcore2hf.sh ``` ##### 2.3 Megatron-LM格式权重互转 @@ -313,6 +313,19 @@ mcore转legacy时设置此参数以指定保存权重格式为legacy --lora-target-modules query_key_value dense dense_h_to_4h dense_4h_to_h \ ``` +【lora-r】 + +`--lora_r`参数指的是LoRA中的秩(rank),它决定了低秩矩阵的大小。 + +【--lora-alpha】 + +`--lora_alpha`参数定义了LoRA适应的学习率缩放因子。这个参数影响了低秩矩阵的更新速度。 + +【--lora-target-modules】 + 
+`--lora-target-modules`定义了Lora目标模块,字符串列表,由空格隔开,无默认值。每一个字符串是需要进行LoRA微调的层的名称。 + + 【合并后转换为Megatron-Legacy权重】 ```shell -- Gitee From bdbe19c0d7ae2df8a2b4bad518dc6c647d66bb15 Mon Sep 17 00:00:00 2001 From: RuanZhiXiang Date: Wed, 6 Nov 2024 03:19:24 +0000 Subject: [PATCH 13/32] !1757 [1.0.RC3] fix bug when CP_Ulysses+GQA+KV_Interleave Merge pull request !1757 from RuanZhiXiang/test-cp-hybrid-1.0.rc3 --- .../core/transformer/dot_product_attention.py | 27 ++-- modellink/patchs/megatron_patch.py | 4 +- tests/README.md | 14 ++- .../st/baseline_results/chatglm3_gqa_cp8.json | 78 ++++++++++++ .../llama2_tp2_pp4_vpp2_swap.json | 30 ++--- .../mixtral_tp1_pp4_ep2_drop_dpp.json | 30 ++--- tests/st/shell_scripts/chatglm3_gqa_cp8.sh | 118 ++++++++++++++++++ .../test_ulysses_context_parallel.py | 2 + tests/ut/evaluation/test_evaluate.py | 2 +- tests/ut/inference/test_inference.py | 6 +- 10 files changed, 266 insertions(+), 45 deletions(-) create mode 100644 tests/st/baseline_results/chatglm3_gqa_cp8.json create mode 100644 tests/st/shell_scripts/chatglm3_gqa_cp8.sh diff --git a/modellink/core/transformer/dot_product_attention.py b/modellink/core/transformer/dot_product_attention.py index 6c760c744..e61ab50ad 100644 --- a/modellink/core/transformer/dot_product_attention.py +++ b/modellink/core/transformer/dot_product_attention.py @@ -116,6 +116,24 @@ def get_alibi(self, seq_length): self.alibi.alibi = alibi +def ulysses_context_parallel_forward_wrapper(fn): + """ + Do repeat KV to support GQA+Ulysses. This wrapper would be remove if mindspeed-core support ulysses+GQA. + """ + @wraps(fn) + def wrapper(self, query: Tensor, key: Tensor, value: Tensor, *args, **kwargs): + heads_per_gqa_group = self.local_attn.num_attention_heads_per_partition // self.local_attn.num_query_groups_per_partition + global_args = get_args() + should_kv_repeat_before_uly = global_args.use_flash_attn and global_args.kv_head_repeat_before_uly_alltoall + + if heads_per_gqa_group > 1 and should_kv_repeat_before_uly: + key = key.repeat_interleave(heads_per_gqa_group, dim=2) + value = value.repeat_interleave(heads_per_gqa_group, dim=2) + + return fn(self, query, key, value, *args, **kwargs) + return wrapper + + def dot_product_attention_forward_wrapper(fn): @wraps(fn) def wrapper(self, query, key, value, attention_mask, attn_mask_type, packed_seq_params): @@ -135,19 +153,14 @@ def dot_product_attention_forward_wrapper(fn): args = get_args() heads_per_gqa_group = self.num_attention_heads_per_partition // self.num_query_groups_per_partition - if not args.use_flash_attn: if heads_per_gqa_group > 1: key = key.repeat_interleave(heads_per_gqa_group, dim=2) value = value.repeat_interleave(heads_per_gqa_group, dim=2) else: - # Do repeat KV to support GQA+Ulysses and PFA - should_kv_repeat_before_uly = args.context_parallel_size > 1 and \ - args.context_parallel_algo in ['ulysses_cp_algo', 'hybrid_cp_algo'] and \ - args.kv_head_repeat_before_uly_alltoall + # Do repeat KV to support PFA should_kv_repeat_before_pfa = hasattr(args, 'use_kv_cache') and args.use_kv_cache - - if heads_per_gqa_group > 1 and (should_kv_repeat_before_uly or should_kv_repeat_before_pfa): + if heads_per_gqa_group > 1 and should_kv_repeat_before_pfa: key = key.repeat_interleave(heads_per_gqa_group, dim=2) value = value.repeat_interleave(heads_per_gqa_group, dim=2) diff --git a/modellink/patchs/megatron_patch.py b/modellink/patchs/megatron_patch.py index 3beeba61e..3acd6f510 100644 --- a/modellink/patchs/megatron_patch.py +++ b/modellink/patchs/megatron_patch.py @@ -132,7 +132,7 
@@ def patch_core_models(args): from ..core import rotary_embedding_forward, apply_rotary_pos_emb_bshd from ..core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec_wrapper from ..core.transformer.dot_product_attention import dot_product_attention_init_wrapper, \ - dot_product_attention_forward_wrapper + dot_product_attention_forward_wrapper, ulysses_context_parallel_forward_wrapper from ..core.transformer.attention import attention_init_wrapper from ..core.models.gpt.gpt_model import gpt_model_init_wrapper from ..core import rotary_embedding_init_wrapper @@ -150,6 +150,8 @@ def patch_core_models(args): PatchManager.register_patch('megatron.core.transformer.dot_product_attention.DotProductAttention.forward', dot_product_attention_forward_wrapper) PatchManager.register_patch('megatron.core.transformer.custom_layers.transformer_engine.TEDotProductAttention.__init__', dot_product_attention_init_wrapper) PatchManager.register_patch('megatron.core.transformer.custom_layers.transformer_engine.TEDotProductAttention.forward', dot_product_attention_forward_wrapper) + # For GQA in ulysses and hybrid + PatchManager.register_patch('mindspeed.core.context_parallel.ulysses_context_parallel.UlyssesContextAttention.forward', ulysses_context_parallel_forward_wrapper) # Layer Definition # For NPU, we use local-mcore-structrue in te layer. diff --git a/tests/README.md b/tests/README.md index 5e0a26986..257e2de39 100644 --- a/tests/README.md +++ b/tests/README.md @@ -13,8 +13,8 @@ Mem. - ST - Pretrain + ST + Pretrain Mcore TP,PP,VPP,重计算,enable_recompute_layers_per_pp_rank llama2_tp2_pp4_vpp2.sh @@ -24,12 +24,20 @@ Mcore - CP,分布式优化器,reuse_fp32_param,recompute_activation_function, fused_rmsnorm,fused_swiglu,fused_rope,overlap_grad_reduce, overlap_param_gather + cp_ring,分布式优化器,reuse_fp32_param,recompute_activation_function,fused_rmsnorm,fused_swiglu,fused_rope,overlap_grad_reduce, overlap_param_gather llama2_tp2_cp4_mem_recompute.sh Y Y Y + + Mcore + cp_hybrid,gqa + chatglm3_gqa_cp8.sh + Y + Y + Y + Mcore swap_attention,recompute_activation_function,enable_recompute_layers_per_pp_rank,reuse_fp32_param diff --git a/tests/st/baseline_results/chatglm3_gqa_cp8.json b/tests/st/baseline_results/chatglm3_gqa_cp8.json new file mode 100644 index 000000000..200958907 --- /dev/null +++ b/tests/st/baseline_results/chatglm3_gqa_cp8.json @@ -0,0 +1,78 @@ +{ + "lm loss": [ + 9.594278, + 9.577759, + 9.465339, + 9.230533, + 9.159581, + 9.091681, + 8.877193, + 8.776001, + 8.755611, + 8.634124, + 8.602496, + 8.540811, + 8.529873, + 8.529889, + 8.538946 + ], + "throughput": [ + 81.0, + 188.6, + 191.3, + 191.1, + 190.8, + 189.7, + 190.0, + 189.8, + 189.4, + 189.5, + 189.1, + 188.6, + 188.4, + 188.0, + 188.0 + ], + "memo info": [ + { + "rank": 0, + "allocated memory": 5386.2021484375, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 1, + "allocated memory": 5386.1884765625, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 2, + "allocated memory": 5386.1865234375, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 3, + "allocated memory": 5386.1904296875, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 4, + "allocated memory": 5386.1865234375, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 5, + "allocated memory": 5386.1865234375, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 6, + "allocated memory": 5386.1865234375, + "max allocated memory": 29104.03466796875 + }, + { + "rank": 7, + "allocated memory": 5386.1865234375, + "max allocated 
memory": 29104.03466796875 + } + ] +} diff --git a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json index 30552503d..12f7f1c5a 100644 --- a/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json +++ b/tests/st/baseline_results/llama2_tp2_pp4_vpp2_swap.json @@ -17,21 +17,21 @@ 1.269324 ], "throughput": [ - 50.4, - 106.9, - 106.5, - 106.0, - 106.3, - 105.7, - 106.5, - 106.4, - 106.1, - 106.8, - 106.2, - 106.3, - 106.6, - 106.4, - 106.5 + 51.4, + 93.4, + 93.6, + 93.2, + 93.3, + 93.7, + 93.1, + 93.9, + 93.5, + 93.8, + 93.3, + 93.1, + 93.0, + 93.4, + 93.8 ], "memo info": [ { diff --git a/tests/st/baseline_results/mixtral_tp1_pp4_ep2_drop_dpp.json b/tests/st/baseline_results/mixtral_tp1_pp4_ep2_drop_dpp.json index 8550835bd..89d393dcd 100644 --- a/tests/st/baseline_results/mixtral_tp1_pp4_ep2_drop_dpp.json +++ b/tests/st/baseline_results/mixtral_tp1_pp4_ep2_drop_dpp.json @@ -1,20 +1,20 @@ { "lm loss": [ - 14.14179, - 14.11266, - 13.25863, - 11.26134, - 8.678722, - 8.656157, - 7.504652, - 7.164732, - 6.634584, - 6.487458, - 6.295924, - 5.948122, - 5.95405, - 5.73897, - 5.644468 + 14.14042, + 14.11082, + 13.25441, + 11.26227, + 8.679944, + 8.709602, + 7.502835, + 7.138363, + 6.601542, + 6.470576, + 6.224497, + 5.946918, + 5.930299, + 5.696367, + 5.602035 ], "throughput": [ 10.4, diff --git a/tests/st/shell_scripts/chatglm3_gqa_cp8.sh b/tests/st/shell_scripts/chatglm3_gqa_cp8.sh new file mode 100644 index 000000000..23e2bc6c2 --- /dev/null +++ b/tests/st/shell_scripts/chatglm3_gqa_cp8.sh @@ -0,0 +1,118 @@ +#!/bin/bash +export CUDA_DEVICE_MAX_CONNECTIONS=1 + +NPUS_PER_NODE=8 +MASTER_ADDR=localhost +MASTER_PORT=6001 +NNODES=1 +NODE_RANK=0 +WORLD_SIZE=$((NPUS_PER_NODE*$NNODES)) + +basepath=$(cd `dirname $0`; cd ../../../; pwd) + +CKPT_SAVE_DIR=/data/ckpt +DATA_PATH=/data/chatglm3-dataset-alpaca/alpaca_text_document +TOKENIZER_PATH=/data/chatglm3-6b-base-hf/ +CKPT_LOAD_DIR=/data/chatglm3-6b-tp1-pp1-cp8/ + +TP=1 +PP=1 +CP=8 +MBS=1 +GBS=8 +SEQ_LEN=65536 +CP_ALGO=hybrid_cp_algo + +DISTRIBUTED_ARGS=" + --nproc_per_node $NPUS_PER_NODE \ + --nnodes $NNODES \ + --node_rank $NODE_RANK \ + --master_addr $MASTER_ADDR \ + --master_port $MASTER_PORT +" + +GPT_ARGS=" + --use-mcore-models \ + --transformer-impl local \ + --tensor-model-parallel-size ${TP} \ + --pipeline-model-parallel-size ${PP} \ + --sequence-parallel \ + --num-layers 2 \ + --hidden-size 4096 \ + --ffn-hidden-size 13696 \ + --num-attention-heads 32 \ + --ulysses-degree-in-cp 4 \ + --seq-length ${SEQ_LEN} \ + --micro-batch-size ${MBS} \ + --global-batch-size ${GBS} \ + --context-parallel-algo ${CP_ALGO} \ + --context-parallel-size ${CP} \ + --max-position-embeddings ${SEQ_LEN} \ + --padded-vocab-size 65024 \ + --make-vocab-size-divisible-by 1 \ + --group-query-attention \ + --num-query-groups 2 \ + --disable-bias-linear \ + --add-qkv-bias \ + --position-embedding-type rope \ + --no-rope-fusion \ + --use-distributed-optimizer \ + --use-glm-rope \ + --rotary-percent 0.5 \ + --use-flash-attn \ + --use-fused-rmsnorm \ + --use-fused-swiglu \ + --normalization RMSNorm \ + --swiglu \ + --no-create-attention-mask-in-dataloader \ + --tokenizer-type PretrainedFromHF \ + --tokenizer-name-or-path ${TOKENIZER_PATH} \ + --lr 1e-6 \ + --train-iters 15 \ + --lr-decay-style cosine \ + --untie-embeddings-and-output-weights \ + --attention-dropout 0.0 \ + --init-method-std 0.01 \ + --hidden-dropout 0.0 \ + --no-masked-softmax-fusion \ + --attention-softmax-in-fp32 \ + --min-lr 1e-8 \ + --weight-decay 
1e-1 \ + --lr-warmup-fraction 0.01 \ + --clip-grad 1.0 \ + --adam-beta1 0.9 \ + --initial-loss-scale 512 \ + --adam-beta2 0.95 \ + --no-gradient-accumulation-fusion \ + --fp16 \ + --num-workers 1 \ + --kv-head-repeat-before-uly-alltoall \ + --no-shared-storage \ + --finetune \ + --log-throughput \ + --use-cp-send-recv-overlap \ + --overlap-grad-reduce \ + --overlap-param-gather \ +" + +DATA_ARGS=" + --data-path $DATA_PATH \ + --split 949,50,1 +" + +OUTPUT_ARGS=" + --log-interval 1 \ + --save-interval 15 \ + --eval-interval 15 \ + --eval-iters 10 \ + --no-load-optim \ + --no-load-rng \ + --save $CKPT_SAVE_DIR \ + --load $CKPT_LOAD_DIR \ +" + +torchrun $DISTRIBUTED_ARGS $basepath/pretrain_gpt.py \ + $GPT_ARGS \ + $DATA_ARGS \ + $OUTPUT_ARGS \ + --distributed-backend nccl diff --git a/tests/ut/dist_algo/context_parallel/test_ulysses_context_parallel.py b/tests/ut/dist_algo/context_parallel/test_ulysses_context_parallel.py index eb0227427..ddc9980c5 100644 --- a/tests/ut/dist_algo/context_parallel/test_ulysses_context_parallel.py +++ b/tests/ut/dist_algo/context_parallel/test_ulysses_context_parallel.py @@ -34,6 +34,8 @@ class FlashSelfAttention(torch.nn.Module): self.causal = causal self.softmax_scale = softmax_scale self.dropout_p = attention_dropout + self.num_attention_heads_per_partition = 1 + self.num_query_groups_per_partition = 1 def forward(self, q, k, v, attention_mask, head_num): """Implements the multihead softmax attention. diff --git a/tests/ut/evaluation/test_evaluate.py b/tests/ut/evaluation/test_evaluate.py index 8e5399cd8..b3b8f662b 100644 --- a/tests/ut/evaluation/test_evaluate.py +++ b/tests/ut/evaluation/test_evaluate.py @@ -86,7 +86,7 @@ class TestEvaluate(DistributedTest): print(log_capture) expected_score = acquire_score(log_capture) - assert math.isclose(expected_score, 0.5333, abs_tol=1e-2), f"score {expected_score}, forward pass has been changed, check it!" + assert math.isclose(expected_score, 0.5666, abs_tol=1e-2), f"score {expected_score}, forward pass has been changed, check it!" @pytest.mark.parametrize("params", test_config["test_qwen_prompt_ceval_evaluate"]) def test_qwen_prompt_ceval_evaluate(self, build_args, params): diff --git a/tests/ut/inference/test_inference.py b/tests/ut/inference/test_inference.py index ab633f005..d6bad984e 100644 --- a/tests/ut/inference/test_inference.py +++ b/tests/ut/inference/test_inference.py @@ -77,7 +77,7 @@ class TestInference(DistributedTest): print(log_capture) context = acquire_context(log_capture) assert [context] == [ - "I'm doing well. I'm in the middle of a 3-day weekend, so I'm enjoying that." + "I'm doing well. I'm in the middle of a 3-day weekend, so I'm enjoying the extra" ], "forward pass has been changed, check it!" @pytest.mark.parametrize("params", test_config["test_lora_greedy_search"]) @@ -92,5 +92,5 @@ class TestInference(DistributedTest): print(log_capture) context = acquire_context(log_capture) assert [context] == [ - "I'm doing well. I'm in the middle of a 3-day weekend, so I'm enjoying the extra time off." - ], "forward pass has been changed, check it!" \ No newline at end of file + "I'm doing well. I'm in the middle of a 3-day weekend, so I'm enjoying that." + ], "forward pass has been changed, check it!" 
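For context on the GQA handling above: `repeat_interleave` expands the grouped KV heads to one per query head so the Ulysses all-to-all can split heads evenly across the context-parallel group; that memory-for-divisibility trade is why it sits behind `--kv-head-repeat-before-uly-alltoall`. A minimal sketch with hypothetical `[s, b, heads, dim]` shapes (not the wrapper's actual call path):

```python
import torch

s, b, num_q_heads, num_kv_heads, head_dim = 8, 1, 32, 2, 128
heads_per_gqa_group = num_q_heads // num_kv_heads  # 16 query heads share each KV head

key = torch.randn(s, b, num_kv_heads, head_dim)
value = torch.randn(s, b, num_kv_heads, head_dim)

# The same expansion ulysses_context_parallel_forward_wrapper applies (dim=2 is the head dim):
key = key.repeat_interleave(heads_per_gqa_group, dim=2)
value = value.repeat_interleave(heads_per_gqa_group, dim=2)
assert key.shape == (s, b, num_q_heads, head_dim)
```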
-- Gitee From 817b500d7e63c99d3553e4ba897ed92496f5f145 Mon Sep 17 00:00:00 2001 From: wucong Date: Thu, 7 Nov 2024 11:42:21 +0000 Subject: [PATCH 14/32] =?UTF-8?q?!1857=20=E4=BF=AE=E6=94=B9readme=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E5=88=86=E6=94=AF=E9=94=99=E8=AF=AF=20-=E3=80=8BRC3?= =?UTF-8?q?=20Merge=20pull=20request=20!1857=20from=20wucong/fix=5Fread?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 47 ++++++++++++++++++++++++++++++++++++++-------- examples/README.md | 2 +- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6bcd775bc..8f7e92840 100644 --- a/README.md +++ b/README.md @@ -61,14 +61,45 @@ ModelLink已发布版本维护策略: 【版本配套环境】 -| 软件 | [版本](https://www.hiascend.com/zh/) | -| :-----------------------: |:----------------------------------:| -| Python | 3.8 | -| Driver | Ascend HDK 24.1.RC3 | -| Firmware | Ascend HDK 24.1.RC3 | -| CANN | CANN 8.0.RC3 | -| Torch | 2.1.0 | -| Torch_npu | release v6.0.RC3 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
依赖软件版本软件安装指南推荐硬件形态
昇腾NPU驱动Ascend HDK 24.1.RC3驱动固件安装指南Atlas 900 A2 PODc
昇腾NPU固件
Toolkit(开发套件)CANN 8.0.RC3CANN 软件安装指南
Kernel(算子包)
PyTorchrelease v6.0.RC3Ascend Extension for PyTorch 配置与安装
torch_npu插件
apex
【预训练集群性能与线性度】 diff --git a/examples/README.md b/examples/README.md index 9bda280c6..9b43a94ec 100644 --- a/examples/README.md +++ b/examples/README.md @@ -53,13 +53,13 @@ ```shell git clone https://gitee.com/ascend/ModelLink.git - git checkout 1.0.RC3 git clone https://github.com/NVIDIA/Megatron-LM.git cd Megatron-LM git checkout core_r0.6.0 cp -r megatron ../ModelLink/ cd .. cd ModelLink + git checkout 1.0.RC3 mkdir logs mkdir model_from_hf mkdir dataset -- Gitee From cd6e9823ecb1e9a973ccbaca7d5dc2b8e38690ca Mon Sep 17 00:00:00 2001 From: wucong Date: Mon, 11 Nov 2024 09:05:59 +0000 Subject: [PATCH 15/32] =?UTF-8?q?!1881=20=E4=BF=AE=E6=94=B9readme=E7=8E=AF?= =?UTF-8?q?=E5=A2=83=E9=85=8D=E5=A5=97=E9=94=99=E8=AF=AF=20-=E3=80=8B1.0.R?= =?UTF-8?q?C3=20Merge=20pull=20request=20!1881=20from=20wucong/fixpeitao?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/README.md | 47 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/examples/README.md b/examples/README.md index 9b43a94ec..f2005c9b9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -39,14 +39,45 @@ 【模型开发时推荐使用配套的环境版本】 -| 软件 | [版本](https://www.hiascend.com/zh/) | -|:---------:|:----------------------------------:| -| Python | 3.8 | -| Driver | 在研版本 | -| Firmware | 在研版本 | -| CANN | 在研版本 | -| Torch | 2.1.0 | -| Torch_npu | 在研版本 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
依赖软件版本软件安装指南推荐硬件形态
昇腾NPU驱动Ascend HDK 24.1.RC3驱动固件安装指南Atlas 900 A2 PODc
昇腾NPU固件
Toolkit(开发套件)CANN 8.0.RC3CANN 软件安装指南
Kernel(算子包)
PyTorchrelease v6.0.RC3Ascend Extension for PyTorch 配置与安装
torch_npu插件
apex
#### 1. 仓库拉取 -- Gitee From ed8b056349de6f25c67149350355c532264173f9 Mon Sep 17 00:00:00 2001 From: zhangjianxiang Date: Mon, 11 Nov 2024 09:16:09 +0000 Subject: [PATCH 16/32] =?UTF-8?q?!1850=20=E4=B8=BB=E9=A1=B5readme=E7=89=B9?= =?UTF-8?q?=E6=80=A7=E5=88=97=E8=A1=A8=E5=A2=9E=E5=8A=A0=E8=AF=B4=E6=98=8E?= =?UTF-8?q?=E9=93=BE=E6=8E=A5=20Merge=20pull=20request=20!1850=20from=20zh?= =?UTF-8?q?angjianxiang/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 226 ++++++++++++++++++++++++------------------------------ 1 file changed, 99 insertions(+), 127 deletions(-) diff --git a/README.md b/README.md index 8f7e92840..0500e42d0 100644 --- a/README.md +++ b/README.md @@ -1079,187 +1079,159 @@ ModelLink支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重 ## 预训练加速算法与融合算子 -ModelLink预训练支持张量并行、流水线并行等多种加速算法和融合算子,下表为各种加速特性对应的使能开关: +ModelLink预训练支持张量并行、流水线并行等多种加速算法和融合算子: - + - + - - - - - + + + + + - - - - + + + + - - - - + + + + - - - - - - - - - - - - - - - - + + + + - - - - - - - - - - - - - - - - + + + + - - - - - + + + + - - - - + + + + - - - - + + + + + - - - - - + + + + - - - - + + + + + - - - - - + + + + + - - - - + + + + - - - - + + + + - - - - + + + + + - - - - + + + + - - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + + - - - - + + + + - - - - - + + + + - - - - - - - - - - + + + +
-| 使用场景 | 特性名称 | 具体参数 | Mcore | Legacy |
-|:---:|:---:|:---:|:---:|:---:|
-| PTD并行 | 张量并行 | --tensor-model-parallel-size | Yes | Yes |
-| PTD并行 | 流水线并行 | --pipeline-model-parallel-size | Yes | Yes |
-| PTD并行 | 流水线并行动态划分 | --num-layer-list | Yes | Yes |
-| PTD并行 | 虚拟流水并行 | --num-layers-per-virtual-pipeline-stage | Yes | Yes |
-| PTD并行 | 序列并行 | --sequence-parallel | Yes | Yes |
-| PTD并行 | 分布式优化器 | --use-distributed-optimizer | Yes | Yes |
-| 长序列并行 | 长序列并行 | --context-parallel-size | Yes | No |
-| 长序列并行 | 多并行方案 | --context-parallel-algo | Yes | No |
-| 长序列并行 | Send/recv掩盖加速 | --cp-send-recv-overlap | Yes | No |
-| MOE | MOE专家并行 | --expert-model-parallel-size | Yes | No |
-| MOE | MOE重排通信优化 | --moe-permutation-async-comm | Yes | No |
-| MOE | GEMM | --moe-grouped-gemm | Yes | No |
-| 显存优化 | 参数副本复用 | --reuse-fp32-param | Yes | Yes |
-| 显存优化 | 激活函数重计算 | --recompute-activation-function | Yes | Yes |
-| 显存优化 | Swap Attention | --swap-attention | Yes | Yes |
-| 显存优化 | 重计算程度 | --recompute-granularity | Yes | Yes |
-| 显存优化 | 重计算层数 | --recompute-num-layers | Yes | Yes |
-| 显存优化 | 重计算方法 | --recompute-method | Yes | Yes |
-| 显存优化 | PP-Stage重计算 | --enable-recompute-layers-per-pp-rank | Yes | Yes |
-| 融合算子 | Flash attention | --use-flash-attn | Yes | Yes |
-| 融合算子 | Fused rmsnorm | --use-fused-rmsnorm | Yes | Yes |
-| 融合算子 | Fused swiglu | --use-fused-swiglu | Yes | Yes |
-| 融合算子 | Fused rotary position embedding | --use-fused-rotary-pos-emb | Yes | Yes |
-| 融合算子 | Sliding window attention | --sliding-window | Yes | Yes |
-| 通信 | 梯度reduce通算掩盖 | --overlap-grad-reduce | Yes | Yes |
-| 通信 | 权重all-gather通算掩盖 | --overlap-param-gather | Yes | No |
-| 通信 | MC2 | --use-mc2 | Yes | Yes |
+| 场景 | 特性名称 | 贡献方 |
+|:---:|:---:|:---:|
+| SPTD并行 | 张量并行 | 【昇腾】 |
+| SPTD并行 | 流水线并行 | 【昇腾】 |
+| SPTD并行 | 虚拟流水并行 | 【昇腾】 |
+| SPTD并行 | 序列并行 | 【昇腾】 |
+| 长序列并行 | Ascend Ring Attention 长序列并行 | 【昇腾】 |
+| 长序列并行 | Ulysses 长序列并行 | 【昇腾】 |
+| 长序列并行 | 混合长序列并行 | 【昇腾】 |
+| MOE | MOE 专家并行 | 【昇腾】 |
+| MOE | MOE 重排通信优化 | 【计算研究部】 |
+| 显存优化 | 参数副本复用 | 【计算算法部】 |
+| 显存优化 | 分布式优化器 | 【昇腾】 |
+| 显存优化 | Swap Attention | 【计算研究部】 |
+| 显存优化 | 重计算 | 【计算研究部】 |
+| 融合算子 | Flash attention | 【昇腾】 |
+| 融合算子 | Fused rmsnorm | 【昇腾】 |
+| 融合算子 | Fused swiglu | 【昇腾】 |
+| 融合算子 | Fused rotary position embedding | 【昇腾】 |
+| 融合算子 | GMM | 【昇腾】 |
+| 通信掩盖 | 梯度reduce通算掩盖 | 【昇腾】 |
+| 通信掩盖 | Recompute in advance | 【昇腾】 |
+| 通信掩盖 | 权重all-gather通算掩盖 | 【昇腾】 |
+| 通信掩盖 | MC2 | 【昇腾】 |
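上表中的开关可按需组合。下面给出一个把部分并行策略与融合算子开关写入训练脚本 GPT_ARGS 的示意片段(取值仅为演示,并行度需按模型规模与集群拓扑调整,并非某一模型的推荐配置):

```shell
GPT_ARGS="
    --tensor-model-parallel-size 2 \
    --pipeline-model-parallel-size 4 \
    --sequence-parallel \
    --use-distributed-optimizer \
    --use-flash-attn \
    --use-fused-rmsnorm \
    --use-fused-swiglu \
    --use-fused-rotary-pos-emb \
    --overlap-grad-reduce
"
```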
+--- + **注意事项** 1. 具体的预训练方法见[examples/README.md](./examples/README.md) -- Gitee From 497b84352d837edbf2ae8bac712ec9a480bc8444 Mon Sep 17 00:00:00 2001 From: MaHongyi <1053362531@qq.com> Date: Fri, 15 Nov 2024 00:51:57 +0000 Subject: [PATCH 17/32] =?UTF-8?q?!1896=20=E9=AB=98=E5=8F=AF=E7=94=A8?= =?UTF-8?q?=E7=89=B9=E6=80=A7=E8=AF=B4=E6=98=8E=E4=BF=AE=E6=AD=A3=20->=20R?= =?UTF-8?q?C3=20Merge=20pull=20request=20!1896=20from=20MaHongyi/1.0.RC3?= =?UTF-8?q?=5Freadme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0500e42d0..dc535e8c8 100644 --- a/README.md +++ b/README.md @@ -1539,24 +1539,22 @@ export HCCL_DETERMINISTIC=True ## 基于昇腾芯片的高可用特性 -分布式优化器的思想是通过将优化器状态均匀地分布在数据并行组中来节省内存。基于该思想,设计了将数据并行组切分成两个副本数据并行组的方案,副本优化器将优化器状态均匀分布在副本数据并行组,实现优化器状态均有备份。结合华为自研的高可用框架,可实现以下功能: -1. 训练过程中,支持故障场景保存临终checkpoint,训练结果0损失。 -2. 训练过程中,支持HBM的UCE故障检测,并完成在线修复,达到Step级重计算。 +分布式优化器的思想是通过将优化器状态均匀地分布在数据并行组中来节省内存。基于该思想,设计了将数据并行组切分成两个副本数据并行组的方案,副本优化器将优化器状态均匀分布在副本数据并行组,实现优化器状态均有备份。结合华为自研的高可用框架,可实现训练过程中,支持故障场景保存临终checkpoint,训练结果0损失。 -开启高可用特性时,副本优化器使用的静态内存有所增加,每个参数的理论字节数为(其中“d”是数据并行大小): + +开启高可用特性时,副本优化器使用的静态内存有所增加,每个参数的理论字节数为(其中“d”是数据并行大小,增长关系仅供参考): | | Non-distributed optim | Distributed optim | Replica optim | |----------------------------------| ------ | ------ |---------------| -| fp16/bf16 param, fp16/bf16 grads | 20 | 4 + 16/d | 4 + 32/d | -| fp16/bf16 param, fp32 grads | 18 | 6 + 12/d | Supporting | -| fp32 param, fp32 grads | 16 | 8 + 8/d | Supporting | +| fp16/bf16 param, fp16/bf16 grads | 20 | 4 + 16/d | 4 + 32/d | +| fp16/bf16 param, fp32 grads | 18 | 6 + 12/d | 6 + 24/d | +| fp32 param, fp32 grads | 16 | 8 + 8/d | 8 + 16/d | -- 启动命令中加入开关,并安装华为自研高可用框架mindio_ttp.whl -- mindio_ttp相关说明:https://www.hiascend.com/document/detail/zh/mindx-dl/60rc1/mindio/mindiottp +- 启动命令中加入开关,并安装华为自研高可用框架 [mindio_ttp.whl](https://www.hiascend.com/document/detail/zh/mindx-dl/60rc3/clusterscheduling/ref/mindiottp/mindiotft009.html) +- mindio_ttp相关说明:[MindIO TTP 官网介绍](https://www.hiascend.com/document/detail/zh/mindx-dl/60rc3/clusterscheduling/ref/mindiottp/mindiotft001.html) ```shell --enable-high-availability #使能高可用特性的总开关 ---enable-optimizer-state-local-copy #使能保存上一步优化器状态,内存会进一步增加,默认可关闭 ``` --- -- Gitee From f6df730441bddb390daf423a8bbccd42d7602e0c Mon Sep 17 00:00:00 2001 From: wucong Date: Fri, 22 Nov 2024 02:32:56 +0000 Subject: [PATCH 18/32] =?UTF-8?q?!1892=20=E4=BF=AE=E5=A4=8D=E5=BE=AE?= =?UTF-8?q?=E8=B0=83=E6=95=B0=E6=8D=AE=E9=95=BF=E5=BA=A6=E8=B6=85=E8=BF=87?= =?UTF-8?q?seq=5Flen=E5=AF=BC=E8=87=B4nan=E9=97=AE=E9=A2=98=20->=20RC3=20M?= =?UTF-8?q?erge=20pull=20request=20!1892=20from=20wucong/fixnan=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modellink/data/decoder_packed_mtf_dataset.py | 83 +++++++++++++++++--- 1 file changed, 70 insertions(+), 13 deletions(-) diff --git a/modellink/data/decoder_packed_mtf_dataset.py b/modellink/data/decoder_packed_mtf_dataset.py index 8b2278916..f3ba08134 100644 --- a/modellink/data/decoder_packed_mtf_dataset.py +++ b/modellink/data/decoder_packed_mtf_dataset.py @@ -164,14 +164,7 @@ class DecoderPackedMTFDataset(torch.utils.data.Dataset): item = self.mtf_dataset[doc_idx] if self.args.is_pairwise_dataset: - res = { - "chosen_input_ids": self._cut_token(item["chosen_input_ids"], np.int64), - "chosen_attention_mask": self._cut_token(item["chosen_attention_mask"], np.int64), - 
"chosen_labels": self._cut_token(item["chosen_labels"], np.int64), - "rejected_input_ids": self._cut_token(item["rejected_input_ids"], np.int64), - "rejected_attention_mask": self._cut_token(item["rejected_attention_mask"], np.int64), - "rejected_labels": self._cut_token(item["rejected_labels"], np.int64) - } + return self._cut_pairwise_token(item, np.int64) elif self.args.reset_position_ids: position_ids = self._get_reset_position_ids(torch.from_numpy(item['input_ids'])) return { @@ -181,13 +174,29 @@ class DecoderPackedMTFDataset(torch.utils.data.Dataset): "position_ids": self._cut_token(position_ids.numpy(), np.int64) } else: - res = { - "input_ids": self._cut_token(item["input_ids"], np.int64), - "attention_mask": self._cut_token(item["attention_mask"], np.int64), - "labels": self._cut_token(item["labels"], np.int64), - } + return self._cut_instruction_token(item, np.int64) + + + def _cut_instruction_token(self, item, dtype): + IGNORE_INDEX = -100 + prompt_length = (item["labels"] != IGNORE_INDEX).nonzero()[0][0] + prompt_ids = item["input_ids"][:prompt_length] + label_ids = item["labels"][prompt_length:] + source_len, target_len = _infer_seqlen( + len(prompt_ids), len(label_ids), self.seq_length + ) + prompt_ids = prompt_ids[:source_len] + label_ids = label_ids[:target_len] + input_ids = np.append(prompt_ids, label_ids) + labels = np.append(IGNORE_INDEX * np.ones(source_len), label_ids) + res = { + "input_ids": input_ids.astype(dtype), + "attention_mask": np.ones_like(input_ids).astype(dtype), + "labels": labels.astype(dtype) + } return res + def _cut_token(self, token, dtype): token_length = len(token) @@ -196,6 +205,54 @@ class DecoderPackedMTFDataset(torch.utils.data.Dataset): return token.astype(dtype) + def _cut_pairwise_token(self, item, dtype): + """Cut prompt and response proportionally for pairwise datasets.""" + IGNORE_INDEX = -100 + prompt_length = (item["chosen_labels"] != IGNORE_INDEX).nonzero()[0][0] + prompt_ids = item["chosen_input_ids"][:prompt_length] + chosen_ids = item["chosen_input_ids"][prompt_length:] + rejected_ids = item["rejected_input_ids"][prompt_length:] + source_len, target_len = _infer_seqlen( + len(prompt_ids), max(len(chosen_ids), len(rejected_ids)), self.seq_length + ) + prompt_ids = prompt_ids[:source_len] + chosen_ids = chosen_ids[:target_len] + rejected_ids = rejected_ids[:target_len] + + chosen_input_ids = np.append(prompt_ids, chosen_ids) + chosen_labels = np.append(IGNORE_INDEX * np.ones(source_len), chosen_ids) + rejected_input_ids = np.append(prompt_ids, rejected_ids) + rejected_labels = np.append(IGNORE_INDEX * np.ones(source_len), rejected_ids) + + res = { + "chosen_input_ids": chosen_input_ids.astype(dtype), + "chosen_attention_mask": np.ones_like(chosen_input_ids).astype(dtype), + "chosen_labels": chosen_labels.astype(dtype), + "rejected_input_ids": rejected_input_ids.astype(dtype), + "rejected_attention_mask": np.ones_like(rejected_input_ids).astype(dtype), + "rejected_labels": rejected_labels.astype(dtype) + } + + return res + + +def _infer_seqlen(source_len: int, target_len: int, cutoff_len: int): + r""" + Computes the real sequence length after truncation by the cutoff_len. 
+ """ + if target_len * 2 < cutoff_len: # truncate source + max_target_len = cutoff_len + elif source_len * 2 < cutoff_len: # truncate target + max_target_len = cutoff_len - source_len + else: # truncate both + max_target_len = int(cutoff_len * (target_len / (source_len + target_len))) + + new_target_len = min(max_target_len, target_len) + max_source_len = max(cutoff_len - new_target_len, 0) + new_source_len = min(max_source_len, source_len) + return new_source_len, new_target_len + + def _build_index_mappings( name, data_prefix, -- Gitee From 7696913c7ae81bc3f8f5f437314132dc96b97e06 Mon Sep 17 00:00:00 2001 From: wucong Date: Wed, 27 Nov 2024 11:32:02 +0000 Subject: [PATCH 19/32] =?UTF-8?q?!1936=20=E4=BF=AE=E5=A4=8D=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E9=A2=84=E5=A4=84=E7=90=86=E8=BF=87=E6=BB=A4=E8=B6=85?= =?UTF-8?q?=E8=BF=87seq=5Flength=E6=95=B0=E6=8D=AE=E8=AE=A1=E6=95=B0=20-?= =?UTF-8?q?=E3=80=8B=20RC3=20Merge=20pull=20request=20!1936=20from=20wucon?= =?UTF-8?q?g/fix=5Fskip=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modellink/tasks/preprocess/data_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modellink/tasks/preprocess/data_handler.py b/modellink/tasks/preprocess/data_handler.py index f6f19c7d4..af40c07bf 100644 --- a/modellink/tasks/preprocess/data_handler.py +++ b/modellink/tasks/preprocess/data_handler.py @@ -188,6 +188,7 @@ class BaseDatasetHandler(object): for j, sentences in enumerate(batch): for k, sentence in enumerate(sentences): if (j, k) in skip_indices: + skip_num = skip_num + 1 continue total_bytes_processed += len(sentence) * np.int32().itemsize @@ -201,7 +202,7 @@ class BaseDatasetHandler(object): mbs = total_bytes_processed / elapsed / 1024 / 1024 logger.info("Processed %s documents (%s docs/s, %s MB/s).", batch_id, batch_id / elapsed, mbs) - logger.info("Skip %s sample exceeded seq-length(%s)", skip_num, self.args.seq_length) + logger.info("Skip %s sample exceeded seq-length(%s)", skip_num / len(self.args.json_keys), self.args.seq_length) for key in self.args.json_keys: builders[key].finalize(output_idx_files[key]) -- Gitee From 1a96df172eb136e14f0319a8497cee5129181450 Mon Sep 17 00:00:00 2001 From: wucong Date: Thu, 28 Nov 2024 14:16:03 +0000 Subject: [PATCH 20/32] =?UTF-8?q?!1964=20=E4=BF=AE=E5=A4=8Dllama2-7b?= =?UTF-8?q?=E8=AE=AD=E7=BB=8316=E6=AD=A5=E5=B7=A6=E5=8F=B3oom=20-=E3=80=8B?= =?UTF-8?q?=20RC3=20Merge=20pull=20request=20!1964=20from=20wucong/fix=5Fo?= =?UTF-8?q?om=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/llama/pretrain_llama_7b_ptd.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/llama/pretrain_llama_7b_ptd.sh b/examples/llama/pretrain_llama_7b_ptd.sh index 490963637..f549b629c 100644 --- a/examples/llama/pretrain_llama_7b_ptd.sh +++ b/examples/llama/pretrain_llama_7b_ptd.sh @@ -2,6 +2,7 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1 export NPU_ASD_ENABLE=0 +export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True GPUS_PER_NODE=8 MASTER_ADDR=localhost -- Gitee From 198da431e9ddd65b522bd3a570ff91c11ef0d1e2 Mon Sep 17 00:00:00 2001 From: MeiFei Date: Mon, 2 Dec 2024 06:24:07 +0000 Subject: [PATCH 21/32] !1975 bug fix:add DETERMINISTIC to gemma2 ST Merge pull request !1975 from MeiFei/1.0.RC3-test --- .../baseline_results/gemma2_tp8_pp1_ptd.json | 60 +++++++++---------- tests/st/shell_scripts/gemma2_tp8_pp1_ptd.sh | 2 + 2 files changed, 32 insertions(+), 30 deletions(-) diff --git 
a/tests/st/baseline_results/gemma2_tp8_pp1_ptd.json b/tests/st/baseline_results/gemma2_tp8_pp1_ptd.json index af751deda..5a6223dfb 100644 --- a/tests/st/baseline_results/gemma2_tp8_pp1_ptd.json +++ b/tests/st/baseline_results/gemma2_tp8_pp1_ptd.json @@ -1,37 +1,37 @@ { "lm loss": [ - 1.387083, - 1.455181, - 1.352648, - 1.309051, - 1.241118, - 1.118676, - 1.113002, - 1.067905, - 1.112425, - 1.110263, - 1.048378, - 1.055717, - 1.055748, - 1.023589, - 1.044405 + 1.385735, + 1.454418, + 1.355794, + 1.314839, + 1.248124, + 1.126178, + 1.113734, + 1.070928, + 1.117932, + 1.112961, + 1.058354, + 1.065326, + 1.060758, + 1.028861, + 1.048179 ], "throughput": [ - 43.8, - 91.9, - 92.8, - 92.9, - 92.9, - 93.1, - 93.0, - 92.6, - 92.9, - 92.9, - 92.9, - 92.9, - 93.1, - 92.8, - 93.1 + 4.5, + 19.2, + 19.3, + 19.7, + 19.5, + 19.7, + 19.9, + 20.1, + 20.4, + 20.6, + 20.1, + 19.7, + 19.7, + 20.5, + 19.9 ], "memo info": [ { diff --git a/tests/st/shell_scripts/gemma2_tp8_pp1_ptd.sh b/tests/st/shell_scripts/gemma2_tp8_pp1_ptd.sh index c7966c75f..42099b519 100644 --- a/tests/st/shell_scripts/gemma2_tp8_pp1_ptd.sh +++ b/tests/st/shell_scripts/gemma2_tp8_pp1_ptd.sh @@ -1,5 +1,6 @@ #!/bin/bash export CUDA_DEVICE_MAX_CONNECTIONS=1 +export HCCL_DETERMINISTIC=true NPUS_PER_NODE=8 MASTER_ADDR=localhost @@ -81,6 +82,7 @@ GPT_ARGS=" --no-load-rng \ --vocab-size 256000 \ --log-throughput \ + --use-deter-comp \ --finetune \ --bf16 " -- Gitee From bbefac2b68b0420a543c702e789194d5a1fb7df9 Mon Sep 17 00:00:00 2001 From: MeiFei Date: Tue, 3 Dec 2024 13:14:00 +0000 Subject: [PATCH 22/32] !1972 rename: repo url/path/note from modellink to mindspeed-llm Merge pull request !1972 from MeiFei/1.0.RC3-rename --- README.md | 61 +++++++++--------- SECURITYNOTE.md | 12 ++-- examples/README.md | 34 +++++----- examples/llama2/evaluate_llama2_7B_ptd.sh | 2 +- .../llama2/evaluate_llama2_7b_full_ptd.sh | 2 +- .../llama2/evaluate_llama2_7b_agieval_ptd.sh | 2 +- .../llama2/evaluate_llama2_7b_bbh_ptd.sh | 2 +- .../llama2/evaluate_llama2_7b_boolq_ptd.sh | 2 +- .../llama2/evaluate_llama2_7b_ceval_ptd.sh | 2 +- .../evaluate_llama2_7b_full_mmlu_ptd.sh | 2 +- .../evaluate_llama2_7b_humaneval_ptd.sh | 2 +- .../llama2/evaluate_llama2_7b_mmlu_ptd.sh | 2 +- examples/qwen/convert_ckpt_qwen_hf2legacy.sh | 2 +- examples/qwen/convert_ckpt_qwen_legacy2hf.sh | 2 +- modellink/arguments.py | 2 +- modellink/core/datasets/gpt_dataset.py | 4 +- modellink/core/transformer/moe/moe_utils.py | 2 +- .../core/transformer/moe/token_dispatcher.py | 2 +- modellink/patchs/megatron_patch.py | 2 +- .../inference/text_generation/infer_base.py | 10 +-- setup.py | 6 +- sources/images/logo.png | Bin 24244 -> 7050 bytes ...a2_7b_MindSpeed-LLM_DeepSpeed_compare.png} | Bin ...en_7b_MindSpeed-LLM_DeepSpeed_compare.png} | Bin tests/README.md | 2 +- .../pipeline/baichuan2-13B/test_generation.py | 2 +- tests/pipeline/bloom-7B/test_generation.py | 2 +- tests/pipeline/chatglm3-6B/test_generation.py | 2 +- tests/pipeline/gemma-7B/test_generation.py | 2 +- tests/pipeline/qwen15-7B/test_generation.py | 2 +- tests/ut/inference/test_inference.py | 4 +- 31 files changed, 87 insertions(+), 86 deletions(-) rename sources/images/{tune_llama2_7b_ModelLink_DeepSpeed_compare.png => tune_llama2_7b_MindSpeed-LLM_DeepSpeed_compare.png} (100%) rename sources/images/{tune_qwen_7b_ModelLink_DeepSpeed_compare.png => tune_qwen_7b_MindSpeed-LLM_DeepSpeed_compare.png} (100%) diff --git a/README.md b/README.md index dc535e8c8..9e6e949a8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

+

@@ -13,14 +13,14 @@

-ModelLink旨在为华为 [昇腾芯片](https://www.hiascend.com/) 上提供端到端的大语言模型方案, 包含模型,算法,以及下游任务。 +MindSpeed-LLM,原仓名ModelLink,旨在为华为 [昇腾芯片](https://www.hiascend.com/) 上提供端到端的大语言模型方案, 包含模型,算法,以及下游任务。 --- -## ModelLink大模型方案概览 +## MindSpeed-LLM大模型方案概览 -当前ModelLink支撑大模型使用功能: +当前MindSpeed-LLM支撑大模型使用功能: * 权重转换:[Huggingface与Megatron-LM权重转换](#jump1) 【昇腾】【OCK】【GTS】 * 数据集处理:[预训练数据集/指令微调数据集](./examples/README.md) 【NAIE】【昇腾】 * 分布式预训练:[加速算法/融合算子/并行策略](#jump2)【昇腾】【计算算法部】【计算研究部】 @@ -34,22 +34,22 @@ LoRA、DPO、奖励模型、PPO等特性即将上线 --- -## ModelLink版本维护策略 +## MindSpeed-LLM版本维护策略 -ModelLink版本有以下五个维护阶段: +MindSpeed-LLM版本有以下五个维护阶段: | **状态** | **时间** | **说明** | | ------------------- | -------- |----------------------------------------------------------------------| | 计划 | 1—3 个月 | 计划特性 | | 开发 | 3 个月 | 开发特性 | -| 维护 | 6-12 个月| 合入所有已解决的问题并发布版本,针对不同的ModelLink版本采取不同的维护策略,常规版本和长期支持版本维护周期分别为6个月和12个月 | +| 维护 | 6-12 个月| 合入所有已解决的问题并发布版本,针对不同的MindSpeed-LLM版本采取不同的维护策略,常规版本和长期支持版本维护周期分别为6个月和12个月 | | 无维护 | 0—3 个月 | 合入所有已解决的问题,无专职维护人员,无版本发布 | | 生命周期终止(EOL) | N/A | 分支不再接受任何修改 | -ModelLink已发布版本维护策略: +MindSpeed-LLM已发布版本维护策略: -| **ModelLink版本** | **维护策略** | **当前状态** | **发布时间** | **后续状态** | **EOL日期** | +| **MindSpeed-LLM版本** | **维护策略** | **当前状态** | **发布时间** | **后续状态** | **EOL日期** | |-----------------|-----------|--------|------------|-----------------------|-----------| | bk_origin_23 | Demo | EOL | 2023 | 生命周期终止 | 2024/6/30 | | 1.0.RC1 | 常规版本 | 维护 | 2024/03/30 | 预计2024/9/30起无维护 | | @@ -101,13 +101,14 @@ ModelLink已发布版本维护策略: + 【预训练集群性能与线性度】 -ModelLink 通过模型并行与数据并行来训练大语言模型,为了演示如何使用多个昇腾芯片和模型大小进行扩展性训练,我们使用 `GPT3-175B` 稠密大模型,从128颗 NPU 扩展到 7968颗 NPU 进行实验,下图是实验数据: +MindSpeed-LLM 通过模型并行与数据并行来训练大语言模型,为了演示如何使用多个昇腾芯片和模型大小进行扩展性训练,我们使用 `GPT3-175B` 稠密大模型,从128颗 NPU 扩展到 7968颗 NPU 进行实验,下图是实验数据:

报告的吞吐量是针对端到端训练进行测量的,涵盖所有操作,包括数据加载、优化器步骤、通信,甚至日志记录。请注意,示例大模型没有训练至收敛。 -图中呈现了对应集群规模下的 `MFU` 值与集群整体的 `线性度`情况. 计算公式已经放到社区,点击链接可进行参考:[MFU计算公式](https://gitee.com/ascend/ModelLink/wikis/%E6%9C%AF%E8%AF%AD%E5%AE%9A%E4%B9%89/%E5%A4%A7%E6%A8%A1%E5%9E%8B%20MFU%20%E8%AE%A1%E7%AE%97%E5%85%AC%E5%BC%8F),[线性度计算公式](https://gitee.com/ascend/ModelLink/wikis/%E6%9C%AF%E8%AF%AD%E5%AE%9A%E4%B9%89/%E7%BA%BF%E6%80%A7%E5%BA%A6%E5%85%AC%E5%BC%8F) +图中呈现了对应集群规模下的 `MFU` 值与集群整体的 `线性度`情况. 计算公式已经放到社区,点击链接可进行参考:[MFU计算公式](https://gitee.com/ascend/MindSpeed-LLM/wikis/%E6%9C%AF%E8%AF%AD%E5%AE%9A%E4%B9%89/%E5%A4%A7%E6%A8%A1%E5%9E%8B%20MFU%20%E8%AE%A1%E7%AE%97%E5%85%AC%E5%BC%8F),[线性度计算公式](https://gitee.com/ascend/MindSpeed-LLM/wikis/%E6%9C%AF%E8%AF%AD%E5%AE%9A%E4%B9%89/%E7%BA%BF%E6%80%A7%E5%BA%A6%E5%85%AC%E5%BC%8F) 下述列表中支持的模型,我们在[examples/README.md](./examples/README.md)中提供了相应的使用说明,里面有详细的模型训练、推理、评估流程 @@ -898,7 +899,7 @@ ModelLink 通过模型并行与数据并行来训练大语言模型,为了演 ## Huggingface与Megatron-LM权重转换 -ModelLink支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重格式互转,具体功能列表如下: +MindSpeed-LLM支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重格式互转,具体功能列表如下: @@ -1079,7 +1080,7 @@ ModelLink支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重 ## 预训练加速算法与融合算子 -ModelLink预训练支持张量并行、流水线并行等多种加速算法和融合算子: +MindSpeed-LLM预训练支持张量并行、流水线并行等多种加速算法和融合算子:
@@ -1104,7 +1105,7 @@ ModelLink预训练支持张量并行、流水线并行等多种加速算法和 - + @@ -1167,7 +1168,7 @@ ModelLink预训练支持张量并行、流水线并行等多种加速算法和 - + @@ -1223,7 +1224,7 @@ ModelLink预训练支持张量并行、流水线并行等多种加速算法和 - + @@ -1244,17 +1245,17 @@ ModelLink预训练支持张量并行、流水线并行等多种加速算法和 --- ## 分布式指令微调 -ModelLink支持指令微调,方案与DeepSpeed统一,在微调效果保持一致的前提下,ModelLink可以表现出优异性能 +MindSpeed-LLM支持指令微调,方案与DeepSpeed统一,在微调效果保持一致的前提下,MindSpeed-LLM可以表现出优异性能 【与DeepSpeed微调Loss对比】
【昇腾】
虚拟流水并行虚拟流水并行 【昇腾】【计算研究部】
重计算重计算 【计算研究部】【昇腾】
MC2MC2 【昇腾】
Llama2-7b模型与DeepSpeed微调5个epoch后的loss对比图
-<img src="sources/images/tune_llama2_7b_ModelLink_DeepSpeed_compare.png">
+<img src="sources/images/tune_llama2_7b_MindSpeed-LLM_DeepSpeed_compare.png">
Qwen-7b模型与DeepSpeed微调4个epoch后的loss对比图
-<img src="sources/images/tune_qwen_7b_ModelLink_DeepSpeed_compare.png">
+<img src="sources/images/tune_qwen_7b_MindSpeed-LLM_DeepSpeed_compare.png">
@@ -1263,7 +1264,7 @@ ModelLink支持指令微调,方案与Question - ModelLink + MindSpeed-LLM DeepSpeed @@ -1342,7 +1343,7 @@ My soul is full and my heart does soep. 模型 --prompt-type - ModelLink + NPU + MindSpeed-LLM + NPU DeepSpeed + NPU DeepSpeed + 参考 @@ -1457,9 +1458,9 @@ My soul is full and my heart does soep. ## 大模型Benchmark基线评估 -ModelLink支持大模型在公开基准数据集上进行准确率评估,当前支持的Benchmark如下: +MindSpeed-LLM支持大模型在公开基准数据集上进行准确率评估,当前支持的Benchmark如下: -| Benchmark | 下载链接 | 验证集 | ModelLink | OpenCompass | +| Benchmark | 下载链接 | 验证集 | MindSpeed-LLM | OpenCompass | |-----------|------------------------------------------------------------------------------------------|------|----------------------------------------------------------------------|------------------------------------------------------------------| | MMLU | [GitHub](https://people.eecs.berkeley.edu/~hendrycks/data.tar) | test | [45.73%](./examples/mcore/llama2/evaluate_llama2_7b_mmlu_ptd.sh) | [45.3%](https://hub.opencompass.org.cn/dataset-detail/MMLU) | | CEval | [HuggingFace](https://huggingface.co/datasets/ceval/ceval-exam/blob/main/ceval-exam.zip) | val | [33.87%](./examples/mcore/llama2/evaluate_llama2_7b_ceval_ptd.sh) | [32.5%](https://hub.opencompass.org.cn/dataset-detail/C-Eval) | @@ -1468,9 +1469,9 @@ ModelLink支持大模型在公开基准数据集上进行准确率评估,当 | AGIEval | [GitHub](https://github.com/ruixiangcui/AGIEval/tree/main) | test | [20.6%](./examples/mcore/llama2/evaluate_llama2_7b_agieval_ptd.sh) | [20.6%](https://hub.opencompass.org.cn/dataset-detail/AGIEval) | | HumanEval | [GitHub](https://github.com/openai/human-eval/tree/master/data) | test | [12.8%](./examples/mcore/llama2/evaluate_llama2_7b_humaneval_ptd.sh) | [12.2%](https://hub.opencompass.org.cn/dataset-detail/HumanEval) | -ModelLink已支持模型的评估数据统计如下: +MindSpeed-LLM已支持模型的评估数据统计如下: -| 模型 | 任务 | ModelLink | 社区 | 模型 | 任务 | ModelLink | 社区 | +| 模型 | 任务 | MindSpeed-LLM | 社区 | 模型 | 任务 | MindSpeed-LLM | 社区 | |---------------|--------|-----------|----------------------------------------------------------------------|------------------|--------|-----------|-----------------------------------------------------------------------------------| | Aquila-7B | BoolQ | 77.3% | -- | Aquila2-7B | BoolQ | 77.8% | -- | | Aquila2-34B | BoolQ | 88.0% | -- | Baichuan-7B | BoolQ | 69.0% | [67.0%](https://hub.opencompass.org.cn/dataset-detail/BoolQ) | @@ -1505,7 +1506,7 @@ MiniCPM-2B | MMLU | 51.6% | [53.4%](https://github.com/OpenBMB/MiniCPM? 
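上表中各基准的评估结果均可通过 examples 目录下对应脚本复现,例如 MMLU(运行前需在脚本内配置好权重、词表与数据集路径):

```shell
bash examples/mcore/llama2/evaluate_llama2_7b_mmlu_ptd.sh
```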
## 基于昇腾芯片采集Profiling数据 -Modellink支持基于昇腾芯片采集profiling数据,以提供对模型运行情况的分析,主要API如下: +MindSpeed-LLM支持基于昇腾芯片采集profiling数据,以提供对模型运行情况的分析,主要API如下: ```bash @@ -1524,7 +1525,7 @@ Modellink支持基于昇腾芯片采集profiling数据,以提供对模型运 --- ## 基于昇腾芯片的确定性计算功能 -昇腾芯片默认采用了不确定计算加速模型训练,有时为了重复实验与对比实验需要确定性的计算结果,ModelLink使能确定性计算的开关如下: +昇腾芯片默认采用了不确定计算加速模型训练,有时为了重复实验与对比实验需要确定性的计算结果,MindSpeed-LLM使能确定性计算的开关如下: - 启动命令中加入开关 ```shell @@ -1561,7 +1562,7 @@ export HCCL_DETERMINISTIC=True ## 致谢 -ModelLink由华为公司的下列部门联合贡献 : +MindSpeed-LLM由华为公司的下列部门联合贡献 : - 昇腾计算产品部 - 计算算法部 - 计算研究部 @@ -1569,10 +1570,10 @@ ModelLink由华为公司的下列部门联合贡献 : - 公共开发部:NAIE - 全球技术服务部:GTS -感谢来自社区的每一个PR,欢迎贡献 ModelLink +感谢来自社区的每一个PR,欢迎贡献 MindSpeed-LLM --- ## 安全声明 -[ModelLink安全声明](https://gitee.com/ascend/ModelLink/wikis/%E5%AE%89%E5%85%A8%E7%9B%B8%E5%85%B3/%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E) +[MindSpeed-LLM安全声明](https://gitee.com/ascend/MindSpeed-LLM/wikis/%E5%AE%89%E5%85%A8%E7%9B%B8%E5%85%B3/%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E) diff --git a/SECURITYNOTE.md b/SECURITYNOTE.md index 708462fcc..84f48a974 100644 --- a/SECURITYNOTE.md +++ b/SECURITYNOTE.md @@ -6,12 +6,12 @@ ``` ## 运行用户建议 -出于安全性及权限最小化角度考虑,不建议使用root等管理员类型账户使用Modellink。 +出于安全性及权限最小化角度考虑,不建议使用root等管理员类型账户使用MindSpeed-LLM。 ## 文件权限控制 1. 建议用户在主机(包括宿主机)及容器中设置运行系统umask值为0027及以上,保障新增文件夹默认最高权限为750,新增文件默认最高权限为640。 -2. 建议用户对个人数据、商业资产、源文件、训练过程中保存的各类文件等敏感内容做好权限管控。涉及场景如Modellink安装目录权限管控、多用户使用共享数据集权限管控,管控权限可参考表1进行设置。 -3. Modellink在数据预处理中会生成训练数据,在训练过程会生成权重文件,文件权限默认640,用户可根据实际需求对生成文件权限进行进阶管控。 +2. 建议用户对个人数据、商业资产、源文件、训练过程中保存的各类文件等敏感内容做好权限管控。涉及场景如MindSpeed-LLM安装目录权限管控、多用户使用共享数据集权限管控,管控权限可参考表1进行设置。 +3. MindSpeed-LLM在数据预处理中会生成训练数据,在训练过程会生成权重文件,文件权限默认640,用户可根据实际需求对生成文件权限进行进阶管控。 **表1 文件(夹)各场景权限管控推荐最大值** | 类型 | linux权限参考最大值 | @@ -38,13 +38,13 @@ ## 数据安全声明 -1. ModelLink会在megatron中的checkpointing模块中保存模型文件,其中部分模型文件使用了风险模块pickle,可能存在数据风险。 +1. MindSpeed-LLM会在megatron中的checkpointing模块中保存模型文件,其中部分模型文件使用了风险模块pickle,可能存在数据风险。 ## 运行安全声明 1. 建议用户结合运行资源状况编写对应训练脚本。若训练脚本与资源状况不匹配,如数据集加载内存大小超出内存容量限制、训练脚本在本地生成数据超过磁盘空间大小等情况,可能引发错误并导致进程意外退出。 -2. ModelLink内部用到了pytorch,可能会因为版本不匹配导致运行错误,具体可参考pytorch[安全声明](https://gitee.com/ascend/pytorch#%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E)。 +2. 
MindSpeed-LLM内部用到了pytorch,可能会因为版本不匹配导致运行错误,具体可参考pytorch[安全声明](https://gitee.com/ascend/pytorch#%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E)。 ## 公网地址声明 @@ -59,7 +59,7 @@ | 自研 | 不涉及 | modellink/core/transformer/moe/moe_utils.py:135 | https://arxiv.org/abs/2101.03961 | 论文地址 | ## 公开接口声明 -ModelLink 暂时未发布wheel包,无正式对外公开接口,所有功能均通过shell脚本调用。5个入口脚本分别为[pretrain_gpt.py](https://gitee.com/ascend/ModelLink/blob/master/pretrain_gpt.py), [inference.py](https://gitee.com/ascend/ModelLink/blob/master/inference.py), [evaluation.py](https://gitee.com/ascend/ModelLink/blob/master/evaluation.py), [preprocess_data.py](https://gitee.com/ascend/ModelLink/blob/master/preprocess_data.py) 和 [convert_ckpt.py](https://gitee.com/ascend/ModelLink/blob/master/convert_ckpt.py)。 +MindSpeed-LLM 暂时未发布wheel包,无正式对外公开接口,所有功能均通过shell脚本调用。5个入口脚本分别为[pretrain_gpt.py](https://gitee.com/ascend/MindSpeed-LLM/blob/master/pretrain_gpt.py), [inference.py](https://gitee.com/ascend/MindSpeed-LLM/blob/master/inference.py), [evaluation.py](https://gitee.com/ascend/MindSpeed-LLM/blob/master/evaluation.py), [preprocess_data.py](https://gitee.com/ascend/MindSpeed-LLM/blob/master/preprocess_data.py) 和 [convert_ckpt.py](https://gitee.com/ascend/MindSpeed-LLM/blob/master/convert_ckpt.py)。 ## 通信安全加固 diff --git a/examples/README.md b/examples/README.md index f2005c9b9..d6674e5df 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,4 @@ -# ModelLink 使用指南 +# MindSpeed-LLM 使用指南

@@ -83,13 +83,13 @@ #### 1. 仓库拉取 ```shell - git clone https://gitee.com/ascend/ModelLink.git + git clone https://gitee.com/ascend/MindSpeed-LLM.git git clone https://github.com/NVIDIA/Megatron-LM.git cd Megatron-LM git checkout core_r0.6.0 - cp -r megatron ../ModelLink/ + cp -r megatron ../MindSpeed-LLM/ cd .. - cd ModelLink + cd MindSpeed-LLM git checkout 1.0.RC3 mkdir logs mkdir model_from_hf @@ -142,7 +142,7 @@ 更多社区资源可以在`模型`列链接中获取,如`Chat/Instruct`权重等 -权重可以基于网页直接下载,也可以基于命令行下载,保存到ModelLink/model_from_hf目录,比如: +权重可以基于网页直接下载,也可以基于命令行下载,保存到MindSpeed-LLM/model_from_hf目录,比如: ```shell @@ -206,7 +206,7 @@ python convert_ckpt.py \ 【--model-type-hf】 -huggingface模型类别,默认为llama2,目前支持的模型见 [model_cfg.json](https://gitee.com/ascend/ModelLink/blob/master/modellink/tasks/checkpoint/model_cfg.json) +huggingface模型类别,默认为llama2,目前支持的模型见 [model_cfg.json](https://gitee.com/ascend/MindSpeed-LLM/blob/master/modellink/tasks/checkpoint/model_cfg.json) 【--tokenizer-model】 @@ -218,7 +218,7 @@ huggingface模型类别,默认为llama2,目前支持的模型见 [model_cfg. 【启动脚本】 -ModelLink Huggingface到Megatron-Legacy权重转换脚本命名风格及启动方法为: +MindSpeed-LLM Huggingface到Megatron-Legacy权重转换脚本命名风格及启动方法为: ```shell # 命名及启动:bash examples/model_name/ckpt_convert_xxx_hf2legacy.sh # 需要配置并行参数以及权重词表加载保存等路径 @@ -226,7 +226,7 @@ ModelLink Huggingface到Megatron-Legacy权重转换脚本命名风格及启动 bash examples/llama2/ckpt_convert_llama2_hf2legacy.sh ``` -ModelLink Huggingface到Megatron-Mcore权重转换脚本命名风格及启动方法为: +MindSpeed-LLM Huggingface到Megatron-Mcore权重转换脚本命名风格及启动方法为: ```shell # 命名及启动:bash examples/mcore/model_name/ckpt_convert_xxx_hf2mcore.sh # 需要配置并行参数以及权重词表加载保存等路径 @@ -254,7 +254,7 @@ python convert_ckpt.py \ 【启动脚本】 -ModelLink Megatron-Legacy到Huggingface的权重转换脚本命名风格及启动方法为: +MindSpeed-LLM Megatron-Legacy到Huggingface的权重转换脚本命名风格及启动方法为: ```shell # 命名及启动:bash examples/model_name/ckpt_convert_xxx_legacy2hf.sh # 需要配置并行参数以及权重词表加载保存等路径 @@ -262,7 +262,7 @@ ModelLink Megatron-Legacy到Huggingface的权重转换脚本命名风格及启 bash examples/llama2/ckpt_convert_llama2_legacy2hf.sh ``` -ModelLink Megatron-Mcore到Huggingface的权重转换脚本命名风格及启动方法为: +MindSpeed-LLM Megatron-Mcore到Huggingface的权重转换脚本命名风格及启动方法为: ```shell # 命名及启动:bash examples/mcore/model_name/ckpt_convert_xxx_mcore2hf.sh # 需要配置并行参数以及权重词表加载保存等路径 @@ -331,7 +331,7 @@ mcore转legacy时设置此参数以指定保存权重格式为legacy 其余参数意义参考2.1 -注:上述权重legacy和mcore互转为高阶功能,modellink基于llama2提供基础能力,并进行版本迭代看护,其余模型的支持需要用户自行修改支持 +注:上述权重legacy和mcore互转为高阶功能,MindSpeed-LLM基于llama2提供基础能力,并进行版本迭代看护,其余模型的支持需要用户自行修改支持 ##### 2.4 lora权重与base权重合并 @@ -416,7 +416,7 @@ bash examples/llama2/ckpt_convert_llama2_legacy2hf_lora.sh #### 1. 数据集下载 -从Huggingface等网站下载开源数据集,保存到ModelLink/dataset/ 目录 +从Huggingface等网站下载开源数据集,保存到MindSpeed-LLM/dataset/ 目录 常用的预训练数据集有: - [Enwiki数据集](https://huggingface.co/datasets/lsb/enwiki20230101) @@ -487,7 +487,7 @@ python ./preprocess_data.py \ 数据预处理并行加速参数。当需要预处理的数据集比较大时,可以通过并行处理进行加速,方法为设置参数`--n-subs`,通过该参数设置并行处理数量。在数据预处理过程会将原始数据集切分为`n_sub`个子集,对子集进行并行处理,然后合并,从而实现加速。建议预处理数据集超过GB级别时加上该参数。 -ModelLink预训练数据集处理脚本命名风格及启动方法为: +MindSpeed-LLM预训练数据集处理脚本命名风格及启动方法为: ```shell # Legacy # 命名及启动:examples/model_name/data_convert_xxx_pretrain.sh @@ -714,7 +714,7 @@ python ./preprocess_data.py \ 则会提取数据集里的`"messages"`列,其中角色格式可以为:`"role": "user"、"role": "assistant"`,内容格式为`"content": "具体内容"` -ModelLink微调数据集处理脚本命名风格及启动方法为: +MindSpeed-LLM微调数据集处理脚本命名风格及启动方法为: ```shell # Legacy # 命名及启动:examples/model_name/data_convert_xxx_instruction.sh @@ -990,7 +990,7 @@ DATA_PATH="./finetune_dataset/alpaca" #数据集路径 #### 1. 
Generate:流式推理 -ModelLink 流式推理脚本命名风格及启动方法为: +MindSpeed-LLM 流式推理脚本命名风格及启动方法为: ```shell # Legacy # 命名及启动:examples/model_name/generate_xxx.sh @@ -1011,7 +1011,7 @@ bash examples/llama2/generate_llama2_7b_ptd.sh ``` #### 2. Chat:指令微调后chat对话 -ModelLink 指令微调后chat对话脚本命名风格及启动方法为: +MindSpeed-LLM 指令微调后chat对话脚本命名风格及启动方法为: ```shell # Legacy # 命名及启动:examples/model_name/chat_xxx.sh @@ -1048,7 +1048,7 @@ bash examples/llama2/chat_llama2_7b_ptd.sh ## 大模型分布式评估 #### 1. 基准评估 -ModelLink 基准评估脚本命名风格及启动方法为: +MindSpeed-LLM 基准评估脚本命名风格及启动方法为: ```shell # Legacy # 命名及启动:examples/model_name/evaluate_xxx.sh diff --git a/examples/llama2/evaluate_llama2_7B_ptd.sh b/examples/llama2/evaluate_llama2_7B_ptd.sh index 606e7b068..68785b08f 100644 --- a/examples/llama2/evaluate_llama2_7B_ptd.sh +++ b/examples/llama2/evaluate_llama2_7B_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../mmlu/test/" TASK="mmlu" diff --git a/examples/llama2/evaluate_llama2_7b_full_ptd.sh b/examples/llama2/evaluate_llama2_7b_full_ptd.sh index 4e6704a40..0f6493047 100644 --- a/examples/llama2/evaluate_llama2_7b_full_ptd.sh +++ b/examples/llama2/evaluate_llama2_7b_full_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../mmlu/test/" TASK="mmlu" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_agieval_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_agieval_ptd.sh index 457fded55..5e6d649bc 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_agieval_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_agieval_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../agieval" TASK="agieval" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_bbh_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_bbh_ptd.sh index fbb40fcb2..a9a2e741b 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_bbh_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_bbh_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../bbh/test" TASK="bbh" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_boolq_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_boolq_ptd.sh index 708abcbb5..a4ea35b4c 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_boolq_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_boolq_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path 
DATA_PATH="/../boolq/dev" TASK="boolq" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_ceval_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_ceval_ptd.sh index ed54bd455..64ed04562 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_ceval_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_ceval_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../ceval/val" TASK="ceval" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_full_mmlu_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_full_mmlu_ptd.sh index e1277043a..cdb885b00 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_full_mmlu_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_full_mmlu_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../mmlu/test/" TASK="mmlu" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_humaneval_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_humaneval_ptd.sh index 7c736a7ad..4e2786a72 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_humaneval_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_humaneval_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../human_eval" TASK="human_eval" diff --git a/examples/mcore/llama2/evaluate_llama2_7b_mmlu_ptd.sh b/examples/mcore/llama2/evaluate_llama2_7b_mmlu_ptd.sh index 01c6991d1..676bf0ae4 100644 --- a/examples/mcore/llama2/evaluate_llama2_7b_mmlu_ptd.sh +++ b/examples/mcore/llama2/evaluate_llama2_7b_mmlu_ptd.sh @@ -9,7 +9,7 @@ source /usr/local/Ascend/ascend-toolkit/set_env.sh # modify script model path and tokenizer path TOKENIZER_PATH=/../llama2-7b-hf/ #tokenizer path -CHECKPOINT=/../llama2-7b-modellink-tp1 #model path +CHECKPOINT=/../llama2-7b-mindspeed-llm-tp1 #model path # configure task and data path DATA_PATH="/../mmlu/test/" TASK="mmlu" diff --git a/examples/qwen/convert_ckpt_qwen_hf2legacy.sh b/examples/qwen/convert_ckpt_qwen_hf2legacy.sh index cfcc457b2..de5faebcf 100644 --- a/examples/qwen/convert_ckpt_qwen_hf2legacy.sh +++ b/examples/qwen/convert_ckpt_qwen_hf2legacy.sh @@ -1,4 +1,4 @@ -# 修改modellink_qwen.py文件第39行,将: +# 修改modelling_qwen.py文件第39行,将: # SUPPORT_FP16 = SUPPORT_CUDA and torch.cuda.get_device_capability(0)[0] >= 7 # 修改为: # SUPPORT_FP16 = True diff --git a/examples/qwen/convert_ckpt_qwen_legacy2hf.sh b/examples/qwen/convert_ckpt_qwen_legacy2hf.sh index 0f87a2aa3..9543345d9 100644 --- a/examples/qwen/convert_ckpt_qwen_legacy2hf.sh +++ b/examples/qwen/convert_ckpt_qwen_legacy2hf.sh @@ -1,4 +1,4 @@ -# 修改modellink_qwen.py文件第39行,将: +# 修改modelling_qwen.py文件第39行,将: # SUPPORT_FP16 = SUPPORT_CUDA and torch.cuda.get_device_capability(0)[0] >= 7 # 修改为: # SUPPORT_FP16 = True diff --git a/modellink/arguments.py b/modellink/arguments.py index 8ed1979b3..6625f2f4c 100644 --- a/modellink/arguments.py +++ b/modellink/arguments.py @@ 
-840,7 +840,7 @@ def validate_args_decorator(megatron_validate_args): _add_dummy_args(args) from modellink.utils import print_args - print_args('ModelLink Arguments', args) + print_args('MindSpeed-LLM Arguments', args) return args return wrapper diff --git a/modellink/core/datasets/gpt_dataset.py b/modellink/core/datasets/gpt_dataset.py index 4a37d53b2..dfd6118f6 100644 --- a/modellink/core/datasets/gpt_dataset.py +++ b/modellink/core/datasets/gpt_dataset.py @@ -156,7 +156,7 @@ def _build_document_sample_shuffle_indices( ) if any(sample_index[:, 0] < 0): - _url = "https://gitee.com/ascend/ModelLink/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98" + _url = "https://gitee.com/ascend/MindSpeed-LLM/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98" raise GPTDatasetSampleIndexError(f"Bad sample index. Visit {_url} for more information") numpy.save(path_to_sample_index, sample_index, allow_pickle=True) @@ -212,7 +212,7 @@ def _build_document_sample_shuffle_indices( sample_index = numpy.load(path_to_sample_index, allow_pickle=True, mmap_mode='r') if any(sample_index[:, 0] < 0): - _url = "https://gitee.com/ascend/ModelLink/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98" + _url = "https://gitee.com/ascend/MindSpeed-LLM/wikis/megatron%20data%20helpers%E5%8F%AF%E8%83%BD%E5%BC%95%E5%85%A5%E7%9A%84%E9%97%AE%E9%A2%98" raise GPTDatasetSampleIndexError(f"Bad sample index. Visit {_url} for more information") t_end = time.time() diff --git a/modellink/core/transformer/moe/moe_utils.py b/modellink/core/transformer/moe/moe_utils.py index d38492eb6..c9cfb07e8 100644 --- a/modellink/core/transformer/moe/moe_utils.py +++ b/modellink/core/transformer/moe/moe_utils.py @@ -60,7 +60,7 @@ def topk_softmax_with_capacity( drop_policy: str = "probs", ): """ - Migrated from megatron r0.7.0,. This would be removed after ModelLink switches to megatron r0.7.0. + Migrated from megatron r0.7.0,. This would be removed after MindSpeed-LLM switches to megatron r0.7.0. Apply capacity and padding to the top-k selection. Args: diff --git a/modellink/core/transformer/moe/token_dispatcher.py b/modellink/core/transformer/moe/token_dispatcher.py index 1371a1154..150156461 100644 --- a/modellink/core/transformer/moe/token_dispatcher.py +++ b/modellink/core/transformer/moe/token_dispatcher.py @@ -14,7 +14,7 @@ class MoEAlltoAllTokenDispatcher(MoETokenDispatcher): """ Mainly migrated from megatron r0.7.0. for drop and pad feature, and add few optimizations controlled by args.moe_permutation_async_comm. - This would be removed after ModelLink switches to megatron r0.7.0. + This would be removed after MindSpeed-LLM switches to megatron r0.7.0. AlltoAll Based Token dispatcher. 
""" diff --git a/modellink/patchs/megatron_patch.py b/modellink/patchs/megatron_patch.py index 3acd6f510..0ee13b7e5 100644 --- a/modellink/patchs/megatron_patch.py +++ b/modellink/patchs/megatron_patch.py @@ -61,7 +61,7 @@ def get_modellink_args(): """ global _ARGS if _ARGS is None: - parser = argparse.ArgumentParser(description='ModelLink Arguments', allow_abbrev=False) + parser = argparse.ArgumentParser(description='MindSpeed-LLM Arguments', allow_abbrev=False) _ARGS, _ = process_args(parser).parse_known_args() return _ARGS diff --git a/modellink/tasks/inference/text_generation/infer_base.py b/modellink/tasks/inference/text_generation/infer_base.py index a9ed2c1e2..14cf36577 100644 --- a/modellink/tasks/inference/text_generation/infer_base.py +++ b/modellink/tasks/inference/text_generation/infer_base.py @@ -101,7 +101,7 @@ def task_greedy_search(args, model): if dist.get_rank() == 0: logging.info("\n=============== Greedy Search ================") - logging.info("\nYou:\n%s\n\nModelLink:\n%s", instruction, output) + logging.info("\nYou:\n%s\n\nMindSpeed-LLM:\n%s", instruction, output) logging.info("==============================================") logging.info("\nElapsed: %ss", round(time.time() - t, 2)) @@ -124,7 +124,7 @@ def task_do_sample(args, model): if dist.get_rank() == 0: logging.info("\n================ Do Sample =================") - logging.info("\nYou:\n%s\n\nModelLink:\n%s", instruction, output) + logging.info("\nYou:\n%s\n\nMindSpeed-LLM:\n%s", instruction, output) logging.info("============================================") logging.info("\nElapsed: %ss", round(time.time() - t, 2)) @@ -147,7 +147,7 @@ def task_beam_search(args, model): if dist.get_rank() == 0: logging.info("\n=============== Beam Search =================") - logging.info("\nYou:\n%s\n\nModelLink:\n%s", instruction, output) + logging.info("\nYou:\n%s\n\nMindSpeed-LLM:\n%s", instruction, output) logging.info("=============================================") logging.info("\nElapsed: %ss", round(time.time() - t, 2)) @@ -171,7 +171,7 @@ def task_beam_search_with_sampling(args, model): if dist.get_rank() == 0: logging.info("\n======== Beam Search with sampling ==========") - logging.info("\nYou:\n%s\n\nModelLink:\n%s", instruction, output) + logging.info("\nYou:\n%s\n\nMindSpeed-LLM:\n%s", instruction, output) logging.info("=============================================") logging.info("\nElapsed: %ss", round(time.time() - t, 2)) @@ -251,7 +251,7 @@ def chat_get_instruction(args, histories_no_template, histories_template, prompt def chat_print_and_update_histories(args, responses, histories_no_template, histories_template, prompt): - response_template = "\nModelLink:\n" + response_template = "\nMindSpeed-LLM:\n" output = "" if dist.get_rank() == 0: diff --git a/setup.py b/setup.py index 69bdfc5fe..de345fdfa 100644 --- a/setup.py +++ b/setup.py @@ -19,12 +19,12 @@ import sys import setuptools if sys.version_info < (3,): - raise Exception("Python 2 is not supported by ModelLink.") + raise Exception("Python 2 is not supported by MindSpeed-LLM.") -__description__ = 'ModelLink for LLMs of Ascend' +__description__ = 'MindSpeed-LLM for LLMs of Ascend' __version__ = '0.0.1' __author__ = 'Ascend' -__long_description__ = 'ModelLink for LLMs of Ascend' +__long_description__ = 'MindSpeed-LLM for LLMs of Ascend' __keywords__ = 'Ascend, langauge, deep learning, NLP' __package_name__ = 'modellink' __contact_names__ = 'Ascend' diff --git a/sources/images/logo.png b/sources/images/logo.png index 
e68882beafff23e533329c462fa5b8eb975fa269..2d32cf37c4869ffe5f8b58e6f0c2c744f2832f03 100644
GIT binary patch
(sources/images/logo.png 二进制数据省略:新文件 literal 7050 字节,原文件 literal 24244 字节)
z{DSD=U}}YVEmR&d+pIBO_WY)&9(r&BYE)(zrX4rplaR*X?Ofsa?vSgH@-qzOocr2~ zDRF*Iikdb|zVT9n+NMD`J7)`!^&w+0Urgw8NT${ByNR)eeu)yLlb>t7do~fGVq^xI z&w*@vaXjOBUgzWC(QTKPUY-+p|ivJjFdM|He!+KQ)X54~(> z+zq)7X3%N}cTUzTUXrD_g=mrBhrxAa?S4E9u$227?NY=#oF3_*(+k~$GCwLN8RB`- z=<5_t2#?J9s5g+T1?Vx74SfnPw1EPi((-;SuM4tuJ0-j?B<7-gmuO-{JNxTN{5u(% zqb|1E`$GsL`BXiL=?}tHZPB0>J7`)RRsu8(StE@B69|t2$0OfZKQVzbf586QeYJ^z z?XF@D6>2)N{@IiMkVg;>bUsh>_zl;Nf0tx`+heWNP%_ci+EpWK2cpw81p#(yZ&7gp z0e66v?|V`Z1sKM6k(;iU+VYK%g*FG8b6v>c$;vWgOH2>*nK-J40=rT9R9^maxx#w%HlQfz5&zmr!6?H9=-$fi%XOrS&5cD+< z$MGyp`Rl7~W_@0lmP3l=-9@G>hMkrB=DXhq94FGZJGhHR$ zz_z9Za3uI6Q4X!qYVDZ=Y~R#%#gDXA6D}Ii)KS_ScIwP}Ac1@O^R{x@eqPMcaVs}B ziIijL^DA-6v`s^xkyP6yxRe|213LY{yG1#^T{!Ql2h$J{s4k{~X0n!aA+NG_>0enw zTqky%lZ4h1Q6CxVD8v7{c7UqR7yS_^N6IWKNu1p1fK?0&h>7#-wUGi5TpF1G;d(3^ z)RP0F4^)dnFWaWQHt8a1&<}GDEK;QMI^zV54%dNH!PB|fePFP8Sce$|ajepoH=Ixo zMONyj$ar~pOEDOj{)SA)IrxSZG-_G80G3Jn4(icqWuM=B4DWj*v~&t1*Gjxl)?{v? zae61GQDFdE}H0Oicfi?rBBh~Io0(vp)j*NCE7A0}Wt6Eo=BsFYho~}9b zeGB0{=ojoyZL{$ivnk>S&R;#xTd*I|_N1A2Zl^P+Jo@5n>FNSWmmWAU&*yyxItW`2 zL<=7}RB=L1^!ya`&n4BLA*yQSgkJNwHT|giJaDMAGUZT*8@xB$_;A;7sZF{`p3R|M zqcAfk{NY@Ba_oDf^>+)a%log%_g$fT1oc&DT9&#u2AbND*fVunSTc*A`gtkl;jfZ? zd63UV)ID(PF0&2_ zF2a;!7W04ASUnHTT`NV0u;b(>du7x(%FcRYoz+!Gnc1<&D|I^EIIQ=y>=*}4|^u$GD?B3i&(h?#l zU}%dHTRmJY#ai}Fy&n!ZV7`0oR(1f?C9|c;vfJr5uyIZqiL^}BQrN{ zg!x{UX)RP7#J#G(-lo?XRD}3SmX=lJ+rbxK^$HtC|C#ZD`F&{j#aAX~j{la^?w;Q;Yem?bLMXR;tI466soJOwDF8Nj67V;iYq9h@2 z*F9%XM7<*96(Xic=@CrE$?dDaz%=%9NSI8Kys9|zB=jPA6pYwXdE$c-T>~+nN2`ac z6r1(#JMv?SNk3r(y_L?bXF`9?c)Bp5d9+N4m@)3suqsn3D)nCqqWNg`PLk-ZR#o3O z`mi5VbkY?>qcIWE%$_$5aUq(fq|9<-4#<@pI?l+5W8+x`mlpReGBY&}WN?_5?F63` zg1()qdSkAnZK`oLn93gnTqb&_5N{tou)%~@4XCd4v2yh*E~Lst3ip|4AT5WTHBrat z-5c?SNSS=@l)t&p>u|ih> z1#68*+*szG+GY0}7RREBw%@mQ-;X@BeR9GPPLr-1`=T|}MK6-_hyb$I5-l=m zzw2=GIlu0I{K&F(M`SVn$4B5BvJe07eY*NT19s$=K7W&7ZAlZqzKQ#`H8!^wl0h6ng`Rwvve_a0m69-Nh>a#m1{QBPnrmI)( bpPr*skreZEaU2Dt`E$D3x3nrWU}66SXj3vP diff --git a/sources/images/tune_llama2_7b_ModelLink_DeepSpeed_compare.png b/sources/images/tune_llama2_7b_MindSpeed-LLM_DeepSpeed_compare.png similarity index 100% rename from sources/images/tune_llama2_7b_ModelLink_DeepSpeed_compare.png rename to sources/images/tune_llama2_7b_MindSpeed-LLM_DeepSpeed_compare.png diff --git a/sources/images/tune_qwen_7b_ModelLink_DeepSpeed_compare.png b/sources/images/tune_qwen_7b_MindSpeed-LLM_DeepSpeed_compare.png similarity index 100% rename from sources/images/tune_qwen_7b_ModelLink_DeepSpeed_compare.png rename to sources/images/tune_qwen_7b_MindSpeed-LLM_DeepSpeed_compare.png diff --git a/tests/README.md b/tests/README.md index 257e2de39..17258ef91 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,4 +1,4 @@ -## ModelLink 测试用例贡献说明 +## MindSpeed-LLM 测试用例贡献说明 ### 门禁看护列表 diff --git a/tests/pipeline/baichuan2-13B/test_generation.py b/tests/pipeline/baichuan2-13B/test_generation.py index 7c5ff7690..6b8f63982 100644 --- a/tests/pipeline/baichuan2-13B/test_generation.py +++ b/tests/pipeline/baichuan2-13B/test_generation.py @@ -12,7 +12,7 @@ from tests.test_tools.utils import build_args, create_testconfig, setup_logger from ut.inference.test_inference import acquire_context -PATTERN = r"ModelLink:\n(.*)" +PATTERN = r"MindSpeed-LLM:\n(.*)" class TestInference(DistributedTest): diff --git a/tests/pipeline/bloom-7B/test_generation.py b/tests/pipeline/bloom-7B/test_generation.py index db4816eb4..1c0c62084 100644 --- a/tests/pipeline/bloom-7B/test_generation.py +++ b/tests/pipeline/bloom-7B/test_generation.py @@ -11,7 +11,7 @@ from tests.test_tools.utils import 
build_args, create_testconfig, setup_logger from ut.inference.test_inference import acquire_context -PATTERN = r"ModelLink:\n(.*)" +PATTERN = r"MindSpeed-LLM:\n(.*)" class TestInference(DistributedTest): diff --git a/tests/pipeline/chatglm3-6B/test_generation.py b/tests/pipeline/chatglm3-6B/test_generation.py index c54d8359b..391dea2d9 100644 --- a/tests/pipeline/chatglm3-6B/test_generation.py +++ b/tests/pipeline/chatglm3-6B/test_generation.py @@ -11,7 +11,7 @@ from tests.test_tools.utils import build_args, create_testconfig, setup_logger from ut.inference.test_inference import acquire_context -PATTERN = r"ModelLink:\n(.*)" +PATTERN = r"MindSpeed-LLM:\n(.*)" class TestInferenceWorldSize2(DistributedTest): diff --git a/tests/pipeline/gemma-7B/test_generation.py b/tests/pipeline/gemma-7B/test_generation.py index c7de50f75..4c3711e1d 100644 --- a/tests/pipeline/gemma-7B/test_generation.py +++ b/tests/pipeline/gemma-7B/test_generation.py @@ -11,7 +11,7 @@ from tests.test_tools.utils import build_args, create_testconfig, setup_logger from ut.inference.test_inference import acquire_context -PATTERN = r"ModelLink:\n(.*)" +PATTERN = r"MindSpeed-LLM:\n(.*)" class TestInference(DistributedTest): diff --git a/tests/pipeline/qwen15-7B/test_generation.py b/tests/pipeline/qwen15-7B/test_generation.py index e1614e603..eb39eb528 100644 --- a/tests/pipeline/qwen15-7B/test_generation.py +++ b/tests/pipeline/qwen15-7B/test_generation.py @@ -11,7 +11,7 @@ from tests.test_tools.utils import build_args, create_testconfig, setup_logger from ut.inference.test_inference import acquire_context -PATTERN = r"ModelLink:\n(.*)" +PATTERN = r"MindSpeed-LLM:\n(.*)" class TestInference(DistributedTest): diff --git a/tests/ut/inference/test_inference.py b/tests/ut/inference/test_inference.py index d6bad984e..ab0136263 100644 --- a/tests/ut/inference/test_inference.py +++ b/tests/ut/inference/test_inference.py @@ -26,13 +26,13 @@ from tests.test_tools.dist_test import DistributedTest from tests.test_tools.utils import build_args, create_testconfig, setup_logger -PATTERN = r"ModelLink:\n(.*)" +PATTERN = r"MindSpeed-LLM:\n(.*)" def acquire_context(log_capture): # Acquire the final score for evaluation tasks, still universal. 
context_str = log_capture[0] - context_pattern = r"ModelLink:\s*(.*?)(?=\n|$)" + context_pattern = r"MindSpeed-LLM:\s*(.*?)(?=\n|$)" match = re.search(context_pattern, context_str) if match: context = match.group(1) -- Gitee From 01546f7a0be89312d30b833a7740c1bdb64ab8e6 Mon Sep 17 00:00:00 2001 From: wucong Date: Wed, 11 Dec 2024 14:50:40 +0000 Subject: [PATCH 23/32] =?UTF-8?q?!2015=20=E4=BF=AE=E5=A4=8Dreadme=20->=20R?= =?UTF-8?q?C3=20Merge=20pull=20request=20!2015=20from=20wucong/fix=5Freadm?= =?UTF-8?q?e=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 38 +++++++++++++++++------------ examples/README.md | 61 +++++----------------------------------------- 2 files changed, 28 insertions(+), 71 deletions(-) diff --git a/README.md b/README.md index 9e6e949a8..e299195c8 100644 --- a/README.md +++ b/README.md @@ -16,18 +16,22 @@ MindSpeed-LLM,原仓名ModelLink,旨在为华为 [昇腾芯片](https://www.hiascend.com/) 上提供端到端的大语言模型方案, 包含模型,算法,以及下游任务。 --- +## 用户使用指南 +* MindSpeed-LLM[使用指南](./examples/README.md) -## MindSpeed-LLM大模型方案概览 +* MindSpeed-LLM[预置大模型列表](#预置大模型) + +## ModelLink大模型方案 当前MindSpeed-LLM支撑大模型使用功能: -* 权重转换:[Huggingface与Megatron-LM权重转换](#jump1) 【昇腾】【OCK】【GTS】 +* 权重转换:[Huggingface与Megatron-LM权重转换](#huggingface与megatron-lm权重转换) 【昇腾】【OCK】【GTS】 * 数据集处理:[预训练数据集/指令微调数据集](./examples/README.md) 【NAIE】【昇腾】 -* 分布式预训练:[加速算法/融合算子/并行策略](#jump2)【昇腾】【计算算法部】【计算研究部】 -* 分布式指令微调:[Prompt模板/动态padding/多轮对话](#jump3)【NAIE】【昇腾】 +* 分布式预训练:[加速算法/融合算子/并行策略](#预训练加速算法与融合算子)【昇腾】【计算算法部】【计算研究部】 +* 分布式指令微调:[Prompt模板/动态padding/多轮对话](#分布式指令微调)【NAIE】【昇腾】 * 分布式推理:[流式推理/人机对话](./examples/README.md) 【NAIE】【昇腾】【GTS】 -* 分布式评估:[MMLU/CEVAL/HumanEval/BBH/BoolQ/AGIEval](#jump4)【NAIE】 -* 昇腾工具链:[Profiling采集](#jump5)/[确定性计算](#jump6)/[高可用](#jump7)【昇腾】【计算研究部】 +* 分布式评估:[MMLU/CEVAL/HumanEval/BBH/BoolQ/AGIEval](#大模型benchmark基线评估)【NAIE】 +* 昇腾工具链:[Profiling采集](#基于昇腾芯片采集Profiling数据)/[确定性计算](#基于昇腾芯片的确定性计算功能)/[高可用](#基于昇腾芯片的高可用特性)【昇腾】【计算研究部】 LoRA、DPO、奖励模型、PPO等特性即将上线 @@ -59,7 +63,7 @@ MindSpeed-LLM已发布版本维护策略: ## 配套版本与支持模型 -【版本配套环境】 +### 【版本配套环境】
@@ -102,7 +106,7 @@ MindSpeed-LLM已发布版本维护策略:
-【预训练集群性能与线性度】 +### 【预训练集群性能与线性度】 MindSpeed-LLM 通过模型并行与数据并行来训练大语言模型,为了演示如何使用多个昇腾芯片和模型大小进行扩展性训练,我们使用 `GPT3-175B` 稠密大模型,从128颗 NPU 扩展到 7968颗 NPU 进行实验,下图是实验数据:

@@ -110,6 +114,8 @@ MindSpeed-LLM 通过模型并行与数据并行来训练大语言模型,为了 图中呈现了对应集群规模下的 `MFU` 值与集群整体的 `线性度`情况. 计算公式已经放到社区,点击链接可进行参考:[MFU计算公式](https://gitee.com/ascend/MindSpeed-LLM/wikis/%E6%9C%AF%E8%AF%AD%E5%AE%9A%E4%B9%89/%E5%A4%A7%E6%A8%A1%E5%9E%8B%20MFU%20%E8%AE%A1%E7%AE%97%E5%85%AC%E5%BC%8F),[线性度计算公式](https://gitee.com/ascend/MindSpeed-LLM/wikis/%E6%9C%AF%E8%AF%AD%E5%AE%9A%E4%B9%89/%E7%BA%BF%E6%80%A7%E5%BA%A6%E5%85%AC%E5%BC%8F) +### 【预置大模型】 + 下述列表中支持的模型,我们在[examples/README.md](./examples/README.md)中提供了相应的使用说明,里面有详细的模型训练、推理、评估流程 `参数`列中的超链接指向模型的预训练文件下载地址,`模型`列中的超链接指向更多的社区资源地址,包括Chat/Instruct权重等 @@ -120,7 +126,7 @@ MindSpeed-LLM 通过模型并行与数据并行来训练大语言模型,为了 `认证`【Pass】表示经过昇腾官方版本测试的模型,【Test】表示待测试模型 -表中为开启 mc2 特性【内部在研特性】后预训练实测性能,该特性只在24RC2以上版本支持,本仓库代码层面默认关闭,若要使用,请参考[加速算法与融合算子](#jump2)章节 +表中为开启 mc2 特性【内部在研特性】后预训练实测性能,该特性只在24RC2以上版本支持,本仓库代码层面默认关闭,若要使用,请参考[加速算法与融合算子](#预训练加速算法与融合算子)章节 @@ -897,7 +903,7 @@ MindSpeed-LLM 通过模型并行与数据并行来训练大语言模型,为了 --- -## Huggingface与Megatron-LM权重转换 +## Huggingface与Megatron-LM权重转换 MindSpeed-LLM支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权重格式互转,具体功能列表如下: @@ -1078,7 +1084,7 @@ MindSpeed-LLM支持Huggingface、Megatron-Legacy以及Megatron-Core之间的权 --- -## 预训练加速算法与融合算子 +## 预训练加速算法与融合算子 MindSpeed-LLM预训练支持张量并行、流水线并行等多种加速算法和融合算子: @@ -1244,7 +1250,7 @@ MindSpeed-LLM预训练支持张量并行、流水线并行等多种加速算法 --- -## 分布式指令微调 +## 分布式指令微调 MindSpeed-LLM支持指令微调,方案与DeepSpeed统一,在微调效果保持一致的前提下,MindSpeed-LLM可以表现出优异性能 【与DeepSpeed微调Loss对比】 @@ -1456,7 +1462,7 @@ My soul is full and my heart does soep. --- -## 大模型Benchmark基线评估 +## 大模型Benchmark基线评估 MindSpeed-LLM支持大模型在公开基准数据集上进行准确率评估,当前支持的Benchmark如下: @@ -1505,7 +1511,7 @@ MiniCPM-2B | MMLU | 51.6% | [53.4%](https://github.com/OpenBMB/MiniCPM? --- -## 基于昇腾芯片采集Profiling数据 +## 基于昇腾芯片采集Profiling数据 MindSpeed-LLM支持基于昇腾芯片采集profiling数据,以提供对模型运行情况的分析,主要API如下: @@ -1524,7 +1530,7 @@ MindSpeed-LLM支持基于昇腾芯片采集profiling数据,以提供对模型 --- -## 基于昇腾芯片的确定性计算功能 +## 基于昇腾芯片的确定性计算功能 昇腾芯片默认采用了不确定计算加速模型训练,有时为了重复实验与对比实验需要确定性的计算结果,MindSpeed-LLM使能确定性计算的开关如下: - 启动命令中加入开关 @@ -1539,7 +1545,7 @@ export HCCL_DETERMINISTIC=True --- -## 基于昇腾芯片的高可用特性 +## 基于昇腾芯片的高可用特性 分布式优化器的思想是通过将优化器状态均匀地分布在数据并行组中来节省内存。基于该思想,设计了将数据并行组切分成两个副本数据并行组的方案,副本优化器将优化器状态均匀分布在副本数据并行组,实现优化器状态均有备份。结合华为自研的高可用框架,可实现训练过程中,支持故障场景保存临终checkpoint,训练结果0损失。 diff --git a/examples/README.md b/examples/README.md index d6674e5df..96fa8167f 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,38 +3,6 @@

-## 目录 - - -- [环境安装](#jump1) - * [仓库拉取](#jump1.1) - * [环境搭建](#jump1.2) -- [权重下载及转换](#jump2) - * [权重下载](#jump2.1) - * [权重转换](#jump2.2) -- [数据集准备及处理](#jump3) - * [数据集下载](#jump3.1) - * [数据集处理](#jump3.2) - * [数据集合并](#jump3.3) -- [大模型分布式预训练](#jump4) - * [准备工作](#jump4.1) - * [配置参数](#jump4.2) - * [启动预训练](#jump4.3) -- [大模型分布式指令微调](#jump5) - * [准备工作](#jump5.1) - * [配置微调参数](#jump5.2) - * [启动全参微调](#jump5.3) -- [大模型分布式推理](#jump6) - * [Generate:流式推理](#jump6.1) - * [Chat:指令微调后chat对话](#jump6.2) -- [大模型分布式评估](#jump7) - * [基准评估](#jump7.1) - * [指令微调评估](#jump7.2) - * [LoRA权重评估](#jump7.3) -- [社区BUG列表](#jump8) - ---- - ## 环境安装 【模型开发时推荐使用配套的环境版本】 @@ -162,20 +130,18 @@ cd ../../ ``` #### 2. 权重转换 - +在`example`目录下每个模型都已经预置好权重转换脚本,可以根据需要来进行修改 ##### 2.1 Huggingface权重转换到Megatron-LM格式 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh python convert_ckpt.py \ --model-type GPT \ --load-model-type hf \ --save-model-type mg \ - --target-tensor-parallel-size 2 \ - --target-pipeline-parallel-size 4 \ - --num-layer-list 8,8,8,8 \ + --target-tensor-parallel-size 1 \ + --target-pipeline-parallel-size 2 \ + --num-layer-list 16,16 \ --model-type-hf llama2 \ --load-dir ./model_from_hf/llama-2-7b-hf/ \ --save-dir ./model_weights/llama-2-7b-legacy/ \ @@ -237,8 +203,7 @@ bash examples/mcore/llama2/ckpt_convert_llama2_hf2mcore.sh ##### 2.2 Megatron-LM权重转换到Huggingface格式 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh +# 转换到Huggingface格式时,`target-tensor-parallel-size`与`target-pipeline-parallel-size`均需设为1 python convert_ckpt.py \ --model-type GPT \ @@ -273,8 +238,6 @@ bash examples/mcore/llama2/ckpt_convert_llama2_mcore2hf.sh ##### 2.3 Megatron-LM格式权重互转 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh # legacy转legacy python convert_ckpt.py \ @@ -360,8 +323,6 @@ mcore转legacy时设置此参数以指定保存权重格式为legacy 【合并后转换为Megatron-Legacy权重】 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh python convert_ckpt.py \ --model-type GPT \ @@ -385,8 +346,6 @@ bash examples/llama2/ckpt_convert_llama2_legacy2legacy_lora.sh 【合并后转换为Huggingface权重】 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh python convert_ckpt.py \ --model-type GPT \ @@ -439,12 +398,10 @@ cd .. ``` #### 2. 数据集处理 - +在`example`目录下每个模型都已经预置好数据集预处理脚本,可以根据需要来进行修改 ##### 2.1 预训练数据集处理方法 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./dataset python ./preprocess_data.py \ @@ -518,8 +475,6 @@ cd .. 在指令监督微调时,instruction 列对应的内容会与 input 列对应的内容拼接后作为人类指令,即人类指令为 instruction\ninput。而 output 列对应的内容为模型回答。如果指定了history,则会将历史对话内容也加入进来。如果指定system 列,则对应的内容将被作为系统提示词。 ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset python ./preprocess_data.py \ @@ -629,8 +584,6 @@ cd .. 
``` Sharegpt格式数据预处理脚本: ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset python ./preprocess_data.py \ @@ -681,8 +634,6 @@ OpenAI格式示例: OpenAI格式数据预处理脚本: ```shell -# 请按照您的真实环境修改 set_env.sh 路径 -source /usr/local/Ascend/ascend-toolkit/set_env.sh mkdir ./finetune_dataset python ./preprocess_data.py \ -- Gitee From b5bf05399b88761d731ac0736c01972f896c11d0 Mon Sep 17 00:00:00 2001 From: wucong Date: Wed, 11 Dec 2024 15:29:07 +0000 Subject: [PATCH 24/32] =?UTF-8?q?!2016=20=E4=BF=AE=E5=A4=8Dreame2->=20RC3?= =?UTF-8?q?=20Merge=20pull=20request=20!2016=20from=20wucong/fix=5Freadme2?= =?UTF-8?q?=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e299195c8..1e19470b1 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,8 @@ MindSpeed-LLM,原仓名ModelLink,旨在为华为 [昇腾芯片](https://www. * MindSpeed-LLM[预置大模型列表](#预置大模型) -## ModelLink大模型方案 +--- +## MindSpeed-LLM大模型方案 当前MindSpeed-LLM支撑大模型使用功能: -- Gitee From 3f0a58fca49824a65377d4b04dbc5728515fb4f1 Mon Sep 17 00:00:00 2001 From: jzh Date: Mon, 23 Dec 2024 08:59:45 +0000 Subject: [PATCH 25/32] =?UTF-8?q?!2022=20=E9=95=9C=E5=83=8F=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=20Merge=20pull=20request=20!2022=20from=20jzh/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + docs/DOCKER_GUIDE.md | 59 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 docs/DOCKER_GUIDE.md diff --git a/README.md b/README.md index 1e19470b1..b51de5cd9 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ MindSpeed-LLM已发布版本维护策略:
+已安装好以上配套环境的镜像的获取和使用方法见[镜像使用指南](./docs/DOCKER_GUIDE.md)。 ### 【预训练集群性能与线性度】 diff --git a/docs/DOCKER_GUIDE.md b/docs/DOCKER_GUIDE.md new file mode 100644 index 000000000..892d2c3f9 --- /dev/null +++ b/docs/DOCKER_GUIDE.md @@ -0,0 +1,59 @@ + +## 1.镜像下载(待补充昇腾社区的镜像下载地址) +通过uname -a确认自身系统是ubuntu_x86 或者 openeuler +根据需要下载对应的镜像,如下为下载链接: +https://www.hiascend.com/developer/ascendhub/detail/e26da9266559438b93354792f25b2f4a + +## 2.镜像加载 +```bash +# 挂载镜像,确认挂载是否成功 +docker image list +``` + +## 3.创建镜像容器 +注意当前默认配置驱动和固件安装在/usr/local/Ascend,如有差异请修改指令路径。 +当前容器默认初始化npu驱动和CANN环境信息,如需要安装新的,请自行替换或手动source,详见容器的bashrc +```bash +# 挂载镜像 +docker run -dit --ipc=host --network host --name 'llm_test' --privileged -v /usr/local/Ascend/driver:/usr/local/Ascend/driver -v /usr/local/Ascend/firmware:/usr/local/Ascend/firmware -v /usr/local/sbin/:/usr/local/sbin/ -v /home/:/home/ mindspeed-llm:tag +``` + +## 4.登录镜像并确认环境状态 +```bash +# 登录容器 +docker exec -it llm_test /bin/bash +# 确认npu是否可以正常使用,否则返回3.检查配置 +npu-smi info +``` + +## 5.拉取配套版本 +当前镜像推荐配套版本,用户可根据自己所需的版本配套,进行MindSpeed-LLM和MindSpeed的更新使用。 +rc+序号为对应配套版本,镜像与分支名是配套的。例如: +1. 2024.rc2-arm/2024.rc2-x86 镜像版本匹配 [MindSpeed-LLM的1.0.RC2分支](https://gitee.com/ascend/MindSpeed-LLM/tree/1.0.RC2/) +2. 2024.rc3-arm/2024.rc3-x86 镜像版本匹配 [MindSpeed-LLM的1.0.RC3分支](https://gitee.com/ascend/MindSpeed-LLM/tree/1.0.RC3/) +3. ... + +**注意:master为研发分支,无支持镜像。** + +下面以MindSpeed-LLM的1.0.RC3分支进行配套说明。 +镜像根据系统区分选择2024.rc3-arm/2024.rc3-x86。 +```bash +# 从Gitee克隆MindSpeed-LLM仓库 (git checkout 1.0.RC3) +git clone https://gitee.com/ascend/MindSpeed-LLM.git +cd MindSpeed-LLM +git checkout 1.0.RC3 +# 从Gitee克隆MindSpeed仓库(git checkout 4ea42a23) +git clone https://gitee.com/ascend/MindSpeed.git +cd MindSpeed +git checkout 4ea42a23 +pip install -e . +cd .. +# 拉取megatron并切换对应版本放到MindSpeed-LLM下 +git clone https://github.com/NVIDIA/Megatron-LM.git +cd Megatron-LM +git checkout core_r0.6.0 +cp -r megatron ../ +``` + +## 6.单机以及多机模型的预训练任务运行 +基于拉取的镜像和仓库代码,执行单机和多机的预训练任务,具体可参考[MindSpeed-LLM 使用指南](..%2Fexamples%2FREADME.md) \ No newline at end of file -- Gitee From 159d627603c8e750bb6e593003c6118a46776ab7 Mon Sep 17 00:00:00 2001 From: fengliangjun Date: Mon, 23 Dec 2024 09:09:01 +0000 Subject: [PATCH 26/32] update docs/DOCKER_GUIDE.md. 
Signed-off-by: fengliangjun --- docs/DOCKER_GUIDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/DOCKER_GUIDE.md b/docs/DOCKER_GUIDE.md index 892d2c3f9..794dde7d0 100644 --- a/docs/DOCKER_GUIDE.md +++ b/docs/DOCKER_GUIDE.md @@ -56,4 +56,4 @@ cp -r megatron ../ ``` ## 6.单机以及多机模型的预训练任务运行 -基于拉取的镜像和仓库代码,执行单机和多机的预训练任务,具体可参考[MindSpeed-LLM 使用指南](..%2Fexamples%2FREADME.md) \ No newline at end of file +基于拉取的镜像和仓库代码,执行单机和多机的预训练任务,具体可参考[MindSpeed-LLM 使用指南](../examples/README.md) \ No newline at end of file -- Gitee From ae21aaea9efa0f28a9888df3d2ca2c0963073dc3 Mon Sep 17 00:00:00 2001 From: wucong Date: Fri, 27 Dec 2024 07:47:25 +0000 Subject: [PATCH 27/32] =?UTF-8?q?!2036=20=E5=A2=9E=E5=8A=A0skip=5Fnum=20ut?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=20Merge=20pull=20request=20!2036=20from=20wu?= =?UTF-8?q?cong/fix=5Fguodian=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ci/access_control_test.py | 2 +- .../llama2/ckpt_convert_llama2_legacy2hf.sh | 2 +- modellink/tasks/preprocess/utils.py | 2 +- .../test_process_instruction_data_lf.json | 19 ++++++ .../test_process_instruction_data_lf.py | 64 ++++++++++++++++++- 5 files changed, 85 insertions(+), 4 deletions(-) diff --git a/ci/access_control_test.py b/ci/access_control_test.py index 785dabdb9..dc512a3f5 100644 --- a/ci/access_control_test.py +++ b/ci/access_control_test.py @@ -89,7 +89,7 @@ class UTTest: exsit_ut_files = [file for file in full_path if os.path.exists(file) and file.endswith(".py")] self.ut_files = " ".join(exsit_ut_files) - command = f"pytest -x {self.ut_files}" + command = f"pytest -x --log-cli-level=INFO {self.ut_files}" code = acquire_exitcode(command) if code == 0: print("UT test success") diff --git a/examples/llama2/ckpt_convert_llama2_legacy2hf.sh b/examples/llama2/ckpt_convert_llama2_legacy2hf.sh index 553e473a3..edd6e469e 100644 --- a/examples/llama2/ckpt_convert_llama2_legacy2hf.sh +++ b/examples/llama2/ckpt_convert_llama2_legacy2hf.sh @@ -7,7 +7,7 @@ python convert_ckpt.py \ --loader megatron \ --saver megatron \ --save-model-type save_huggingface_llama \ - --load-dir ./model_weights/llama2-legacy/ \ + --load-dir ./model_weights/llama-2-legacy/ \ --target-tensor-parallel-size 1 \ --target-pipeline-parallel-size 1 \ --save-dir ./model_from_hf/llama-2-7b-hf/ # <-- 需要填入原始HF模型路径,新权重会存于./model_from_hf/llama-2-7b-hf/mg2hg/ diff --git a/modellink/tasks/preprocess/utils.py b/modellink/tasks/preprocess/utils.py index 87e315251..173929f7b 100644 --- a/modellink/tasks/preprocess/utils.py +++ b/modellink/tasks/preprocess/utils.py @@ -238,7 +238,7 @@ def convert_alpaca_to_intermediate(sample: Dict[str, List[Any]], dataset_attr: " outputs = {"prompt": [], "response": [], "system": [], "tools": []} prompt = [] - if dataset_attr.history and hasattr(sample, "history") and isinstance(sample[dataset_attr.history], dict): + if dataset_attr.history and (isinstance(sample[dataset_attr.history], list) or isinstance(sample[dataset_attr.history], dict)): for old_prompt, old_response in sample[dataset_attr.history]: prompt.append({"role": Role.USER.value, "content": old_prompt}) prompt.append({"role": Role.ASSISTANT.value, "content": old_response}) diff --git a/tests/ut/process_data/test_process_instruction_data_lf.json b/tests/ut/process_data/test_process_instruction_data_lf.json index 75a849896..136b37288 100644 --- a/tests/ut/process_data/test_process_instruction_data_lf.json +++ b/tests/ut/process_data/test_process_instruction_data_lf.json @@ -8,6 +8,7 @@ "output-prefix": 
"/data/tune_dataset/alpaca/alpaca", "tokenizer-name-or-path": "/data/qwen-7b/", "workers": 4, + "overwrite-cache": null, "log-interval": 1000, "prompt-type": "qwen" } @@ -22,10 +23,26 @@ "output-prefix": "/data/tune_dataset/alpaca_his/alpaca_his", "tokenizer-name-or-path": "/data/qwen-7b/", "workers": 4, + "overwrite-cache": null, "log-interval": 1000, "prompt-type": "qwen", "map-keys": "{\"history\":\"history\"}" } + }, + { + "params": { + "input": "/data/tune_dataset/oaast_sft.json", + "tokenizer-type": "PretrainedFromHF", + "handler-name": "AlpacaStyleInstructionHandler", + "output-prefix": "/data/tune_dataset/alpaca_his/alpaca_his_seq1024", + "tokenizer-name-or-path": "/data/qwen-7b/", + "workers": 4, + "overwrite-cache": null, + "log-interval": 1000, + "seq-length" : 1024, + "prompt-type": "qwen", + "map-keys": "{\"history\":\"history\"}" + } } ], "test_sharegpt_dataset": [ @@ -37,6 +54,7 @@ "output-prefix": "/data/tune_dataset/sharegpt/sharegpt", "tokenizer-name-or-path": "/data/qwen-7b/", "workers": 4, + "overwrite-cache": null, "log-interval": 1000, "prompt-type": "qwen", "map-keys": "{\"system\":\"system_prompt\"}" @@ -52,6 +70,7 @@ "output-prefix": "/data/tune_dataset/sharegpt/sharegpt", "tokenizer-name-or-path": "/data/qwen-7b/", "workers": 4, + "overwrite-cache": null, "log-interval": 1000, "prompt-type": "qwen", "map-keys": "{\"messages\":\"messages\", \"tags\": {\"role_tag\": \"role\", \"content_tag\": \"content\", \"user_tag\": \"user\", \"assistant_tag\": \"assistant\", \"system_tag\": \"system\"}}" diff --git a/tests/ut/process_data/test_process_instruction_data_lf.py b/tests/ut/process_data/test_process_instruction_data_lf.py index 11f6bca62..acec6fc63 100644 --- a/tests/ut/process_data/test_process_instruction_data_lf.py +++ b/tests/ut/process_data/test_process_instruction_data_lf.py @@ -1,6 +1,9 @@ import os +import contextlib +import io from pathlib import Path import pytest +import logging import modellink from tests.test_tools.utils import build_args, create_testconfig, compare_file_md5_same from preprocess_data import main @@ -15,7 +18,7 @@ class TestProcessInstructionDataLf: @pytest.mark.parametrize("params, base_path", [ (test_config["test_alpaca_dataset"][0], "/data/tune_dataset/Llamafactoryhandler/alpaca/alpaca"), - (test_config["test_alpaca_history_dataset"][0], "/data/tune_dataset/Llamafactoryhandler/alpaca_history/alpaca_history"), + (test_config["test_alpaca_history_dataset"][0], "/data/tune_dataset/Llamafactoryhandler/alpaca_history/alpaca_history_new"), (test_config["test_sharegpt_dataset"][0], "/data/tune_dataset/Llamafactoryhandler/sharegpt/sharegpt_lf"), (test_config["test_openai_dataset"][0], "/data/tune_dataset/Llamafactoryhandler/openai/sss") ]) @@ -54,3 +57,62 @@ class TestProcessInstructionDataLf: base_file = base_path + end_str test_file = params["output-prefix"] + end_str assert compare_file_md5_same(base_file, test_file) + + + @pytest.mark.parametrize("params, base_path", + [ + (test_config["test_alpaca_history_dataset"][1], "/data/tune_dataset/Llamafactoryhandler/alpaca_history/alpaca_history_seq1024"), + ]) + def test_skip_num(self, build_args, params, base_path): + """ + Tests skip_num in preprocessing and validates output files by comparing MD5 checksums. + + Parameters: + - params: dict + A dictionary containing dataset-specific configurations, such as input files, + output prefix, and tokenizer information. Extracted from `test_config`. 
+ - base_path: str + The base path of the reference dataset files (e.g., Alpaca, Alpaca History, ShareGPT, OpenAI). + Used to locate the ground truth files for comparison with the generated output. + """ + # create output dir if it doesn't exist + out_dir = os.path.dirname(params["output-prefix"]) + if not os.path.isdir(out_dir): + os.makedirs(out_dir) + + # run the main preprocessing function + log_capture_string = io.StringIO() + # run the main preprocessing function + log_handler = logging.StreamHandler(log_capture_string) + log_handler.setLevel(logging.INFO) + + formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + log_handler.setFormatter(formatter) + logger = logging.getLogger() + logger.addHandler(log_handler) + main() + output = log_capture_string.getvalue() + assert("Skip " in output and " sample exceeded seq-length" in output) + + index1 = output.find("Skip ") + index2 = output.find(" sample exceeded seq-length") + skip_num = output[index1 + 5: index2] + assert(skip_num == "796.0") + logger.removeHandler(log_handler) + log_capture_string.close() + + # print dataset name for clarity + dataset_name = base_path.split('/')[-1] + print(f"=============== test_{dataset_name}_dataset =============") + + prefix_str = params["output-prefix"].split('/')[-1] + mid_strs = ["_packed_attention_mask_document", "_packed_input_ids_document", "_packed_labels_document"] + end_suffixs = [".bin", ".idx"] + + # loop through mid_strs and end_suffixs, checking file MD5 hashes + for mid_str in mid_strs: + for end_suffix in end_suffixs: + end_str = mid_str + end_suffix + base_file = base_path + end_str + test_file = params["output-prefix"] + end_str + assert compare_file_md5_same(base_file, test_file) \ No newline at end of file -- Gitee From 2e4a77e6656924cc23f563c2261ae58d29ca8580 Mon Sep 17 00:00:00 2001 From: wucong Date: Sat, 15 Feb 2025 09:39:25 +0000 Subject: [PATCH 28/32] =?UTF-8?q?!2168=20=E4=BF=AE=E5=A4=8D=E5=A4=9A?= =?UTF-8?q?=E8=BD=AE=E5=AF=B9=E8=AF=9D=E8=BE=93=E5=85=A5->RC3=20Merge=20pu?= =?UTF-8?q?ll=20request=20!2168=20from=20wucong/fix=5Fmulti=5Frc3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modellink/data/decoder_packed_mtf_dataset.py | 85 +++++++++++++++++--- tests/__init__.py | 14 ++++ tests/test_tools/__init__.py | 14 ++++ 3 files changed, 102 insertions(+), 11 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_tools/__init__.py diff --git a/modellink/data/decoder_packed_mtf_dataset.py b/modellink/data/decoder_packed_mtf_dataset.py index f3ba08134..91937d096 100644 --- a/modellink/data/decoder_packed_mtf_dataset.py +++ b/modellink/data/decoder_packed_mtf_dataset.py @@ -28,6 +28,8 @@ from modellink.utils import is_rank_0 from modellink.tokenizer import build_tokenizer from modellink.data.mtf_dataset import MTFDataset, get_packed_indexed_dataset from modellink.error_utils import check_equal +from modellink.tasks.preprocess.templates import get_model_template + logger = logging.getLogger(__name__) @@ -176,19 +178,64 @@ class DecoderPackedMTFDataset(torch.utils.data.Dataset): else: return self._cut_instruction_token(item, np.int64) - def _cut_instruction_token(self, item, dtype): IGNORE_INDEX = -100 - prompt_length = (item["labels"] != IGNORE_INDEX).nonzero()[0][0] - prompt_ids = item["input_ids"][:prompt_length] - label_ids = item["labels"][prompt_length:] - source_len, target_len = _infer_seqlen( - len(prompt_ids), len(label_ids), self.seq_length - ) - prompt_ids = 
prompt_ids[:source_len] - label_ids = label_ids[:target_len] - input_ids = np.append(prompt_ids, label_ids) - labels = np.append(IGNORE_INDEX * np.ones(source_len), label_ids) + token_length = len(item["input_ids"]) + if token_length <= self.seq_length: + return { + "input_ids": item["input_ids"].astype(dtype), + "attention_mask": np.ones_like(item["input_ids"]).astype(dtype), + "labels": item["labels"].astype(dtype) + } + + template = None + # get model chat template + if hasattr(self.args, "prompt_type") and self.args.prompt_type is not None: + template = get_model_template(self.args.prompt_type) + + prompt_begin_list, prompt_end_list = get_prompt_index(item["labels"], IGNORE_INDEX) + + multi_turns = len(prompt_begin_list) + total_length = 0 + + if template is not None and template.efficient_eos: + total_length = 1 + prompt_end_list = [x - 1 for x in prompt_end_list] + eos_token_id = item["input_ids"][token_length - 1] + item["input_ids"] = item["input_ids"][:token_length] + item["labels"] = item["labels"][:token_length] + + cutoff_len = self.seq_length + input_ids = np.array([], dtype=dtype) + labels = np.array([], dtype=dtype) + + for turn_idx in range(multi_turns): + if total_length >= cutoff_len: + break + source_ids = item["input_ids"][prompt_begin_list[turn_idx]:prompt_end_list[turn_idx]] + mask_ids = item["labels"][prompt_begin_list[turn_idx]:prompt_end_list[turn_idx]] + + label_begin_idx = prompt_end_list[turn_idx] + + if turn_idx != multi_turns - 1: + target_ids = item["labels"][label_begin_idx:prompt_begin_list[turn_idx + 1]] + else: + target_ids = item["labels"][label_begin_idx:] + + source_len, target_len = _infer_seqlen(len(source_ids), len(target_ids), cutoff_len - total_length) + + source_ids = source_ids[:source_len] + target_ids = target_ids[:target_len] + mask_ids = mask_ids[:source_len] + + total_length += source_len + target_len + input_ids = np.concatenate((input_ids, source_ids, target_ids), axis=0) + labels = np.concatenate((labels, mask_ids, target_ids), axis=0) + + if template is not None and template.efficient_eos: + input_ids = np.concatenate((input_ids, np.array([eos_token_id], dtype=dtype)), axis=0) + labels = np.concatenate((labels, np.array([eos_token_id], dtype=dtype)), axis=0) + res = { "input_ids": input_ids.astype(dtype), "attention_mask": np.ones_like(input_ids).astype(dtype), @@ -236,6 +283,22 @@ class DecoderPackedMTFDataset(torch.utils.data.Dataset): return res +def get_prompt_index(labels, ignored_label): + prompt_begin_list = [] + prompt_end_list = [] + in_group = False + for idx, label in enumerate(labels): + if label == ignored_label: + if not in_group: + prompt_begin_list.append(idx) + in_group = True + elif in_group: + prompt_end_list.append(idx) + in_group = False + + return prompt_begin_list, prompt_end_list + + def _infer_seqlen(source_len: int, target_len: int, cutoff_len: int): r""" Computes the real sequence length after truncation by the cutoff_len. diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..9a1307aec --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file diff --git a/tests/test_tools/__init__.py b/tests/test_tools/__init__.py new file mode 100644 index 000000000..9a1307aec --- /dev/null +++ b/tests/test_tools/__init__.py @@ -0,0 +1,14 @@ +# coding=utf-8 +# Copyright (c) 2024, HUAWEI CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file -- Gitee From 1863a6724029423f984beaa8f5f19f7ffa4f64a5 Mon Sep 17 00:00:00 2001 From: xiecheng Date: Tue, 18 Feb 2025 06:56:33 +0000 Subject: [PATCH 29/32] =?UTF-8?q?!2222=20=E6=9B=B4=E6=96=B0commiters=20Mer?= =?UTF-8?q?ge=20pull=20request=20!2222=20from=20xiecheng/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- OWNERS | 1 - 1 file changed, 1 deletion(-) diff --git a/OWNERS b/OWNERS index 0674e8de1..9256e24cd 100644 --- a/OWNERS +++ b/OWNERS @@ -2,7 +2,6 @@ approvers: - fengliangjun66 - wujianping996 - dilililiwhy -- liuyanghan reviewers: - fengliangjun66 - guhangsong -- Gitee From 1753c483beeb58f18ec5a604ee1176cd68d67976 Mon Sep 17 00:00:00 2001 From: xiecheng Date: Thu, 20 Feb 2025 13:55:36 +0000 Subject: [PATCH 30/32] =?UTF-8?q?!2261=20=E3=80=901.0.RC3=E3=80=91readme?= =?UTF-8?q?=E5=8A=A0=E5=85=A5=E5=85=8D=E8=B4=A3=E5=A3=B0=E6=98=8E=20Merge?= =?UTF-8?q?=20pull=20request=20!2261=20from=20xiecheng/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index b51de5cd9..b2f1d35fc 100644 --- a/README.md +++ b/README.md @@ -1585,3 +1585,16 @@ MindSpeed-LLM由华为公司的下列部门联合贡献 : ## 安全声明 [MindSpeed-LLM安全声明](https://gitee.com/ascend/MindSpeed-LLM/wikis/%E5%AE%89%E5%85%A8%E7%9B%B8%E5%85%B3/%E5%AE%89%E5%85%A8%E5%A3%B0%E6%98%8E) + +# 免责声明 + +## 致MindSpeed-LLM使用者 +1. MindSpeed-LLM提供的模型仅供您用于非商业目的。 +2. 对于各模型,MindSpeed-LLM平台仅提示性地向您建议可用于训练的数据集,华为不提供任何数据集,如您使用这些数据集进行训练,请您特别注意应遵守对应数据集的License,如您因使用数据集而产生侵权纠纷,华为不承担任何责任。 +3. 
如您在使用MindSpeed-LLM模型过程中,发现任何问题(包括但不限于功能问题、合规问题),请在Gitee提交issue,我们将及时审视并解决。 + +## 致数据集所有者 +如果您不希望您的数据集在MindSpeed-LLM中的模型被提及,或希望更新MindSpeed-LLM中的模型关于您的数据集的描述,请在Gitee提交issue,我们将根据您的issue要求删除或更新您的数据集描述。衷心感谢您对MindSpeed-LLM的理解和贡献。 + +## License声明 +Ascend MindSpeed-LLM提供的模型,如模型目录下存在License的,以该License为准。如模型目录下不存在License的,以Apache 2.0许可证许可,对应许可证文本可查阅Ascend MindSpeed-LLM根目录。 \ No newline at end of file -- Gitee From 55615d72faa8b31fdfef638288ea2588e5f6d0ab Mon Sep 17 00:00:00 2001 From: xiecheng Date: Sat, 22 Feb 2025 09:24:27 +0000 Subject: [PATCH 31/32] =?UTF-8?q?!2265=20=E3=80=901.0.RC3=E3=80=91?= =?UTF-8?q?=E5=8A=A0=E5=85=A5open=20source=20notice=20Merge=20pull=20reque?= =?UTF-8?q?st=20!2265=20from=20xiecheng/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Third_Party_Open_Source_Software_Notice | 141 ++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 Third_Party_Open_Source_Software_Notice diff --git a/Third_Party_Open_Source_Software_Notice b/Third_Party_Open_Source_Software_Notice new file mode 100644 index 000000000..c5d32948c --- /dev/null +++ b/Third_Party_Open_Source_Software_Notice @@ -0,0 +1,141 @@ +OPEN SOURCE SOFTWARE NOTICE + +Please note we provide an open source software notice along with this product and/or this product firmware (in the following just “this product”). The open source software licenses are granted by the respective right holders. And the open source licenses prevail all other license information with regard to the respective open source software contained in the product, including but not limited to End User Software Licensing Agreement. This notice is provided on behalf of Huawei Technologies Co. Ltd. and any of its local subsidiaries which may have provided this product to you in your local country. + +Warranty Disclaimer +THE OPEN SOURCE SOFTWARE IN THIS PRODUCT IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE APPLICABLE LICENSES FOR MORE DETAILS. + +Copyright Notice and License Texts +Software: pytorch v2.1.0 +Copyright notice: +Copyright (c) 2016- Facebook, Inc +Copyright (c) 2014- Facebook, Inc +Copyright (c) 2011-2014 Idiap Research Institute +Copyright (c) 2012-2014 Deepmind Technologies +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +Copyright (c) 2006 Idiap Research Institute +Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) +Copyright (c) 2016-present, Facebook Inc. +Copyright (c) 2016 Facebook Inc. +Copyright (c) 2015 Google Inc. +Copyright (c) 2015 Yangqing Jia +Copyright 2019-2020 Kakao Brain +Copyright (c) 2022 Cruise LLC. +Copyright (c) 2013, 2014, 2015, the respective contributors +Copyright (c) 2015, 2016 the respective contributors +Copyright (c) 2014, The Regents of the University of California (Regents) +Copyright (c) 2014, the respective contributors +Copyright (c) 2018, Steven Moshier +Copyright (c) 2001-2002 Enthought, Inc. 2003-2019, SciPy Developers +Copyright (c) 1997-2011 by Secret Labs AB +Copyright (c) 1995-2011 by Fredrik Lundh +Copyright (c) 2010-2022 by Alex Clark and contributors +Copyright (c) 2006 The Android Open Source Project +Copyright (c) Facebook, Inc. and its affiliates +Copyright (c) Meta Platforms, Inc. 
and affiliates +Copyright 2004-present Facebook +Copyright (c) 2017 by Contributors +Copyright (c) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura +Copyright (c) 2022 Apple Inc. +Copyright (c) 2023 Apple Inc. +Copyright 2005 Robert Kern (robert.kern@gmail.com) +copyright 2019 The TensorFlow Authors +Copyright (c) 2018 MathInf GmbH, Thomas Viehmann +Copyright (c) 2014 Indiana University (c) +Copyright John Maddock 2006 +Copyright (c) 2012 Massachusetts Institute of Technology +Copyright (c) 2012 Giovanni Garberoglio Interdisciplinary Laboratory for Computational Science (LISC) Fondazione Bruno Kessler and University of Trento +Copyright (c) 2018 Marat Dukhan +Copyright (c) 2017-2018 Facebook Inc. +Copyright (c) 2017 Georgia Institute of Technology +Copyright 2015 Google Inc. +Copyright (c) 2011-2021, NVIDIA CORPORATION. +Copyright (c) 2022, Tri Dao +Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. +Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. +Copyright (c) 2017 The Android Open Source Project +Copyright (c) 2016-present, Facebook, Inc. +Copyright (c) 2005-2020 Rich Felker +Copyright Malte Skarupke 2017 +Copyright 2008 Google Inc. +Copyright (c) 2011 - 2012 Andrzej Krzemienski +Copyright (c) 2001-2019 Free Software Foundation, Inc. +Copyright (c) 1994 Hewlett-Packard Company +Copyright (c) 1996-1998 Silicon Graphics Computer Systems, Inc. +Copyright (c) Bjorn Fahller +Copyright Michael Park, 2015-2017 +Copyright (c) 2017-present, Facebook, Inc. +Copyright (c) 2018-present, Facebook, Inc. +Copyright (c) 2008-2015 The Khronos Group Inc. +Copyright 2016 Facebook +Copyright (c) 2016, NVIDIA CORPORATION +Copyright (c) 2008 - 2012 The Khronos Group Inc. +Copyright (c) 2008-2013 The Khronos Group Inc. +Copyright (c) 2008-2012 The Khronos Group Inc. +Copyright (c) 2016-2017, ARM Limited and Contributors +Copyright (c) 2014-2015 The Khronos Group Inc. +Copyright (c) 2015-2017 The Khronos Group Inc. +Copyright (c) Facebook Inc. and Microsoft Corporation +Copyright (c) 2014-2017 The Regents of the University of California (Regents) +Copyright (c) 2014-2017, the respective contributors +Copyright (c) 2017 Microsoft +Copyright 2015 The Gemmlowp Authors +Copyright (c) 2011-2019 Stephan Brumme +Copyright 2006, Google Inc. +Copyright (c) Meta Platforms, Inc. and its affiliates +Copyright (c) 2008 - 2009 NVIDIA Corporation +Copyright (c) 2007-2009 Scientific Computing and Imaging Institute, University of Utah +Copyright (c) 2006, Laurent Montel, montel@kde.org +Copyright 2013 Conrad Steenberg conrad.steenberg@gmail.com +copyright 2022, PyTorch +copyright 2023, PyTorch +Copyright (c) 2005-2022 NVIDIA Corporation Built +copyright PyTorch Contributors +Copyright (c) 2018 Alex Rogozhnikov +Copyright (c) 2016 Microsoft +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +Copyright (c) 2014, 2015, the respective contributors +Copyright (c) 2005-2017, NumPy Developers (c) Parameter containing Float +Copyright 2005, Google Inc. +Copyright 2019 Kakao Brain +Copyright 2013-2014 RAD Game +Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC +Copyright 2016 Martin Raiber +Copyright (c) 2003-2017 Josef Weidendorfer +Copyright (c) 2000-2017 Julian Seward +Copyright (c) Edward Z. Yang ezyang@mit.edu +Copyright (c) 2005-2010 ActiveState Software Inc. +Copyright (c) 2013 Eddy Petrisor +Copyright (c) 2010 ActiveState Software Inc. 
+Copyright (c) 2001-2014 Python Software Foundation +Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation +Copyright Python Software Foundation +Copyright 2022 Cruise LLC +Copyright (c) 2014 Matthew Rocklin +Copyright (c) 2015 Melissa E. O'Neill +Copyright (c) 2019 NumPy Developers +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. +Copyright 2013 Mark Dickinson + +License: BSD 3-Clause License +Copyright (c) , , +All rights reserved. +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Written Offer +This product contains software whose rights holders license it on the terms of the GNU General Public License, version 2 (GPLv2) and/or other open source software licenses. We will provide you and any third party with the source code of the software licensed under an open source software license if you send us a written request by mail or email to the following addresses: +foss@huawei.com +detailing the name of the product and the firmware version for which you need the source code and indicating how we can contact you. + +Please note you need to make a payment before you obtain the complete Corresponding Source Code from us. For how much you will pay and how we will deliver the complete Corresponding Source Code to you, we will further discuss it by mail or email. +This offer is valid to anyone in receipt of this information. + +THIS OFFER IS VALID FOR THREE YEARS FROM THE MOMENT WE DISTRIBUTED THE PRODUCT OR FIRMWARE. 
\ No newline at end of file -- Gitee From 7acd2296ab454fd1a596c2c2eafc3f2a68e84e96 Mon Sep 17 00:00:00 2001 From: Peihan Liu Date: Tue, 11 Mar 2025 12:30:27 +0000 Subject: [PATCH 32/32] =?UTF-8?q?!2374=20readme=E6=95=B4=E6=94=B9=20Merge?= =?UTF-8?q?=20pull=20request=20!2374=20from=20Peihan=20Liu/1.0.RC3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 91 +------------------------------------------------------ 1 file changed, 1 insertion(+), 90 deletions(-) diff --git a/README.md b/README.md index b2f1d35fc..3285bdb3b 100644 --- a/README.md +++ b/README.md @@ -1253,96 +1253,7 @@ MindSpeed-LLM预训练支持张量并行、流水线并行等多种加速算法 --- ## 分布式指令微调 -MindSpeed-LLM支持指令微调,方案与DeepSpeed统一,在微调效果保持一致的前提下,MindSpeed-LLM可以表现出优异性能 - -【与DeepSpeed微调Loss对比】 - - - - -
-<table>
-  <tr>
-    <th>Llama2-7b模型与DeepSpeed微调5个epoch后的loss对比图</th>
-  </tr>
-  <tr>
-    <td><img src="sources/images/tune_llama2_7b_MindSpeed-LLM_DeepSpeed_compare.png"></td>
-  </tr>
-</table>
-
-<table>
-  <tr>
-    <th>Qwen-7b模型与DeepSpeed微调4个epoch后的loss对比图</th>
-  </tr>
-  <tr>
-    <td><img src="sources/images/tune_qwen_7b_MindSpeed-LLM_DeepSpeed_compare.png"></td>
-  </tr>
-</table>
-
-【Llama2-7b微调后对话效果对比】
-
-<table>
-  <tr>
-    <th>Question</th>
-    <th>MindSpeed-LLM</th>
-    <th>DeepSpeed</th>
-  </tr>
-  <tr>
-    <td>What is the product of 6 and 2?</td>
-    <td>The product of 6 and 2 is 12.</td>
-    <td>The product of 6 and 2 is 12.</td>
-  </tr>
-  <tr>
-    <td>Look up the boiling point of water.</td>
-    <td>The boiling point of water is 100°C (212°F).</td>
-    <td>The boiling point of water is 100 degrees Celsius (212 degrees Fahrenheit).</td>
-  </tr>
-  <tr>
-    <td>Generate a poem that expresses joy.</td>
-    <td>I open my arms and accept life's challenge,
-
-Embracing all of life's uncertainty.
-
-A future of promise awaits,
-
-One that I can find in my fate.
-
-A journey full of hope and discovery,
-
-A journey that will never end in disappointment.
-
-For I have found the strength to keep going,
-
-Researching and learning, ever knowing.
-
-The future is mine, and mine alone,
-
-A future that I have to own,
-
-And I will never be alone,
-
-For I will have the courage to face all of my fears,
-
-And the joy of life will be mine forever.</td>
-    <td>I am so happy, so thankful and blessed,
-
-To have a heart that's full of cheer,
-
-My soul is on fire, my spirit runs free,
-
-My love is warm, my courage is high,
-
-My world is bright and my outlook is clear,
-
-My heart is light, my spirit runs free.
-
-I take each day with a positive outlook,
-
-My energy is so high, I can't be beat,
-
-My joy is real, my emotions run deep,
-
-My soul is full and my heart does soep.
-
-I am thankful for everything I have,
-
-My life is precious and my values true,
-
-My hope is high and my spirit runs free,
-
-My soul is full and my heart does soep.</td>
-  </tr>
-</table>
-
-【现版本实测性能、显存(硬件信息:Atlas 900 A2 PODc)】
+MindSpeed-LLM支持指令微调,在微调效果保持一致的前提下,MindSpeed-LLM可以表现出优异性能
 
 下述列表中的模型,我们在[examples/README.md](./examples/README.md)中提供了相应的使用说明,里面有详细的模型微调、推理、评估流程. 其中性能的单位是samples/s
-- Gitee
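
A closing note on the multi-turn truncation fix in patch 28 above: the rewritten `_cut_instruction_token` walks every conversation turn instead of truncating only the first prompt/response pair, and it locates those turns with the `get_prompt_index` helper, which scans the label sequence for runs of the ignored label (-100). The sketch below exercises that helper on a toy two-turn sample; the function body is copied verbatim from the patch, while the sample labels and the standalone `IGNORE_INDEX` constant are illustrative assumptions, not data from the repository.

```python
# Toy walk-through of the get_prompt_index helper added in
# modellink/data/decoder_packed_mtf_dataset.py (patch 28). Runs of the
# ignored label mark prompt tokens; the gaps between runs are responses.
IGNORE_INDEX = -100  # same sentinel the dataset code uses for masked labels


def get_prompt_index(labels, ignored_label):
    # Copied verbatim from the patch: collect the begin index of each
    # contiguous run of ignored_label and the index just past its end.
    prompt_begin_list = []
    prompt_end_list = []
    in_group = False
    for idx, label in enumerate(labels):
        if label == ignored_label:
            if not in_group:
                prompt_begin_list.append(idx)
                in_group = True
        elif in_group:
            prompt_end_list.append(idx)
            in_group = False
    return prompt_begin_list, prompt_end_list


# Hypothetical two-turn sample: prompt tokens are masked with -100,
# response tokens carry their real ids.
labels = [-100, -100, 11, 12, -100, -100, -100, 21, 22, 23]
begins, ends = get_prompt_index(labels, IGNORE_INDEX)
print(begins, ends)  # -> [0, 4] [2, 7]: prompts span [0, 2) and [4, 7)
```

With those spans, `_cut_instruction_token` can budget the remaining sequence length turn by turn through `_infer_seqlen` and stop once the cutoff is reached, which is what keeps over-long multi-turn dialogues from being truncated mid-conversation.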