From a7294b8dce11691a9afc2dc7cde1ac0c1b649cbd Mon Sep 17 00:00:00 2001
From: baoxiang
Date: Thu, 21 Sep 2023 10:48:40 +0800
Subject: [PATCH 1/3] modify readme, remove inference part

---
 .../built-in/foundation/LLaMA-13B/README.md   | 26 ++-----------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/PyTorch/built-in/foundation/LLaMA-13B/README.md b/PyTorch/built-in/foundation/LLaMA-13B/README.md
index 67159388dc..f4d8023566 100644
--- a/PyTorch/built-in/foundation/LLaMA-13B/README.md
+++ b/PyTorch/built-in/foundation/LLaMA-13B/README.md
@@ -161,7 +161,7 @@ applied on top of the original LLaMA weights, in two main steps:
    python3 -m fastchat.model.apply_delta \
        --base-model-path /path/to/llama-7b \
        --target-model-path /output/path/to/vicuna-7b \
-       --delta-path lmsys/vicuna-7b-delta-v1.1
+       --delta-path lmsys/vicuna-7b-delta-v0
    ```
 
 #### Vicuna-13B
@@ -171,7 +171,7 @@
    python3 -m fastchat.model.apply_delta \
        --base-model-path /path/to/llama-13b \
        --target-model-path /output/path/to/vicuna-13b \
-       --delta-path lmsys/vicuna-13b-delta-v1.1
+       --delta-path lmsys/vicuna-13b-delta-v0
    ```
 
 After the download completes, the corresponding pretrained-weight folders can be found in the root directory of the source package.
@@ -254,28 +254,6 @@
 | 13B-Competitor A | - | 1386 | 3 | zero2 |
 | 13B-NPU | - | 1498 | 3 | zero2 |
 
-# Model Inference
-
-## Supported Models
-
-- Vicuna, LLaMA
-
-## Running Inference
-
-Because the fused scaledmaskedsoftmax operator currently has limitations on the NPU, inference requires replacing the corresponding file in the transformers installation directory (based on transformers 4.28.1) with the following file from the transformers_modify folder in the source package root:
-
- ```
- modeling_llama_eval.py -> transformers/models/llama/modeling_llama.py
- ```
-
-Run the following commands to perform inference (on a single NPU, the 13B model needs roughly 28 GB of device memory and the 7B model roughly 14 GB):
-
- ```
- source /usr/local/Ascend/ascend_toolkit/set_env.sh
- python3 -m fastchat.serve.cli --model-path path/to/FastChat/7B-vicuna --num-gpus 1 --conv-template conv_one_shot
- python3 -m fastchat.serve.cli --model-path path/to/FastChat/13B-vicuna --num-gpus 1 --conv-template conv_one_shot
- ```
-
 # Release Notes
 
 ## Changes
--
Gitee

From 28722aadf3652be5ebbc3e3d05ab5c97867aa665 Mon Sep 17 00:00:00 2001
From: baoxiang
Date: Mon, 9 Oct 2023 20:20:22 +0800
Subject: [PATCH 2/3] fix bug for CrossEntropyLoss on overflow

---
 .../foundation/LLaMA-13B/transformers_modify/modeling_llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PyTorch/built-in/foundation/LLaMA-13B/transformers_modify/modeling_llama.py b/PyTorch/built-in/foundation/LLaMA-13B/transformers_modify/modeling_llama.py
index 8a00bf45ca..ed2a1c7c49 100644
--- a/PyTorch/built-in/foundation/LLaMA-13B/transformers_modify/modeling_llama.py
+++ b/PyTorch/built-in/foundation/LLaMA-13B/transformers_modify/modeling_llama.py
@@ -689,7 +689,7 @@ class LlamaForCausalLM(LlamaPreTrainedModel):
         shift_labels = shift_labels.view(-1)
         # Enable model parallelism
         shift_labels = shift_labels.to(shift_logits.device)
-        loss = loss_fct(shift_logits, shift_labels)
+        loss = loss_fct(shift_logits.float(), shift_labels).half()
 
         if not return_dict:
             output = (logits,) + outputs[1:]
--
Gitee
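Why patch 2 helps: with fp16 logits, the log-softmax reduction inside `CrossEntropyLoss` over LLaMA's 32000-entry vocabulary runs in half precision, where intermediates can exceed fp16's maximum of 65504 and overflow to inf. The one-line change upcasts the logits to fp32 for the loss and casts the scalar result back to half, so the surrounding fp16 training loop still sees the dtype it expects. A minimal sketch of that pattern in isolation; the batch dimension here is an illustrative assumption, not a value from the patch:

```python
import torch
from torch.nn import CrossEntropyLoss

vocab_size = 32000  # LLaMA's vocabulary size
# Hypothetical fp16 logits as they would arrive in LlamaForCausalLM,
# flattened to (batch * seq_len, vocab_size); 8 is an arbitrary choice.
shift_logits = torch.randn(8, vocab_size, dtype=torch.float16)
shift_labels = torch.randint(0, vocab_size, (8,))

loss_fct = CrossEntropyLoss()

# Before the patch the loss was computed directly on fp16 logits, where
# the log-sum-exp can overflow. The patched pattern does the numerically
# sensitive work in fp32, then returns the scalar to half precision:
loss = loss_fct(shift_logits.float(), shift_labels).half()
print(loss.dtype)  # torch.float16
```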
From b34e3ae1d5f16f0bfe20e9305825e2410fca34aa Mon Sep 17 00:00:00 2001
From: 鲍翔
Date: Tue, 31 Oct 2023 09:00:38 +0000
Subject: [PATCH 3/3] update PyTorch/built-in/foundation/LLaMA-13B/13B_finetune.sh.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 鲍翔
---
 PyTorch/built-in/foundation/LLaMA-13B/13B_finetune.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/PyTorch/built-in/foundation/LLaMA-13B/13B_finetune.sh b/PyTorch/built-in/foundation/LLaMA-13B/13B_finetune.sh
index bd5e76e339..3775299f05 100644
--- a/PyTorch/built-in/foundation/LLaMA-13B/13B_finetune.sh
+++ b/PyTorch/built-in/foundation/LLaMA-13B/13B_finetune.sh
@@ -17,9 +17,9 @@ run_cmd="HCCL_CONNECT_TIMEOUT=1200 deepspeed --master_port ${MASTER_PORT} --num_
     --fp16 True \
     --output_dir ./ckpt_16p \
     --num_train_epochs 1 \
-    --per_device_train_batch_size 8 \
+    --per_device_train_batch_size 4 \
     --per_device_eval_batch_size 1 \
-    --gradient_accumulation_steps 16 \
+    --gradient_accumulation_steps 32 \
     --evaluation_strategy "no" \
     --save_strategy "steps" \
     --save_steps 500 \
--
Gitee
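Patch 3 halves the per-device micro-batch (8 to 4) and doubles gradient accumulation (16 to 32), so the number of samples consumed per optimizer step is unchanged; only peak activation memory per NPU drops. A quick sanity check of that arithmetic; the world size of 16 is an assumption inferred from the `./ckpt_16p` output directory, not something the patch states:

```python
# Samples per optimizer step = micro_batch * accumulation_steps * world_size.
world_size = 16  # assumption: "./ckpt_16p" suggests a 16-process run

before = 8 * 16 * world_size  # per_device_train_batch_size=8, accumulation=16
after = 4 * 32 * world_size   # per_device_train_batch_size=4, accumulation=32
assert before == after == 2048

# The optimizer-step batch size (and so the per-step LR schedule) is
# preserved; the smaller micro-batch trades more forward/backward passes
# per step for lower peak activation memory on each device.
```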