diff --git a/README.md b/README.md
index b51328219444c0816cb6ce62b66e6da013202715..4ddd9511c5b586b08f9eccaa36ad7bec101d0fab 100644
--- a/README.md
+++ b/README.md
@@ -1168,7 +1168,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
         MiniCPM-V-2
-        Supported
+        Supported
         -
         -
diff --git a/models/vision-language-understanding/MiniCPM-V-2/vllm/README.md b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md
similarity index 100%
rename from models/vision-language-understanding/MiniCPM-V-2/vllm/README.md
rename to models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md
diff --git a/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1c0b9c835b299341cc45a866800e7b4a4bd5c89
--- /dev/null
+++ b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install timm==0.9.10
+pip3 install transformers
+pip3 install --user --upgrade pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+cp /mnt/deepspark/data/datasets/dog.jpg ./
\ No newline at end of file
diff --git a/models/vision-language-understanding/MiniCPM-V-2/vllm/minicpmv-2.0-offline.py b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py
similarity index 93%
rename from models/vision-language-understanding/MiniCPM-V-2/vllm/minicpmv-2.0-offline.py
rename to models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py
index d6add4d8f00fcc8bb307767d149dad8009f182b0..1da0fdd8e5bb7c507c5f24d8f66f0d0e5d7d5967 100644
--- a/models/vision-language-understanding/MiniCPM-V-2/vllm/minicpmv-2.0-offline.py
+++ b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py
@@ -38,9 +38,9 @@ def main(args):
     llm = LLM(model=MODEL_NAME,
               gpu_memory_utilization=0.95, # 使用全部GPU内存
               trust_remote_code=True,
-              max_model_len=1024,
-              max_num_seqs=1,
-              max_num_batched_tokens=1024,) # 根据内存状况可调整此值
+              max_model_len=2048,
+              # max_num_seqs=1,
+              max_num_batched_tokens=2048,) # 根据内存状况可调整此值
 
     # 构建对话消息
     messages = [{'role': 'user', 'content': '(<image>./</image>)\n' + '请描述这张图片'}]
@@ -64,10 +64,10 @@ def main(args):
         # top_p=0.8,
         # top_k=100,
         # seed=3472,
-        max_tokens=128,
+        max_tokens=1024,
         # min_tokens=150,
         temperature=0,
-        use_beam_search=False,
+        # use_beam_search=False,
         # length_penalty=1.2,
         best_of=1)
 
diff --git a/models/nlp/large_language_model/baichuan2-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/baichuan2-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..54b66b4eba0c0d7beb5f93d3699e4377dae399c0
--- /dev/null
+++ b/models/nlp/large_language_model/baichuan2-7b/vllm/ci/prepare.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install transformers==4.37.1
+
+# already prepared in CI
+# ln -s /mnt/deepspark/data/checkpoints/Baichuan2-7B-Base ./
+
+python3 convert2int8.py --model-path ./baichuan2-7b/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py b/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py
index 40c0e2e10deb2869ad38dcdd2663a6ed8d5baa23..9e5738a17c9b532a4c38b3365a55df54c748a51d 100644
--- a/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py
@@ -109,4 +109,9 @@ for i, output in enumerate(outputs):
     print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
 
 print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
+
 # 0.3.2 tokens: 757, QPS: 97.97229589080902
\ No newline at end of file
diff --git a/models/nlp/large_language_model/chatglm3-6b-32k/vllm/ci/prepare.sh b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ad683f6e992593e59d91544c2e1e6b724a6245ec
--- /dev/null
+++ b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
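+
+# CI preparation for the chatglm3-6b-32k vLLM case: install the distro's
+# OpenGL runtime (a common vision-dependency requirement), then pin
+# transformers to the release these ChatGLM3 cases are assumed to work with.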
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install transformers==4.37.1
diff --git a/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py
index bc731079f72988cd20c5a68b3ccb4e192769c8fb..7fc45b685046e85411b17ea4edf7ff46b8bf52a6 100644
--- a/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py
@@ -57,7 +57,7 @@ if __name__ == "__main__":
     model_name = model_name.rsplit("/")[-1]
 
     # Sample prompts.
-    prompts = ["Щܱһھ?", "һ֥ʿ", "дһƪй5Gз¡"]
+    prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"]
 
     # Create a sampling params object.
     sampling_params = SamplingParams(**sampling_params)
diff --git a/models/nlp/large_language_model/chatglm3-6b/vllm/ci/prepare.sh b/models/nlp/large_language_model/chatglm3-6b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4cfd5fd6f1fd9dcc9fab83d9023b33e4752606a9
--- /dev/null
+++ b/models/nlp/large_language_model/chatglm3-6b/vllm/ci/prepare.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install vllm==0.5.0
+pip3 install transformers==4.37.1
diff --git a/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py b/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
index 0162d93c53ac839268b3c964e0e96ecaad63ac4e..eaa7fe206459819731b00523c6d40224afafc545 100644
--- a/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
@@ -99,4 +99,9 @@ for i, output in enumerate(outputs):
     print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
 
 print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
+
 # 0.3.2 tokens: 422, QPS: 70.02308283048338(tokens: 422, QPS: 93.67210003677407),32-k 模型 tokens: 477, QPS: 81.46537314533865(tokens: 477, QPS: 106.54247895449554)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-13b/trtllm/README.md b/models/nlp/large_language_model/llama2-13b/trtllm/README.md
index 4658334d0f2c2c262ff613a3127f9832965e465f..b24c29ce921f2571833a50d01ad3035ec28e1ea5 100755
--- a/models/nlp/large_language_model/llama2-13b/trtllm/README.md
+++ b/models/nlp/large_language_model/llama2-13b/trtllm/README.md
@@ -18,9 +18,6 @@ apt install -y libgl1-mesa-dev
 
 bash scripts/set_environment.sh .
 
-# *star refer to a specified version
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/tensorrt_llm-*.whl
-pip install tensorrt_llm-*.whl
 ```
 
 ### Download
diff --git a/models/nlp/large_language_model/llama2-13b/trtllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-13b/trtllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7947d62bee569e9f109283e843b288fc68148f0e
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-13b/trtllm/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+bash scripts/set_environment.sh .
+
+# Download model from the website and make sure the model's path is "data/llama2-13b-chat"
+# Download dataset from the website and make sure the dataset's path is "data/datasets_cnn_dailymail"
+mkdir -p data
+ln -s /mnt/deepspark/data/checkpoints/llama2-13b-chat data/llama2-13b-chat
+ln -s /mnt/deepspark/data/datasets/datasets_cnn_dailymail data/datasets_cnn_dailymail
+# Please download rouge.py to this path if your server can't reach huggingface.co.
+mkdir -p rouge/
+cp /mnt/deepspark/data/3rd_party/rouge.py rouge/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-70b/trtllm/README.md b/models/nlp/large_language_model/llama2-70b/trtllm/README.md
index 671b51c79f01d8760df490d167e32814634d507d..621dbffeeaac3387030e326e6e24f918644c9ae1 100644
--- a/models/nlp/large_language_model/llama2-70b/trtllm/README.md
+++ b/models/nlp/large_language_model/llama2-70b/trtllm/README.md
@@ -16,10 +16,6 @@ yum install -y mesa-libGL
 apt install -y libgl1-mesa-dev
 
 bash scripts/set_environment.sh .
-
-# *star refer to a specified version
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/tensorrt_llm-*.whl
-pip install tensorrt_llm-*.whl
 ```
 
 ### Download
diff --git a/models/nlp/large_language_model/llama2-70b/trtllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-70b/trtllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4a8c1e4e321df3e2ca3228b3456029062731ddfd
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-70b/trtllm/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+bash scripts/set_environment.sh .
+
+# Download model from the website and make sure the model's path is "data/llama2-70b-chat"
+# Download dataset from the website and make sure the dataset's path is "data/datasets_cnn_dailymail"
+mkdir -p data
+ln -s /mnt/deepspark/data/checkpoints/llama2-70b-chat data/llama2-70b-chat
+ln -s /mnt/deepspark/data/datasets/datasets_cnn_dailymail data/datasets_cnn_dailymail
+# Please download rouge.py to this path if your server can't reach huggingface.co.
+mkdir -p rouge/
+cp /mnt/deepspark/data/3rd_party/rouge.py rouge/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-7b/trtllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-7b/trtllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2ac2384d24f563195692e601c86010febfcac6ef
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-7b/trtllm/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+bash scripts/set_environment.sh .
+
+# Download model from the website and make sure the model's path is "data/llama2-7b-chat"
+# Download dataset from the website and make sure the dataset's path is "data/datasets_cnn_dailymail"
+mkdir -p data
+ln -s /mnt/deepspark/data/checkpoints/llama2-7b-chat data/llama2-7b-chat
+ln -s /mnt/deepspark/data/datasets/datasets_cnn_dailymail data/datasets_cnn_dailymail
+# Please download rouge.py to this path if your server can't reach huggingface.co.
+mkdir -p rouge/
+cp /mnt/deepspark/data/3rd_party/rouge.py rouge/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6afe9667d3033ef4fe624e70662dc90bd6e563f7
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-7b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the iluvatar manager to get adapted install packages of vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
diff --git a/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py b/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py
index 9c0b6d2fabcd1e9f64f59208336f12cd8d0def0c..538d35410b4a178602726608cbbae21550e2f90b 100644
--- a/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
         num_tokens += len(output.outputs[0].token_ids)
         print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
     print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["tokens"] = num_tokens
+    metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+    print(metricResult)
diff --git a/models/nlp/large_language_model/llama3-70b/vllm/ci/prepare.sh b/models/nlp/large_language_model/llama3-70b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..000245a822b911a916c2693f3b2adfbff570520b
--- /dev/null
+++ b/models/nlp/large_language_model/llama3-70b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
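+
+# CI preparation for the llama3-70b vLLM case. No Python packages are
+# installed here; the Iluvatar-adapted vllm/triton/ixformer wheels are
+# assumed to be preinstalled in the CI image (compare llama2-7b/vllm).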
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
diff --git a/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py b/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py
index cdf635a71debcbb68945a745e8cd7b2151968c77..6932fde6ba557964767d36b123dca9c4f4bf05c7 100644
--- a/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py
@@ -151,6 +151,10 @@ if args.acc_test:
         print('val ROUGE-1 score f1: {}, target ROUGE-1 score f1: {}, fail'.format(scores[0]["rouge-1"]['f'],args.acc_threshold))
         exit(1)
     print('val ROUGE-1 score f1: {}, target ROUGE-1 score f1: {}, pass'.format(scores[0]["rouge-1"]['f'],args.acc_threshold))
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["val ROUGE-1 score f1"] = scores[0]["rouge-1"]['f']
+    metricResult["metricResult"]["target ROUGE-1 score f1"] = args.acc_threshold
+    print(metricResult)
 
 # 2 7b vllm 0.1.6: batch 3, tokens: 773, QPS: 64.35866137433203; batch 1, tokens: 257, QPS: 25.396898421442113
 # 1\2 13b vllm 0.1.6: batch 3, tokens: 768, QPS: 41.538942353799506; batch 1, tokens: 257, QPS: 15.639606595029639 (2, 6.5829828847570795; 8, 5.137610167755676)
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
index 33b0aab2bf76ae501b61e91c0a6104194fb4654a..729b9833fa6e0d7947f72dde56206988646bc299 100644
--- a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
@@ -17,10 +17,6 @@ yum install -y mesa-libGL
 
 ## Ubuntu
 apt install -y libgl1-mesa-dev
-# *star refer to a specified version
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/text-generation-*.whl
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/text-generation-server-*.whl
-pip install tensorrt_llm-*.whl text-generation-server-*.whl
 ```
 
 ### Download
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/ci/prepare.sh b/models/nlp/large_language_model/qwen-7b/text-generation-inference/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4b2fdf8b5b521defcd963cd9e9fe92bd271dc2cf
--- /dev/null
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/ci/prepare.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+mkdir -p data
+
+ln -s /mnt/deepspark/data/checkpoints/qwen-7B data/qwen-7B
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py b/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py
index 57db633401e7849adac36f5f9e6ad166fdf38bbd..e3ebcc3a5e6b40d7801ebeda885710aec4dd6f08 100644
--- a/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py
@@ -109,6 +109,11 @@ if __name__ == "__main__":
     duration_time = end_time - start_time
     print(f"generate length: {generations_one[0].generated_text.generated_tokens}")
     print(f"one batch: {generations_one[0].generated_text.text}\nqps: {generations_one[0].generated_text.generated_tokens /duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["generate length"] = generations_one[0].generated_text.generated_tokens
+    metricResult["metricResult"]["one batch"] = generations_one[0].generated_text.text
+    metricResult["metricResult"]["qps"] = generations_one[0].generated_text.generated_tokens /duration_time
+    print(metricResult)
 
 """
 qwen-7B
diff --git a/models/nlp/large_language_model/qwen-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen-7b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the iluvatar manager to get adapted install packages of vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py
index 3b9e9fd89ae1043b6055a01087d8d6e421281c5e..5de14fb0f6989e50ed03725946cb17543d30832d 100644
--- a/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py
@@ -130,3 +130,7 @@ if __name__ == "__main__":
         num_tokens += len(output.outputs[0].token_ids)
         print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
     print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["tokens"] = num_tokens
+    metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+    print(metricResult)
diff --git a/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md b/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md
index b3c67597fbb13b3d404969aa0f4c09208b131e8c..905967c2c7372eba6bfb3791fe6833400ba68ee6 100644
--- a/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md
+++ b/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md
@@ -29,7 +29,7 @@ ln -s /path/to/Qwen1.5-14B ./data/qwen1.5
 ## Inference
 
 ```bash
-python3 offline_inference.py --model ./data/qwen1.5/Qwen1.5-14B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 1024
+python3 offline_inference.py --model ./data/qwen1.5/Qwen1.5-14B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 896
 ```
 
 ## Results
diff --git a/models/nlp/large_language_model/qwen1.5-14b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-14b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..75fb19458942e1d61a674c1d6dd9bbdb521bc00c
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-14b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
diff --git a/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py
index 7ee127a259eb78f91d71c07b4a129464e0cc6cd3..130f0885a4a85b657d71d6204bdf78a2cc9e871a 100644
--- a/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py
@@ -108,3 +108,7 @@ for i, output in enumerate(outputs):
     num_tokens += len(output.outputs[0].token_ids)
     print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
 print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-32b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-32b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-32b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the iluvatar manager to get adapted install packages of vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py
index 5e85929151748155349ec1de2bd89b9789f48574..9799150f7c1221b3d8f58857d3463727c9d5400e 100644
--- a/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
         num_tokens += len(output.outputs[0].token_ids)
         print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
     print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["tokens"] = num_tokens
+    metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+    print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-72b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-72b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..75fb19458942e1d61a674c1d6dd9bbdb521bc00c
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-72b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
diff --git a/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py
index 7ee127a259eb78f91d71c07b4a129464e0cc6cd3..130f0885a4a85b657d71d6204bdf78a2cc9e871a 100644
--- a/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py
@@ -108,3 +108,7 @@ for i, output in enumerate(outputs):
     num_tokens += len(output.outputs[0].token_ids)
     print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
 print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c801677cefaf6638b3a9695a7da4d84a3a66fcc1
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/ci/prepare.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+mkdir -p data
+
+ln -s /mnt/deepspark/data/checkpoints/Qwen1.5-7B data/Qwen1.5-7B
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py
index b927973a76953e189d4c4ebd4ee10bc392e0b4f0..87f4df9885a635af2e019fb76bbca2c5210f0cb4 100644
--- a/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py
@@ -115,6 +115,11 @@ if __name__ == "__main__":
     duration_time = end_time - start_time
     print(f"generate length: {generations_one[0].generated_text.generated_tokens}")
     print(f"one batch: {generations_one[0].generated_text.text}\nqps: {generations_one[0].generated_text.generated_tokens /duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["generate length"] = generations_one[0].generated_text.generated_tokens
+    metricResult["metricResult"]["one batch"] = generations_one[0].generated_text.text
+    metricResult["metricResult"]["qps"] = generations_one[0].generated_text.generated_tokens /duration_time
+    print(metricResult)
 
 """
 qwen1.5-0.5B
diff --git a/models/nlp/large_language_model/qwen1.5-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..75fb19458942e1d61a674c1d6dd9bbdb521bc00c
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-7b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
diff --git a/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py
index 7ee127a259eb78f91d71c07b4a129464e0cc6cd3..bae01307762ee571e9c8bd5f77af10a177bbf28d 100644
--- a/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py
@@ -108,3 +108,7 @@ for i, output in enumerate(outputs):
     num_tokens += len(output.outputs[0].token_ids)
     print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
 print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
diff --git a/models/nlp/large_language_model/qwen2-72b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen2-72b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen2-72b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the iluvatar manager to get adapted install packages of vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py
index 5e85929151748155349ec1de2bd89b9789f48574..9799150f7c1221b3d8f58857d3463727c9d5400e 100644
--- a/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
         num_tokens += len(output.outputs[0].token_ids)
         print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
     print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["tokens"] = num_tokens
+    metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+    print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen2-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen2-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen2-7b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the iluvatar manager to get adapted install packages of vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py
index 5e85929151748155349ec1de2bd89b9789f48574..9799150f7c1221b3d8f58857d3463727c9d5400e 100644
--- a/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
         num_tokens += len(output.outputs[0].token_ids)
         print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
     print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["tokens"] = num_tokens
+    metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+    print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/stablelm/vllm/ci/prepare.sh b/models/nlp/large_language_model/stablelm/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6d81455870a7e0da387248def8bb77bbae5e417f
--- /dev/null
+++ b/models/nlp/large_language_model/stablelm/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+    apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+    yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install transformers
\ No newline at end of file
diff --git a/models/nlp/large_language_model/stablelm/vllm/offline_inference.py b/models/nlp/large_language_model/stablelm/vllm/offline_inference.py
index 40678a62ea18296ecdd53cbbcf7d8c3c25e0950d..e9f2abfb7002071d0ce520be433eb972fd0def4b 100644
--- a/models/nlp/large_language_model/stablelm/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/stablelm/vllm/offline_inference.py
@@ -132,4 +132,8 @@ if __name__ == "__main__":
         num_tokens += len(output.outputs[0].token_ids)
         print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
 
-    print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
\ No newline at end of file
+    print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+    metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["tokens"] = num_tokens
+    metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+    print(metricResult)
\ No newline at end of file
diff --git a/tests/models_trtllm.yaml b/tests/models_trtllm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de21908e72524b29534ee542380ab49dfb448e30
--- /dev/null
+++ b/tests/models_trtllm.yaml
@@ -0,0 +1,41 @@
+---
+- datasets: https://localhost
+  download_url: https://huggingface.co/meta-llama/llama2-7b-chat
+  name: llama2-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/llama2-7b/trtllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/llama2-13b-chat
+  name: llama2-13b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/llama2-13b/trtllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/llama2-70b-chat
+  name: llama2-70b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/llama2-70b/trtllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/qwen-7B
+  name: qwen-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen-7b/text-generation-inference
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://modelscope.cn/models/qwen/Qwen1.5-7B
+  name: qwen1.5-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen1.5-7b/text-generation-inference
+  task_type: nlp/large_language_model
diff --git a/tests/models_vllm.yaml b/tests/models_vllm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..548f16c81dbc297b22a666e4982c3a3027311128
--- /dev/null
+++ b/tests/models_vllm.yaml
@@ -0,0 +1,114 @@
+---
+- datasets: https://localhost
+  download_url: https://huggingface.co/baichuan-inc/Baichuan2-7B-Base
+  name: baichuan2-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  - int8
+  relative_path: models/nlp/large_language_model/baichuan2-7b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://huggingface.co/THUDM/chatglm3-6b
+  name: chatglm3-6b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/chatglm3-6b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://www.modelscope.cn/models/ZhipuAI/chatglm3-6b-32k
+  name: chatglm3-6b-32k
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/chatglm3-6b-32k/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/llama2-7b
+  name: llama2-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/llama2-7b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/Meta-Llama-3-70B-Instruct
+  name: llama3-70b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/llama3-70b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/qwen-7B
+  name: qwen-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen-7b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://modelscope.cn/models/qwen/Qwen1.5-7B
+  name: qwen1.5-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen1.5-7b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://modelscope.cn/models/qwen/Qwen1.5-14B
+  name: qwen1.5-14b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen1.5-14b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://modelscope.cn/models/Qwen/Qwen1.5-32B-Chat
+  name: qwen1.5-32b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen1.5-32b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://modelscope.cn/models/qwen/Qwen1.5-72B
+  name: qwen1.5-72b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen1.5-72b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://modelscope.cn/models/Qwen/Qwen2-7B-Instruct
+  name: qwen2-7b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen2-7b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/Qwen2-72B
+  name: qwen2-72b
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/qwen2-72b/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://huggingface.co/stabilityai/stablelm-2-1_6b
+  name: stablelm
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/nlp/large_language_model/stablelm/vllm
+  task_type: nlp/large_language_model
+- datasets: https://localhost
+  download_url: https://localhost/MiniCPM-V-2
+  name: minicpm-v-2
+  need_third_part: false
+  precisions:
+  - fp16
+  relative_path: models/multimodal/vision-language-understanding/minicpm-v-2/vllm/
+  task_type: multimodal/vision-language-understanding
diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py
new file mode 100644
index 0000000000000000000000000000000000000000..911ac6fd80bed287336bd092c521cf6f6478b396
--- /dev/null
+++ b/tests/run_trtllm.py
@@ -0,0 +1,189 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import yaml
+import subprocess
+import json
+import re
+import time
+import logging
+import os
+import sys
+import argparse
+
+import utils
+
+# Configure logging
+debug_level = logging.DEBUG if utils.is_debug() else logging.INFO
+logging.basicConfig(
+    handlers=[logging.FileHandler("output.log"), logging.StreamHandler()],
+    level=debug_level,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+METRIC_PATTERN = r"{'metricResult':.*}"
+
+def main():
+    parser = argparse.ArgumentParser(description="")
+    parser.add_argument("--model", type=str, help="model name, e.g.: llama2-7b")
+    args = parser.parse_args()
+
+    if args.model:
+        test_model = args.model
+    else:
+        test_model = os.environ.get("TEST_CASE")
+    logging.info(f"Test case to run: {test_model}")
+    if not test_model:
+        logging.error("test model case is empty")
+        sys.exit(-1)
+
+    model = get_model_config(test_model)
+    if not model:
+        logging.error("model config is empty")
+        sys.exit(-1)
+
+    result = {}
+    # NLP models
+    if model["task_type"] in ["nlp/large_language_model"]:
+        logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}")
+        d_url = model["download_url"]
+        if d_url is not None:
+            result = run_nlp_testcase(model)
+            check_model_result(result)
+            logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}")
+            logging.info(f"End running {model['name']} test case.")
+
+    logging.info(f"Full text result: {result}")
+
+def get_model_config(model_name):
+    with open("models_trtllm.yaml", "r") as file:
+        models = yaml.safe_load(file)
+
+    for model in models:
+        if model["name"] == model_name.lower():
+            return model
+    return
+
+def check_model_result(result):
+    status = "PASS"
+    for prec in ["fp16", "int8"]:
+        if prec in result["result"]:
+            if result["result"][prec]["status"] == "FAIL":
+                status = "FAIL"
+                break
+    result["status"] = status
+
+def run_nlp_testcase(model):
+    model_name = model["name"]
+    result = {
+        "name": model_name,
+        "result": {},
+    }
+    d_url = model["download_url"]
+    checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
+    prepare_script = f"""
+    set -x
+    cd ../{model['relative_path']}
+    bash ci/prepare.sh
+    """
+
+    # add pip list info when in debug mode
+    if utils.is_debug():
+        pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+        prepare_script = pip_list_script + prepare_script + pip_list_script
+
+    run_script(prepare_script)
+
+    for prec in model["precisions"]:
+        logging.info(f"Start running {model_name} {prec} test case")
+        script = f"""
+        set -x
+        cd ../{model['relative_path']}
+        """
+        if model_name == "llama2-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            bash scripts/test_trtllm_llama2_7b_gpu1_build.sh
+            bash scripts/test_trtllm_llama2_7b_gpu1.sh
+            """
+        elif model_name == "llama2-13b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1
+            bash scripts/test_trtllm_llama2_13b_gpu2_build.sh
+            bash scripts/test_trtllm_llama2_13b_gpu2.sh
+            """
+        elif model_name == "llama2-70b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+            bash scripts/test_trtllm_llama2_70b_gpu8_build.sh
+            bash scripts/test_trtllm_llama2_70b_gpu8.sh
+            """
+        elif model_name == "qwen-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=1
+            python3 offline_inference.py --model2path ./data/qwen-7B
+            """
+        elif model_name == "qwen1.5-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=1
+            python3 offline_inference.py --model2path ./data/Qwen1.5-7B
+            """
+
+        r, t = run_script(script)
+        sout = r.stdout
+
+        pattern = METRIC_PATTERN
+        matchs = re.findall(pattern, sout)
+        result["result"].setdefault(prec, {"status": "FAIL"})
+        logging.debug(f"matchs:\n{matchs}")
+        for m in matchs:
+            result["result"][prec].update(get_metric_result(m))
+        if len(matchs) == 2:
+            result["result"][prec]["status"] = "PASS"
+
+        result["result"][prec]["Cost time (s)"] = t
+    return result
+
+def get_metric_result(str):
+    # The inference scripts print a Python dict literal; swap quotes so it
+    # parses as JSON (fragile if the generated text itself contains quotes).
+    if str:
+        return json.loads(str.replace("'", "\""))["metricResult"]
+    return None
+
+def run_script(script):
+    start_time = time.perf_counter()
+    result = subprocess.run(
+        script, shell=True, capture_output=True, text=True, executable="/bin/bash"
+    )
+    end_time = time.perf_counter()
+    execution_time = end_time - start_time
+    logging.debug(f"Command executed:\n{script}")
+    logging.debug("Execution time: {:.4f} s".format(execution_time))
+    logging.debug(f"stdout: {result.stdout}")
+    logging.debug(f"stderr: {result.stderr}")
+    logging.debug(f"Return code: {result.returncode}")
+    return result, execution_time
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bd3504db634ec4360e502cb27aababd5edace63
--- /dev/null
+++ b/tests/run_vllm.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
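+
+# Flow: look up the model entry in models_vllm.yaml, symlink its checkpoint
+# and run the case's ci/prepare.sh, then execute one inference script per
+# precision and scrape the printed {'metricResult': ...} dict from stdout
+# (METRIC_PATTERN) to decide PASS/FAIL and collect metrics.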
+
+def main():
+    parser = argparse.ArgumentParser(description="Run a vLLM model test case.")
+    parser.add_argument("--model", type=str, help="model name, e.g. qwen2-7b")
+    args = parser.parse_args()
+
+    if args.model:
+        test_model = args.model
+    else:
+        test_model = os.environ.get("TEST_CASE")
+    logging.info(f"Test case to run: {test_model}")
+    if not test_model:
+        logging.error("No test case specified; pass --model or set TEST_CASE")
+        sys.exit(-1)
+
+    model = get_model_config(test_model)
+    if not model:
+        logging.error("model config is empty")
+        sys.exit(-1)
+
+    result = {}
+    # NLP and multimodal models
+    if model["task_type"] in ["nlp/large_language_model", "multimodal/vision-language-understanding"]:
+        logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}")
+        d_url = model["download_url"]
+        if d_url is not None:
+            result = run_nlp_testcase(model)
+        check_model_result(result)
+        logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}")
+        logging.info(f"End running {model['name']} test case.")
+
+    logging.info(f"Full text result: {result}")
+
+def get_model_config(model_name):
+    with open("models_vllm.yaml", "r") as file:
+        models = yaml.safe_load(file)
+
+    for model in models:
+        if model["name"] == model_name.lower():
+            return model
+    return None
+
+def check_model_result(result):
+    status = "PASS"
+    for prec in ["fp16", "int8"]:
+        if prec in result.get("result", {}):
+            if result["result"][prec]["status"] == "FAIL":
+                status = "FAIL"
+                break
+    result["status"] = status
+
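+# Shape of the result dict assembled below, shown with illustrative values
+# (the per-precision metrics come from get_metric_result):
+#
+#   {
+#       "name": "qwen2-7b",
+#       "result": {
+#           "fp16": {"status": "PASS", "tokens": 757, "QPS": 97.972, "Cost time (s)": 42.1}
+#       },
+#       "status": "PASS",
+#   }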
+def run_nlp_testcase(model):
+    model_name = model["name"]
+    result = {
+        "name": model_name,
+        "result": {},
+    }
+    d_url = model["download_url"]
+    checkpoint_n = d_url.split("/")[-1]
+    dataset_n = model["datasets"].split("/")[-1]
+    prepare_script = f"""
+    set -x
+    cd ../{model['relative_path']}
+    ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{model_name}
+    bash ci/prepare.sh
+    """
+
+    # add pip list info when in debug mode
+    if utils.is_debug():
+        pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+        prepare_script = pip_list_script + prepare_script + pip_list_script
+
+    run_script(prepare_script)
+
+    for prec in model["precisions"]:
+        logging.info(f"Start running {model_name} {prec} test case")
+        script = f"""
+        set -x
+        cd ../{model['relative_path']}
+        """
+        if model_name == "baichuan2-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            python3 offline_inference.py --model ./baichuan2-7b/ --max-tokens 256 --trust-remote-code --chat_template template_baichuan.jinja --temperature 0.0
+            """
+            if prec == "int8":
+                script = f"""
+                set -x
+                cd ../{model['relative_path']}
+                python3 offline_inference.py --model ./baichuan2-7b/int8/ --chat_template template_baichuan.jinja --quantization w8a16 --max-num-seqs 1 --max-model-len 256 --trust-remote-code --temperature 0.0 --max-tokens 256
+                """
+        elif model_name == "chatglm3-6b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            python3 offline_inference.py --model ./chatglm3-6b --trust-remote-code --temperature 0.0 --max-tokens 256
+            """
+        elif model_name == "chatglm3-6b-32k":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            python3 offline_inference.py --model ./chatglm3-6b-32k --trust-remote-code --temperature 0.0 --max-tokens 256
+            """
+        elif model_name == "llama2-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            python3 offline_inference.py --model ./llama2-7b --max-tokens 256 -tp 1 --temperature 0.0
+            """
+        elif model_name == "llama3-70b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1,2,3
+            python3 offline_inference.py --model ./llama3-70b --max-tokens 256 -tp 4 --temperature 0.0
+            """
+        elif model_name == "qwen-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1
+            python3 offline_inference.py --model ./qwen-7b --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0
+            """
+        elif model_name == "qwen1.5-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            python3 offline_inference.py --model ./qwen1.5-7b --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 3096
+            """
+        elif model_name == "qwen1.5-14b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            python3 offline_inference.py --model ./qwen1.5-14b --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 896
+            """
+        elif model_name == "qwen1.5-32b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1,2,3
+            python3 offline_inference.py --model ./qwen1.5-32b --max-tokens 256 -tp 4 --temperature 0.0
+            """
+        elif model_name == "qwen1.5-72b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1
+            python3 offline_inference.py --model ./qwen1.5-72b --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 3096
+            """
+        elif model_name == "qwen2-7b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0
+            python3 offline_inference.py --model ./qwen2-7b --max-tokens 256 -tp 1 --temperature 0.0
+            """
+        elif model_name == "qwen2-72b":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1,2,3
+            python3 offline_inference.py --model ./qwen2-72b --max-tokens 256 -tp 4 --temperature 0.0 --gpu-memory-utilization 0.98 --max-model-len 32768
+            """
+        elif model_name == "stablelm":
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export CUDA_VISIBLE_DEVICES=0,1
+            python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0
+            """
+        elif model_name == "minicpm-v-2":
+            # Note: ${{PATH}} keeps the shell variable out of the f-string;
+            # an unescaped ${PATH} would raise a NameError at runtime.
+            script = f"""
+            set -x
+            cd ../{model['relative_path']}
+            export PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
+            export PATH=/usr/local/corex/bin:${{PATH}}
+            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
+            python3 minicpmv-2.0-offline.py --model-path ./minicpm-v-2 --image-path ./dog.jpg
+            """
+
+        r, t = run_script(script)
+        sout = r.stdout
+
+        pattern = METRIC_PATTERN
+        matches = re.findall(pattern, sout)
+        result["result"].setdefault(prec, {"status": "FAIL"})
+        logging.debug(f"matches:\n{matches}")
+        for m in matches:
+            result["result"][prec].update(get_metric_result(m))
+        # A case passes only when exactly one metric line is found in stdout.
+        if len(matches) == 1:
+            result["result"][prec]["status"] = "PASS"
+
+        result["result"][prec]["Cost time (s)"] = t
+    return result
+
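+# get_metric_result parses a single matched metric line back into a dict,
+# e.g. (illustrative): "{'metricResult': {'tokens': 757, 'QPS': 97.972}}"
+# -> {'tokens': 757, 'QPS': 97.972}. The single-to-double quote swap is what
+# makes the printed Python dict valid JSON.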
+def get_metric_result(text):
+    if text:
+        return json.loads(text.replace("'", '"'))["metricResult"]
+    return None
+
+def run_script(script):
+    start_time = time.perf_counter()
+    result = subprocess.run(
+        script, shell=True, capture_output=True, text=True, executable="/bin/bash"
+    )
+    end_time = time.perf_counter()
+    execution_time = end_time - start_time
+    logging.debug(f"Command:\n{script}")
+    logging.debug("Execution time: {:.4f} s".format(execution_time))
+    logging.debug(f"stdout: {result.stdout}")
+    logging.debug(f"stderr: {result.stderr}")
+    logging.debug(f"Return code: {result.returncode}")
+    return result, execution_time
+
+if __name__ == "__main__":
+    main()
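+
+# Usage sketch (assuming the runner is invoked from tests/, so that
+# models_vllm.yaml resolves and the `cd ../{relative_path}` steps work;
+# both invocations are equivalent):
+#
+#   python3 run_vllm.py --model qwen2-7b
+#   TEST_CASE=qwen2-7b python3 run_vllm.py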