diff --git a/README.md b/README.md
index b51328219444c0816cb6ce62b66e6da013202715..4ddd9511c5b586b08f9eccaa36ad7bec101d0fab 100644
--- a/README.md
+++ b/README.md
@@ -1168,7 +1168,7 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型
MiniCPM-V-2 |
-    [Supported](models/vision-language-understanding/MiniCPM-V-2/vllm) |
+    [Supported](models/multimodal/vision-language-understanding/minicpm-v-2/vllm) |
- |
- |
diff --git a/models/vision-language-understanding/MiniCPM-V-2/vllm/README.md b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md
similarity index 100%
rename from models/vision-language-understanding/MiniCPM-V-2/vllm/README.md
rename to models/multimodal/vision-language-understanding/minicpm-v-2/vllm/README.md
diff --git a/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f1c0b9c835b299341cc45a866800e7b4a4bd5c89
--- /dev/null
+++ b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/ci/prepare.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install timm==0.9.10
+pip3 install transformers
+pip3 install --user --upgrade pillow -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+cp /mnt/deepspark/data/datasets/dog.jpg ./
\ No newline at end of file
diff --git a/models/vision-language-understanding/MiniCPM-V-2/vllm/minicpmv-2.0-offline.py b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py
similarity index 93%
rename from models/vision-language-understanding/MiniCPM-V-2/vllm/minicpmv-2.0-offline.py
rename to models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py
index d6add4d8f00fcc8bb307767d149dad8009f182b0..1da0fdd8e5bb7c507c5f24d8f66f0d0e5d7d5967 100644
--- a/models/vision-language-understanding/MiniCPM-V-2/vllm/minicpmv-2.0-offline.py
+++ b/models/multimodal/vision-language-understanding/minicpm-v-2/vllm/minicpmv-2.0-offline.py
@@ -38,9 +38,9 @@ def main(args):
llm = LLM(model=MODEL_NAME,
gpu_memory_utilization=0.95, # 使用全部GPU内存
trust_remote_code=True,
- max_model_len=1024,
- max_num_seqs=1,
- max_num_batched_tokens=1024,) # 根据内存状况可调整此值
+ max_model_len=2048,
+ # max_num_seqs=1,
+ max_num_batched_tokens=2048,) # 根据内存状况可调整此值
# 构建对话消息
messages = [{'role': 'user', 'content': '(./)\n' + '请描述这张图片'}]
@@ -64,10 +64,10 @@ def main(args):
# top_p=0.8,
# top_k=100,
# seed=3472,
- max_tokens=128,
+ max_tokens=1024,
# min_tokens=150,
temperature=0,
- use_beam_search=False,
+ # use_beam_search=False,
# length_penalty=1.2,
best_of=1)
diff --git a/models/nlp/large_language_model/baichuan2-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/baichuan2-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..54b66b4eba0c0d7beb5f93d3699e4377dae399c0
--- /dev/null
+++ b/models/nlp/large_language_model/baichuan2-7b/vllm/ci/prepare.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install transformers==4.37.1
+
+# already prepared in CI
+# ln -s /mnt/deepspark/data/checkpoints/Baichuan2-7B-Base ./
+
+python3 convert2int8.py --model-path ./baichuan2-7b/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py b/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py
index 40c0e2e10deb2869ad38dcdd2663a6ed8d5baa23..9e5738a17c9b532a4c38b3365a55df54c748a51d 100644
--- a/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/baichuan2-7b/vllm/offline_inference.py
@@ -109,4 +109,9 @@ for i, output in enumerate(outputs):
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
+
# 0.3.2 tokens: 757, QPS: 97.97229589080902
\ No newline at end of file
diff --git a/models/nlp/large_language_model/chatglm3-6b-32k/vllm/ci/prepare.sh b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..ad683f6e992593e59d91544c2e1e6b724a6245ec
--- /dev/null
+++ b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install transformers==4.37.1
diff --git a/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py
index bc731079f72988cd20c5a68b3ccb4e192769c8fb..7fc45b685046e85411b17ea4edf7ff46b8bf52a6 100644
--- a/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/chatglm3-6b-32k/vllm/offline_inference.py
@@ -57,7 +57,7 @@ if __name__ == "__main__":
model_name = model_name.rsplit("/")[-1]
# Sample prompts.
- prompts = ["Щܱһھ?", "һ֥ʿ", "дһƪй5Gз¡"]
+ prompts = ["哪些迹象可能表明一个人正在经历焦虑?", "描述一下如何制作芝士披萨。", "写一篇有关5G网络研发的综述文章。"]
# Create a sampling params object.
sampling_params = SamplingParams(**sampling_params)
diff --git a/models/nlp/large_language_model/chatglm3-6b/vllm/ci/prepare.sh b/models/nlp/large_language_model/chatglm3-6b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4cfd5fd6f1fd9dcc9fab83d9023b33e4752606a9
--- /dev/null
+++ b/models/nlp/large_language_model/chatglm3-6b/vllm/ci/prepare.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install vllm==0.5.0
+pip3 install transformers==4.37.1
diff --git a/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py b/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
index 0162d93c53ac839268b3c964e0e96ecaad63ac4e..eaa7fe206459819731b00523c6d40224afafc545 100644
--- a/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/chatglm3-6b/vllm/offline_inference.py
@@ -99,4 +99,9 @@ for i, output in enumerate(outputs):
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
+
# 0.3.2 tokens: 422, QPS: 70.02308283048338(tokens: 422, QPS: 93.67210003677407),32-k 模型 tokens: 477, QPS: 81.46537314533865(tokens: 477, QPS: 106.54247895449554)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-13b/trtllm/README.md b/models/nlp/large_language_model/llama2-13b/trtllm/README.md
index 4658334d0f2c2c262ff613a3127f9832965e465f..b24c29ce921f2571833a50d01ad3035ec28e1ea5 100755
--- a/models/nlp/large_language_model/llama2-13b/trtllm/README.md
+++ b/models/nlp/large_language_model/llama2-13b/trtllm/README.md
@@ -18,9 +18,6 @@ apt install -y libgl1-mesa-dev
bash scripts/set_environment.sh .
-# *star refer to a specified version
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/tensorrt_llm-*.whl
-pip install tensorrt_llm-*.whl
```
### Download
diff --git a/models/nlp/large_language_model/llama2-13b/trtllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-13b/trtllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7947d62bee569e9f109283e843b288fc68148f0e
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-13b/trtllm/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+bash scripts/set_environment.sh .
+
+# Download model from the website and make sure the model's path is "data/llama2-13b-chat"
+# Download dataset from the website and make sure the dataset's path is "data/datasets_cnn_dailymail"
+mkdir -p data
+ln -s /mnt/deepspark/data/checkpoints/llama2-13b-chat data/llama2-13b-chat
+ln -s /mnt/deepspark/data/datasets/datasets_cnn_dailymail data/datasets_cnn_dailymail
+# Please download rouge.py to this path if your server can't access huggingface.co.
+mkdir -p rouge/
+cp /mnt/deepspark/data/3rd_party/rouge.py rouge/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-70b/trtllm/README.md b/models/nlp/large_language_model/llama2-70b/trtllm/README.md
index 671b51c79f01d8760df490d167e32814634d507d..621dbffeeaac3387030e326e6e24f918644c9ae1 100644
--- a/models/nlp/large_language_model/llama2-70b/trtllm/README.md
+++ b/models/nlp/large_language_model/llama2-70b/trtllm/README.md
@@ -16,10 +16,6 @@ yum install -y mesa-libGL
apt install -y libgl1-mesa-dev
bash scripts/set_environment.sh .
-
-# *star refer to a specified version
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/tensorrt_llm-*.whl
-pip install tensorrt_llm-*.whl
```
### Download
diff --git a/models/nlp/large_language_model/llama2-70b/trtllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-70b/trtllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4a8c1e4e321df3e2ca3228b3456029062731ddfd
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-70b/trtllm/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+bash scripts/set_environment.sh .
+
+# Download model from the website and make sure the model's path is "data/llama2-70b-chat"
+# Download dataset from the website and make sure the dataset's path is "data/datasets_cnn_dailymail"
+mkdir -p data
+ln -s /mnt/deepspark/data/checkpoints/llama2-70b-chat data/llama2-70b-chat
+ln -s /mnt/deepspark/data/datasets/datasets_cnn_dailymail data/datasets_cnn_dailymail
+# Please download rouge.py to this path if your server can't access huggingface.co.
+mkdir -p rouge/
+cp /mnt/deepspark/data/3rd_party/rouge.py rouge/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-7b/trtllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-7b/trtllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2ac2384d24f563195692e601c86010febfcac6ef
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-7b/trtllm/ci/prepare.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+bash scripts/set_environment.sh .
+
+# Download model from the website and make sure the model's path is "data/llama2-7b-chat"
+# Download dataset from the website and make sure the dataset's path is "data/datasets_cnn_dailymail"
+mkdir -p data
+ln -s /mnt/deepspark/data/checkpoints/llama2-7b-chat data/llama2-7b-chat
+ln -s /mnt/deepspark/data/datasets/datasets_cnn_dailymail data/datasets_cnn_dailymail
+# Please download rouge.py to this path if your server can't access huggingface.co.
+mkdir -p rouge/
+cp /mnt/deepspark/data/3rd_party/rouge.py rouge/
\ No newline at end of file
diff --git a/models/nlp/large_language_model/llama2-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/llama2-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6afe9667d3033ef4fe624e70662dc90bd6e563f7
--- /dev/null
+++ b/models/nlp/large_language_model/llama2-7b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the Iluvatar manager to get the adapted installation packages for vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
diff --git a/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py b/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py
index 9c0b6d2fabcd1e9f64f59208336f12cd8d0def0c..538d35410b4a178602726608cbbae21550e2f90b 100644
--- a/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/llama2-7b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["tokens"] = num_tokens
+ metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+ print(metricResult)
diff --git a/models/nlp/large_language_model/llama3-70b/vllm/ci/prepare.sh b/models/nlp/large_language_model/llama3-70b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..000245a822b911a916c2693f3b2adfbff570520b
--- /dev/null
+++ b/models/nlp/large_language_model/llama3-70b/vllm/ci/prepare.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
diff --git a/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py b/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py
index cdf635a71debcbb68945a745e8cd7b2151968c77..6932fde6ba557964767d36b123dca9c4f4bf05c7 100644
--- a/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/llama3-70b/vllm/offline_inference.py
@@ -151,6 +151,10 @@ if args.acc_test:
print('val ROUGE-1 score f1: {}, target ROUGE-1 score f1: {}, fail'.format(scores[0]["rouge-1"]['f'],args.acc_threshold))
exit(1)
print('val ROUGE-1 score f1: {}, target ROUGE-1 score f1: {}, pass'.format(scores[0]["rouge-1"]['f'],args.acc_threshold))
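+    # Emit the metrics as a single dict literal; tests/run_vllm.py greps this line from stdout.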
+ metricResult = {"metricResult": {}}
+    metricResult["metricResult"]["val ROUGE-1 score f1"] = scores[0]["rouge-1"]['f']
+    metricResult["metricResult"]["target ROUGE-1 score f1"] = args.acc_threshold
+ print(metricResult)
# 2 7b vllm 0.1.6: batch 3, tokens: 773, QPS: 64.35866137433203; batch 1, tokens: 257, QPS: 25.396898421442113
# 1\2 13b vllm 0.1.6: batch 3, tokens: 768, QPS: 41.538942353799506; batch 1, tokens: 257, QPS: 15.639606595029639 (2, 6.5829828847570795; 8, 5.137610167755676)
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
index 33b0aab2bf76ae501b61e91c0a6104194fb4654a..729b9833fa6e0d7947f72dde56206988646bc299 100644
--- a/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/README.md
@@ -17,10 +17,6 @@ yum install -y mesa-libGL
## Ubuntu
apt install -y libgl1-mesa-dev
-# *star refer to a specified version
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/text-generation-*.whl
-wget http://files.deepspark.org.cn:880/deepspark/add-ons/text-generation-server-*.whl
-pip install tensorrt_llm-*.whl text-generation-server-*.whl
```
### Download
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/ci/prepare.sh b/models/nlp/large_language_model/qwen-7b/text-generation-inference/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4b2fdf8b5b521defcd963cd9e9fe92bd271dc2cf
--- /dev/null
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/ci/prepare.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+mkdir -p data
+
+ln -s /mnt/deepspark/data/checkpoints/qwen-7B data/qwen-7B
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py b/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py
index 57db633401e7849adac36f5f9e6ad166fdf38bbd..e3ebcc3a5e6b40d7801ebeda885710aec4dd6f08 100644
--- a/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py
+++ b/models/nlp/large_language_model/qwen-7b/text-generation-inference/offline_inference.py
@@ -109,6 +109,11 @@ if __name__ == "__main__":
duration_time = end_time - start_time
print(f"generate length: {generations_one[0].generated_text.generated_tokens}")
print(f"one batch: {generations_one[0].generated_text.text}\nqps: {generations_one[0].generated_text.generated_tokens /duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["generate length"] = generations_one[0].generated_text.generated_tokens
+ metricResult["metricResult"]["one batch"] = generations_one[0].generated_text.text
+ metricResult["metricResult"]["qps"] = generations_one[0].generated_text.generated_tokens /duration_time
+ print(metricResult)
"""
qwen-7B
diff --git a/models/nlp/large_language_model/qwen-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen-7b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the Iluvatar manager to get the adapted installation packages for vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py
index 3b9e9fd89ae1043b6055a01087d8d6e421281c5e..5de14fb0f6989e50ed03725946cb17543d30832d 100644
--- a/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen-7b/vllm/offline_inference.py
@@ -130,3 +130,7 @@ if __name__ == "__main__":
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["tokens"] = num_tokens
+ metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+ print(metricResult)
diff --git a/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md b/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md
index b3c67597fbb13b3d404969aa0f4c09208b131e8c..905967c2c7372eba6bfb3791fe6833400ba68ee6 100644
--- a/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md
+++ b/models/nlp/large_language_model/qwen1.5-14b/vllm/README.md
@@ -29,7 +29,7 @@ ln -s /path/to/Qwen1.5-14B ./data/qwen1.5
## Inference
```bash
-python3 offline_inference.py --model ./data/qwen1.5/Qwen1.5-14B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 1024
+python3 offline_inference.py --model ./data/qwen1.5/Qwen1.5-14B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 896
```
## Results
diff --git a/models/nlp/large_language_model/qwen1.5-14b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-14b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..75fb19458942e1d61a674c1d6dd9bbdb521bc00c
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-14b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
diff --git a/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py
index 7ee127a259eb78f91d71c07b4a129464e0cc6cd3..130f0885a4a85b657d71d6204bdf78a2cc9e871a 100644
--- a/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-14b/vllm/offline_inference.py
@@ -108,3 +108,7 @@ for i, output in enumerate(outputs):
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-32b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-32b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-32b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the Iluvatar manager to get the adapted installation packages for vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py
index 5e85929151748155349ec1de2bd89b9789f48574..9799150f7c1221b3d8f58857d3463727c9d5400e 100644
--- a/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-32b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["tokens"] = num_tokens
+ metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+ print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-72b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-72b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..75fb19458942e1d61a674c1d6dd9bbdb521bc00c
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-72b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
diff --git a/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py
index 7ee127a259eb78f91d71c07b4a129464e0cc6cd3..130f0885a4a85b657d71d6204bdf78a2cc9e871a 100644
--- a/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-72b/vllm/offline_inference.py
@@ -108,3 +108,7 @@ for i, output in enumerate(outputs):
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c801677cefaf6638b3a9695a7da4d84a3a66fcc1
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/ci/prepare.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+mkdir -p data
+
+ln -s /mnt/deepspark/data/checkpoints/Qwen1.5-7B data/Qwen1.5-7B
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py
index b927973a76953e189d4c4ebd4ee10bc392e0b4f0..87f4df9885a635af2e019fb76bbca2c5210f0cb4 100644
--- a/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/offline_inference.py
@@ -115,6 +115,11 @@ if __name__ == "__main__":
duration_time = end_time - start_time
print(f"generate length: {generations_one[0].generated_text.generated_tokens}")
print(f"one batch: {generations_one[0].generated_text.text}\nqps: {generations_one[0].generated_text.generated_tokens /duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["generate length"] = generations_one[0].generated_text.generated_tokens
+ metricResult["metricResult"]["one batch"] = generations_one[0].generated_text.text
+ metricResult["metricResult"]["qps"] = generations_one[0].generated_text.generated_tokens /duration_time
+ print(metricResult)
"""
qwen1.5-0.5B
diff --git a/models/nlp/large_language_model/qwen1.5-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen1.5-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..75fb19458942e1d61a674c1d6dd9bbdb521bc00c
--- /dev/null
+++ b/models/nlp/large_language_model/qwen1.5-7b/vllm/ci/prepare.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
diff --git a/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py
index 7ee127a259eb78f91d71c07b4a129464e0cc6cd3..bae01307762ee571e9c8bd5f77af10a177bbf28d 100644
--- a/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen1.5-7b/vllm/offline_inference.py
@@ -108,3 +108,7 @@ for i, output in enumerate(outputs):
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+metricResult = {"metricResult": {}}
+metricResult["metricResult"]["tokens"] = num_tokens
+metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+print(metricResult)
diff --git a/models/nlp/large_language_model/qwen2-72b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen2-72b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen2-72b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the Iluvatar manager to get the adapted installation packages for vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py
index 5e85929151748155349ec1de2bd89b9789f48574..9799150f7c1221b3d8f58857d3463727c9d5400e 100644
--- a/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen2-72b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["tokens"] = num_tokens
+ metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+ print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen2-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/qwen2-7b/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5031a7d3e6ac57abbc085dd41fc1063482bee
--- /dev/null
+++ b/models/nlp/large_language_model/qwen2-7b/vllm/ci/prepare.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+# Contact the Iluvatar manager to get the adapted installation packages for vllm, triton, and ixformer
+pip3 install vllm
+pip3 install triton
+pip3 install ixformer
\ No newline at end of file
diff --git a/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py b/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py
index 5e85929151748155349ec1de2bd89b9789f48574..9799150f7c1221b3d8f58857d3463727c9d5400e 100644
--- a/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/qwen2-7b/vllm/offline_inference.py
@@ -129,3 +129,7 @@ if __name__ == "__main__":
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["tokens"] = num_tokens
+ metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+ print(metricResult)
\ No newline at end of file
diff --git a/models/nlp/large_language_model/stablelm/vllm/ci/prepare.sh b/models/nlp/large_language_model/stablelm/vllm/ci/prepare.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6d81455870a7e0da387248def8bb77bbae5e417f
--- /dev/null
+++ b/models/nlp/large_language_model/stablelm/vllm/ci/prepare.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -x
+ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
+if [[ ${ID} == "ubuntu" ]]; then
+ apt install -y libgl1-mesa-glx
+elif [[ ${ID} == "centos" ]]; then
+ yum install -y mesa-libGL
+else
+    echo "Unsupported OS"
+fi
+
+pip3 install transformers
\ No newline at end of file
diff --git a/models/nlp/large_language_model/stablelm/vllm/offline_inference.py b/models/nlp/large_language_model/stablelm/vllm/offline_inference.py
index 40678a62ea18296ecdd53cbbcf7d8c3c25e0950d..e9f2abfb7002071d0ce520be433eb972fd0def4b 100644
--- a/models/nlp/large_language_model/stablelm/vllm/offline_inference.py
+++ b/models/nlp/large_language_model/stablelm/vllm/offline_inference.py
@@ -132,4 +132,8 @@ if __name__ == "__main__":
num_tokens += len(output.outputs[0].token_ids)
print(f"Prompt: {prompt}\nGenerated text: {generated_text} \n")
- print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
\ No newline at end of file
+ print(f"tokens: {num_tokens}, QPS: {num_tokens/duration_time}")
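+    # Emit the metrics as a single dict literal; tests/run_vllm.py greps this line from stdout.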
+ metricResult = {"metricResult": {}}
+ metricResult["metricResult"]["tokens"] = num_tokens
+ metricResult["metricResult"]["QPS"] = round(num_tokens/duration_time,3)
+ print(metricResult)
\ No newline at end of file
diff --git a/tests/models_trtllm.yaml b/tests/models_trtllm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..de21908e72524b29534ee542380ab49dfb448e30
--- /dev/null
+++ b/tests/models_trtllm.yaml
@@ -0,0 +1,41 @@
+---
+- datasets: https://localhost
+ download_url: https://huggingface.co/meta-llama/llama2-7b-chat
+ name: llama2-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/llama2-7b/trtllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/llama2-13b-chat
+ name: llama2-13b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/llama2-13b/trtllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/llama2-70b-chat
+ name: llama2-70b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/llama2-70b/trtllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/qwen-7B
+ name: qwen-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen-7b/text-generation-inference
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://modelscope.cn/models/qwen/Qwen1.5-7B
+ name: qwen1.5-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen1.5-7b/text-generation-inference
+ task_type: nlp/large_language_model
diff --git a/tests/models_vllm.yaml b/tests/models_vllm.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..548f16c81dbc297b22a666e4982c3a3027311128
--- /dev/null
+++ b/tests/models_vllm.yaml
@@ -0,0 +1,114 @@
+---
+- datasets: https://localhost
+ download_url: https://huggingface.co/baichuan-inc/Baichuan2-7B-Base
+ name: baichuan2-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ - int8
+ relative_path: models/nlp/large_language_model/baichuan2-7b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://huggingface.co/THUDM/chatglm3-6b
+ name: chatglm3-6b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/chatglm3-6b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://www.modelscope.cn/models/ZhipuAI/chatglm3-6b-32k
+ name: chatglm3-6b-32k
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/chatglm3-6b-32k/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/llama2-7b
+ name: llama2-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/llama2-7b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/Meta-Llama-3-70B-Instruct
+ name: llama3-70b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/llama3-70b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/qwen-7B
+ name: qwen-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen-7b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://modelscope.cn/models/qwen/Qwen1.5-7B
+ name: qwen1.5-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen1.5-7b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://modelscope.cn/models/qwen/Qwen1.5-14B
+ name: qwen1.5-14b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen1.5-14b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://modelscope.cn/models/Qwen/Qwen1.5-32B-Chat
+ name: qwen1.5-32b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen1.5-32b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://modelscope.cn/models/qwen/Qwen1.5-72B
+ name: qwen1.5-72b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen1.5-72b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://modelscope.cn/models/Qwen/Qwen2-7B-Instruct
+ name: qwen2-7b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen2-7b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/Qwen2-72B
+ name: qwen2-72b
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/qwen2-72b/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://huggingface.co/stabilityai/stablelm-2-1_6b
+ name: stablelm
+ need_third_part: false
+ precisions:
+ - fp16
+ relative_path: models/nlp/large_language_model/stablelm/vllm
+ task_type: nlp/large_language_model
+- datasets: https://localhost
+ download_url: https://localhost/MiniCPM-V-2
+ name: minicpm-v-2
+ need_third_part: false
+ precisions:
+ - fp16
+  relative_path: models/multimodal/vision-language-understanding/minicpm-v-2/vllm
+ task_type: multimodal/vision-language-understanding
diff --git a/tests/run_trtllm.py b/tests/run_trtllm.py
new file mode 100644
index 0000000000000000000000000000000000000000..911ac6fd80bed287336bd092c521cf6f6478b396
--- /dev/null
+++ b/tests/run_trtllm.py
@@ -0,0 +1,189 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import yaml
+import subprocess
+import json
+import re
+import time
+import logging
+import os
+import sys
+import argparse
+
+import utils
+
+# Configure logging
+debug_level = logging.DEBUG if utils.is_debug() else logging.INFO
+logging.basicConfig(
+ handlers=[logging.FileHandler("output.log"), logging.StreamHandler()],
+ level=debug_level,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
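+# Each test script prints its metrics as a single dict literal on stdout, e.g.
+#   {'metricResult': {'tokens': 757, 'QPS': 97.972}}
+# METRIC_PATTERN greps that line out of the captured output.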
+METRIC_PATTERN = r"{'metricResult':.*}"
+
+def main():
+ parser = argparse.ArgumentParser(description="")
+    parser.add_argument("--model", type=str, help="model name, e.g. llama2-7b")
+ args = parser.parse_args()
+
+ if args.model:
+ test_model = args.model
+ else:
+ test_model = os.environ.get("TEST_CASE")
+ logging.info(f"Test case to run: {test_model}")
+ if not test_model:
+ logging.error("test model case is empty")
+ sys.exit(-1)
+
+ model = get_model_config(test_model)
+ if not model:
+        logging.error("model config is empty")
+ sys.exit(-1)
+
+ result = {}
+    # NLP models
+ if model["task_type"] in ["nlp/large_language_model"]:
+ logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}")
+ d_url = model["download_url"]
+ if d_url is not None:
+ result = run_nlp_testcase(model)
+ check_model_result(result)
+ logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}")
+ logging.info(f"End running {model['name']} test case.")
+
+ logging.info(f"Full text result: {result}")
+
+def get_model_config(model_name):
+ with open("models_trtllm.yaml", "r") as file:
+ models = yaml.safe_load(file)
+
+ for model in models:
+        if model["name"] == model_name.lower():
+ return model
+ return
+
+def check_model_result(result):
+ status = "PASS"
+ for prec in ["fp16", "int8"]:
+ if prec in result["result"]:
+ if result["result"][prec]["status"] == "FAIL":
+ status = "FAIL"
+ break
+ result["status"] = status
+
+def run_nlp_testcase(model):
+ model_name = model["name"]
+ result = {
+ "name": model_name,
+ "result": {},
+ }
+ d_url = model["download_url"]
+ checkpoint_n = d_url.split("/")[-1]
+ dataset_n = model["datasets"].split("/")[-1]
+ prepare_script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ bash ci/prepare.sh
+ """
+
+ # add pip list info when in debug mode
+ if utils.is_debug():
+ pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+ prepare_script = pip_list_script + prepare_script + pip_list_script
+
+ run_script(prepare_script)
+
+ for prec in model["precisions"]:
+ logging.info(f"Start running {model_name} {prec} test case")
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ """
+ if model_name == "llama2-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ bash scripts/test_trtllm_llama2_7b_gpu1_build.sh
+ bash scripts/test_trtllm_llama2_7b_gpu1.sh
+ """
+ elif model_name == "llama2-13b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1
+ bash scripts/test_trtllm_llama2_13b_gpu2_build.sh
+ bash scripts/test_trtllm_llama2_13b_gpu2.sh
+ """
+ elif model_name == "llama2-70b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+ bash scripts/test_trtllm_llama2_70b_gpu8_build.sh
+ bash scripts/test_trtllm_llama2_70b_gpu8.sh
+ """
+ elif model_name == "qwen-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=1
+ python3 offline_inference.py --model2path ./data/qwen-7B
+ """
+ elif model_name == "qwen1.5-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=1
+ python3 offline_inference.py --model2path ./data/Qwen1.5-7B
+ """
+
+ r, t = run_script(script)
+ sout = r.stdout
+
+ pattern = METRIC_PATTERN
+        matches = re.findall(pattern, sout)
+        result["result"].setdefault(prec, {"status": "FAIL"})
+        logging.debug(f"matches:\n{matches}")
+        for m in matches:
+            result["result"][prec].update(get_metric_result(m))
+        if len(matches) == 2:
+            result["result"][prec]["status"] = "PASS"
+
+ result["result"][prec]["Cost time (s)"] = t
+ return result
+
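+# The scripts print a Python-repr dict, so swap single quotes for double quotes
+# before json.loads; this assumes the metric values contain no quote characters.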
+def get_metric_result(s):
+    if s:
+        return json.loads(s.replace("'", "\""))["metricResult"]
+    return None
+
+def run_script(script):
+ start_time = time.perf_counter()
+ result = subprocess.run(
+ script, shell=True, capture_output=True, text=True, executable="/bin/bash"
+ )
+ end_time = time.perf_counter()
+ execution_time = end_time - start_time
+    logging.debug(f"Command executed:\n{script}")
+    logging.debug("Execution time: {:.4f} s".format(execution_time))
+    logging.debug(f"stdout: {result.stdout}")
+    logging.debug(f"stderr: {result.stderr}")
+    logging.debug(f"Return code: {result.returncode}")
+ return result, execution_time
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/run_vllm.py b/tests/run_vllm.py
new file mode 100644
index 0000000000000000000000000000000000000000..8bd3504db634ec4360e502cb27aababd5edace63
--- /dev/null
+++ b/tests/run_vllm.py
@@ -0,0 +1,259 @@
+# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import yaml
+import subprocess
+import json
+import re
+import time
+import logging
+import os
+import sys
+import argparse
+
+import utils
+
+# Configure logging to both a file and the console
+debug_level = logging.DEBUG if utils.is_debug() else logging.INFO
+logging.basicConfig(
+ handlers=[logging.FileHandler("output.log"), logging.StreamHandler()],
+ level=debug_level,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
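+# Matches the metric line each test script prints, e.g. "{'metricResult': {...}}".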
+METRIC_PATTERN = r"{'metricResult':.*}"
+
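+# Usage (either form; "llama2-7b" is one of the cases handled below):
+#   python3 run_vllm.py --model llama2-7b
+#   TEST_CASE=llama2-7b python3 run_vllm.py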
+def main():
+ parser = argparse.ArgumentParser(description="")
+    parser.add_argument("--model", type=str, help="model name, e.g. llama2-7b")
+ args = parser.parse_args()
+
+ if args.model:
+ test_model = args.model
+ else:
+ test_model = os.environ.get("TEST_CASE")
+ logging.info(f"Test case to run: {test_model}")
+ if not test_model:
+ logging.error("test model case is empty")
+ sys.exit(-1)
+
+ model = get_model_config(test_model)
+ if not model:
+ logging.error("mode config is empty")
+ sys.exit(-1)
+
+ result = {}
+    # NLP and multimodal models
+ if model["task_type"] in ["nlp/large_language_model", "multimodal/vision-language-understanding"]:
+ logging.info(f"Start running {model['name']} test case:\n{json.dumps(model, indent=4)}")
+        d_url = model["download_url"]
+        if d_url is not None:
+            result = run_nlp_testcase(model)
+            check_model_result(result)
+            logging.debug(f"The result of {model['name']} is\n{json.dumps(result, indent=4)}")
+        logging.info(f"End running {model['name']} test case.")
+
+ logging.info(f"Full text result: {result}")
+
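+# Look up a model's entry by name in models_vllm.yaml. An entry is expected to
+# look roughly like this (illustrative, based on the fields read below):
+#   - name: llama2-7b
+#     task_type: nlp/large_language_model
+#     relative_path: ...
+#     download_url: ...
+#     datasets: ...
+#     precisions: [fp16]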
+def get_model_config(model_name):
+    with open("models_vllm.yaml", "r") as file:
+        models = yaml.safe_load(file)
+
+    for model in models:
+        if model["name"] == model_name.lower():
+            return model
+    return None
+
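+# A model passes only when every evaluated precision passes.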
+def check_model_result(result):
+ status = "PASS"
+ for prec in ["fp16", "int8"]:
+ if prec in result["result"]:
+ if result["result"][prec]["status"] == "FAIL":
+ status = "FAIL"
+ break
+ result["status"] = status
+
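+# Prepare the model workspace (checkpoint symlink + ci/prepare.sh), run the
+# per-model test script for each precision, and parse metrics from stdout.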
+def run_nlp_testcase(model):
+ model_name = model["name"]
+ result = {
+ "name": model_name,
+ "result": {},
+ }
+ d_url = model["download_url"]
+ checkpoint_n = d_url.split("/")[-1]
+ prepare_script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ ln -s /mnt/deepspark/data/checkpoints/{checkpoint_n} ./{model_name}
+ bash ci/prepare.sh
+ """
+
+ # add pip list info when in debug mode
+ if utils.is_debug():
+ pip_list_script = "pip list | grep -E 'numpy|transformer|igie|mmcv|onnx'\n"
+ prepare_script = pip_list_script + prepare_script + pip_list_script
+
+ run_script(prepare_script)
+
+ for prec in model["precisions"]:
+ logging.info(f"Start running {model_name} {prec} test case")
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ """
+ if model_name == "baichuan2-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./baichuan2-7b/ --max-tokens 256 --trust-remote-code --chat_template template_baichuan.jinja --temperature 0.0
+ """
+ if prec == "int8":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./baichuan2-7b/int8/ --chat_template template_baichuan.jinja --quantization w8a16 --max-num-seqs 1 --max-model-len 256 --trust-remote-code --temperature 0.0 --max-tokens 256
+ """
+ elif model_name == "chatglm3-6b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./chatglm3-6b --trust-remote-code --temperature 0.0 --max-tokens 256
+ """
+ elif model_name == "chatglm3-6b-32k":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./chatglm3-6b-32k --trust-remote-code --temperature 0.0 --max-tokens 256
+ """
+ elif model_name == "llama2-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./llama2-7b --max-tokens 256 -tp 1 --temperature 0.0
+ """
+ elif model_name == "llama3-70b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ python3 offline_inference.py --model ./llama3-70b --max-tokens 256 -tp 4 --temperature 0.0
+ """
+ elif model_name == "qwen-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1
+ python3 offline_inference.py --model ./qwen-7b --max-tokens 256 -tp 2 --trust-remote-code --temperature 0.0
+ """
+ elif model_name == "qwen1.5-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./qwen1.5-7b --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 3096
+ """
+ elif model_name == "qwen1.5-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./qwen1.5-7b --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 3096
+ """
+ elif model_name == "qwen1.5-14b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ python3 offline_inference.py --model ./qwen1.5-14b --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 896
+ """
+ elif model_name == "qwen1.5-32b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ python3 offline_inference.py --model ./qwen1.5-32b --max-tokens 256 -tp 4 --temperature 0.0
+ """
+ elif model_name == "qwen1.5-72b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1
+ python3 offline_inference.py --model ./qwen1.5-72b --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 3096
+ """
+ elif model_name == "qwen2-7b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0
+ python3 offline_inference.py --model ./qwen2-7b --max-tokens 256 -tp 1 --temperature 0.0
+ """
+ elif model_name == "qwen2-72b":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ python3 offline_inference.py --model ./qwen2-72b --max-tokens 256 -tp 4 --temperature 0.0 --gpu-memory-utilization 0.98 --max-model-len 32768
+ """
+ elif model_name == "stablelm":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export CUDA_VISIBLE_DEVICES=0,1
+ python3 offline_inference.py --model ./stablelm --max-tokens 256 -tp 1 --temperature 0.0
+ """
+ elif model_name == "minicpm-v-2":
+ script = f"""
+ set -x
+ cd ../{model['relative_path']}
+ export PT_SDPA_ENABLE_HEAD_DIM_PADDING=1
+            export PATH=/usr/local/corex/bin:${{PATH}}
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
+ python3 minicpmv-2.0-offline.py --model-path ./minicpm-v-2 --image-path ./dog.jpg
+ """
+
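+        # Run the test script and scan its stdout for metric lines.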
+ r, t = run_script(script)
+ sout = r.stdout
+
+        matches = re.findall(METRIC_PATTERN, sout)
+        result["result"].setdefault(prec, {"status": "FAIL"})
+        logging.debug(f"matches:\n{matches}")
+        for m in matches:
+            result["result"][prec].update(get_metric_result(m))
+        # A single metric line is expected from a passing run.
+        if len(matches) == 1:
+            result["result"][prec]["status"] = "PASS"
+
+ result["result"][prec]["Cost time (s)"] = t
+ return result
+
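+# Parse a metric line into a dict; the single-quoted Python repr is converted
+# to JSON before loading, e.g. (metric key illustrative; actual keys come from
+# the test scripts):
+#   "{'metricResult': {'tokens_per_sec': 42.0}}" -> {'tokens_per_sec': 42.0}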
+def get_metric_result(text):
+    if text:
+        return json.loads(text.replace("'", "\""))["metricResult"]
+    return None
+
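+# Run a shell script via bash, capturing output; returns the CompletedProcess
+# and the elapsed wall-clock time in seconds.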
+def run_script(script):
+ start_time = time.perf_counter()
+ result = subprocess.run(
+ script, shell=True, capture_output=True, text=True, executable="/bin/bash"
+ )
+ end_time = time.perf_counter()
+ execution_time = end_time - start_time
+ logging.debug(f"执行命令:\n{script}")
+ logging.debug("执行时间: {:.4f} 秒".format(execution_time))
+ logging.debug(f"标准输出: {result.stdout}")
+ logging.debug(f"标准错误: {result.stderr}")
+ logging.debug(f"返回码: {result.returncode}")
+ return result, execution_time
+
+if __name__ == "__main__":
+ main()