From ab276af9075dc823bf8c3c3cdf4ebc47457b5261 Mon Sep 17 00:00:00 2001 From: "mingjiang.li" Date: Tue, 18 Feb 2025 17:32:51 +0800 Subject: [PATCH] add DeepSeek R1 Distill models to model list Signed-off-by: mingjiang.li --- README.md | 40 +++++++++++-------- .../vllm/README.md | 5 ++- .../vllm/ci/prepare.sh | 2 +- .../vllm/offline_inference.py | 15 +++++++ .../vllm/README.md | 11 +++-- .../vllm/ci/prepare.sh | 2 +- .../vllm/offline_inference.py | 15 +++++++ .../vllm/README.md | 9 +++-- .../vllm/ci/prepare.sh | 2 +- .../vllm/offline_inference.py | 15 +++++++ .../vllm/README.md | 11 +++-- .../vllm/ci/prepare.sh | 2 +- .../vllm/offline_inference.py | 15 +++++++ .../vllm/README.md | 11 +++-- .../vllm/ci/prepare.sh | 2 +- .../vllm/offline_inference.py | 15 +++++++ .../vllm/README.md | 11 +++-- .../vllm/ci/prepare.sh | 2 +- .../vllm/offline_inference.py | 15 +++++++ 19 files changed, 157 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index d15f9dc9..d8156156 100644 --- a/README.md +++ b/README.md @@ -183,23 +183,29 @@ DeepSparkInference将按季度进行版本更新,后续会逐步丰富模型 ## LLM (Large Language Model) -| Model | vLLM | TRT-LLM | TGI | -|--------------------|---------------------------------------------------------------------|------------------------------------------------------------------|-------------------------------------------------------------------------------------| -| Baichuan2-7B | [✅](models/nlp/large_language_model/baichuan2-7b/vllm/README.md) | | | -| ChatGLM-3-6B | [✅](models/nlp/large_language_model/chatglm3-6b/vllm/README.md) | | | -| ChatGLM-3-6B-32K | [✅](models/nlp/large_language_model/chatglm3-6b-32k/vllm/README.md) | | | -| Llama2-7B | [✅](models/nlp/large_language_model/llama2-7b/vllm/README.md) | [✅](models/nlp/large_language_model/llama2-7b/trtllm/README.md) | | -| Llama2-13B | | [✅](models/nlp/large_language_model/llama2-13b/trtllm/README.md) | | -| Llama2-70B | | [✅](models/nlp/large_language_model/llama2-70b/trtllm/README.md) | | -| Llama3-70B | [✅](models/nlp/large_language_model/llama3-70b/vllm/README.md) | | | -| Qwen-7B | [✅](models/nlp/large_language_model/qwen-7b/vllm/README.md) | | | -| Qwen1.5-7B | [✅](models/nlp/large_language_model/qwen1.5-7b/vllm/README.md) | | [✅](models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/README.md) | -| Qwen1.5-14B | [✅](models/nlp/large_language_model/qwen1.5-14b/vllm/README.md) | | | -| Qwen1.5-32B Chat | [✅](models/nlp/large_language_model/qwen1.5-32b/vllm/README.md) | | | -| Qwen1.5-72B | [✅](models/nlp/large_language_model/qwen1.5-72b/vllm/README.md) | | | -| Qwen2-7B Instruct | [✅](models/nlp/large_language_model/qwen2-7b/vllm/README.md) | | | -| Qwen2-72B Instruct | [✅](models/nlp/large_language_model/qwen2-72b/vllm/README.md) | | | -| StableLM2-1.6B | [✅](models/nlp/large_language_model/stablelm/vllm/README.md) | | | +| Model | vLLM | TRT-LLM | TGI | +|-------------------------------|-----------------------------------------------------------------------------------|------------------------------------------------------------------|-------------------------------------------------------------------------------------| +| Baichuan2-7B | [✅](models/nlp/large_language_model/baichuan2-7b/vllm/README.md) | | | +| ChatGLM-3-6B | [✅](models/nlp/large_language_model/chatglm3-6b/vllm/README.md) | | | +| ChatGLM-3-6B-32K | [✅](models/nlp/large_language_model/chatglm3-6b-32k/vllm/README.md) | | | +| DeepSeek-R1-Distill-Llama-8B | [✅](models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/README.md) | | | +| DeepSeek-R1-Distill-Llama-70B | [✅](models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/README.md) | | | +| DeepSeek-R1-Distill-Qwen-1.5B | [✅](models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/README.md) | | | +| DeepSeek-R1-Distill-Qwen-7B | [✅](models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/README.md) | | | +| DeepSeek-R1-Distill-Qwen-14B | [✅](models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/README.md) | | | +| DeepSeek-R1-Distill-Qwen-32B | [✅](models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/README.md) | | | +| Llama2-7B | [✅](models/nlp/large_language_model/llama2-7b/vllm/README.md) | [✅](models/nlp/large_language_model/llama2-7b/trtllm/README.md) | | +| Llama2-13B | | [✅](models/nlp/large_language_model/llama2-13b/trtllm/README.md) | | +| Llama2-70B | | [✅](models/nlp/large_language_model/llama2-70b/trtllm/README.md) | | +| Llama3-70B | [✅](models/nlp/large_language_model/llama3-70b/vllm/README.md) | | | +| Qwen-7B | [✅](models/nlp/large_language_model/qwen-7b/vllm/README.md) | | | +| Qwen1.5-7B | [✅](models/nlp/large_language_model/qwen1.5-7b/vllm/README.md) | | [✅](models/nlp/large_language_model/qwen1.5-7b/text-generation-inference/README.md) | +| Qwen1.5-14B | [✅](models/nlp/large_language_model/qwen1.5-14b/vllm/README.md) | | | +| Qwen1.5-32B Chat | [✅](models/nlp/large_language_model/qwen1.5-32b/vllm/README.md) | | | +| Qwen1.5-72B | [✅](models/nlp/large_language_model/qwen1.5-72b/vllm/README.md) | | | +| Qwen2-7B Instruct | [✅](models/nlp/large_language_model/qwen2-7b/vllm/README.md) | | | +| Qwen2-72B Instruct | [✅](models/nlp/large_language_model/qwen2-72b/vllm/README.md) | | | +| StableLM2-1.6B | [✅](models/nlp/large_language_model/stablelm/vllm/README.md) | | | ## Multimodal diff --git a/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/README.md b/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/README.md index 26049a59..af8b5abb 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/README.md +++ b/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/README.md @@ -2,7 +2,9 @@ ## Description -DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. +DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by +DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, +8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. ## Setup @@ -31,6 +33,7 @@ ln -s /path/to/DeepSeek-R1-Distill-Llama-70B ./data/ ```bash python3 offline_inference.py --model ./data/DeepSeek-R1-Distill-Llama-70B --max-tokens 256 -tp 8 --temperature 0.0 --max-model-len 3096 ``` + ## Inference with serve ```bash diff --git a/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/ci/prepare.sh b/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/ci/prepare.sh index 75fb1945..0fa3df9b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/ci/prepare.sh +++ b/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/ci/prepare.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may diff --git a/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/offline_inference.py b/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/offline_inference.py index 9b7d87fd..7653847b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/offline_inference.py +++ b/models/nlp/large_language_model/deepseek-r1-distill-llama-70b/vllm/offline_inference.py @@ -1,3 +1,18 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys from pathlib import Path import os diff --git a/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/README.md b/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/README.md index 903a7a16..b5b9c6d0 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/README.md +++ b/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/README.md @@ -2,7 +2,9 @@ ## Description -DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. +DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by +DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, +8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. ## Setup @@ -31,6 +33,7 @@ ln -s /path/to/DeepSeek-R1-Distill-Llama-8B ./data/ ```bash python3 offline_inference.py --model ./data/DeepSeek-R1-Distill-Llama-8B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 3096 ``` + ## Inference with serve ```bash @@ -39,9 +42,9 @@ vllm serve data/DeepSeek-R1-Distill-Llama-8B --tensor-parallel-size 2 --max-mode ## Results -| Model | QPS | -| ---------- | ----- | -| DeepSeek-R1-Distill-Llama-8B | 105.33| +| Model | QPS | +|------------------------------|--------| +| DeepSeek-R1-Distill-Llama-8B | 105.33 | ## Reference diff --git a/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/ci/prepare.sh b/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/ci/prepare.sh index 75fb1945..0fa3df9b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/ci/prepare.sh +++ b/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/ci/prepare.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may diff --git a/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/offline_inference.py b/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/offline_inference.py index 9b7d87fd..7653847b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/offline_inference.py +++ b/models/nlp/large_language_model/deepseek-r1-distill-llama-8b/vllm/offline_inference.py @@ -1,3 +1,18 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys from pathlib import Path import os diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/README.md b/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/README.md index c52fc0f5..88eb5163 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/README.md +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/README.md @@ -2,7 +2,9 @@ ## Description -DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. +DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by +DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, +8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. ## Setup @@ -31,6 +33,7 @@ ln -s /path/to/DeepSeek-R1-Distill-Qwen-1.5B ./data/ ```bash python3 offline_inference.py --model ./data/DeepSeek-R1-Distill-Qwen-1.5B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 3096 ``` + ## Inference with serve ```bash @@ -39,8 +42,8 @@ vllm serve data/DeepSeek-R1-Distill-Qwen-1.5B --tensor-parallel-size 2 --max-mod ## Results -| Model | QPS | -| ---------- | ----- | +| Model | QPS | +|-------------------------------|--------| | DeepSeek-R1-Distill-Qwen-1.5B | 259.42 | ## Reference diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/ci/prepare.sh b/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/ci/prepare.sh index 75fb1945..0fa3df9b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/ci/prepare.sh +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/ci/prepare.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py b/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py index 9b7d87fd..7653847b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-1.5b/vllm/offline_inference.py @@ -1,3 +1,18 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys from pathlib import Path import os diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/README.md b/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/README.md index 7d20eeee..f24c6904 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/README.md +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/README.md @@ -2,7 +2,9 @@ ## Description -DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. +DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by +DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, +8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. ## Setup @@ -31,6 +33,7 @@ ln -s /path/to/DeepSeek-R1-Distill-Qwen-14B ./data/ ```bash python3 offline_inference.py --model ./data/DeepSeek-R1-Distill-Qwen-14B --max-tokens 256 -tp 2 --temperature 0.0 --max-model-len 3096 ``` + ## Inference with serve ```bash @@ -39,9 +42,9 @@ vllm serve data/DeepSeek-R1-Distill-Qwen-14B --tensor-parallel-size 2 --max-mode ## Results -| Model | QPS | -| ---------- | ----- | -| DeepSeek-R1-Distill-Qwen-14B | 88.01| +| Model | QPS | +|------------------------------|-------| +| DeepSeek-R1-Distill-Qwen-14B | 88.01 | ## Reference diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/ci/prepare.sh b/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/ci/prepare.sh index 75fb1945..0fa3df9b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/ci/prepare.sh +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/ci/prepare.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py b/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py index 9b7d87fd..7653847b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-14b/vllm/offline_inference.py @@ -1,3 +1,18 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys from pathlib import Path import os diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/README.md b/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/README.md index e1c8ca81..4a2b85bc 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/README.md +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/README.md @@ -2,7 +2,9 @@ ## Description -DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. +DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by +DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, +8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. ## Setup @@ -31,6 +33,7 @@ ln -s /path/to/DeepSeek-R1-Distill-Qwen-32B ./data/ ```bash python3 offline_inference.py --model ./data/DeepSeek-R1-Distill-Qwen-32B --max-tokens 256 -tp 4 --temperature 0.0 --max-model-len 3096 ``` + ## Inference with serve ```bash @@ -39,9 +42,9 @@ vllm serve data/DeepSeek-R1-Distill-Qwen-32B --tensor-parallel-size 4 --max-mode ## Results -| Model | QPS | -| ---------- | ----- | -| DeepSeek-R1-Distill-Qwen-32B | 68.30| +| Model | QPS | +|------------------------------|-------| +| DeepSeek-R1-Distill-Qwen-32B | 68.30 | ## Reference diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/ci/prepare.sh b/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/ci/prepare.sh index 75fb1945..0fa3df9b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/ci/prepare.sh +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/ci/prepare.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py b/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py index 9b7d87fd..7653847b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-32b/vllm/offline_inference.py @@ -1,3 +1,18 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys from pathlib import Path import os diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/README.md b/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/README.md index 8d72e0c7..fba0db66 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/README.md +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/README.md @@ -2,7 +2,9 @@ ## Description -DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, 8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. +DeepSeek-R1-Distill models are fine-tuned based on open-source models, using samples generated by +DeepSeek-R1. We slightly change their configs and tokenizers. We open-source distilled 1.5B, 7B, +8B, 14B, 32B, and 70B checkpoints based on Qwen2.5 and Llama3 series to the community. ## Setup @@ -31,6 +33,7 @@ ln -s /path/to/DeepSeek-R1-Distill-Qwen-7B ./data/ ```bash python3 offline_inference.py --model ./data/DeepSeek-R1-Distill-Qwen-7B --max-tokens 256 -tp 1 --temperature 0.0 --max-model-len 3096 ``` + ## Inference with serve ```bash @@ -39,9 +42,9 @@ vllm serve data/DeepSeek-R1-Distill-Qwen-7B --tensor-parallel-size 2 --max-model ## Results -| Model | QPS | -| ---------- | ----- | -| DeepSeek-R1-Distill-Qwen-7B | 90.48| +| Model | QPS | +|-----------------------------|-------| +| DeepSeek-R1-Distill-Qwen-7B | 90.48 | ## Reference diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/ci/prepare.sh b/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/ci/prepare.sh index 75fb1945..0fa3df9b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/ci/prepare.sh +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/ci/prepare.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2024, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may diff --git a/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py b/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py index 9b7d87fd..7653847b 100644 --- a/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py +++ b/models/nlp/large_language_model/deepseek-r1-distill-qwen-7b/vllm/offline_inference.py @@ -1,3 +1,18 @@ +# Copyright (c) 2025, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys from pathlib import Path import os -- Gitee