From 3ededf8dd58868532dc4bb89b6ea3a25c48b9d0e Mon Sep 17 00:00:00 2001 From: lijiaming <1228575330@qq.com> Date: Wed, 16 Apr 2025 16:54:22 +0800 Subject: [PATCH] adjust start command for int4 deploy --- ...-V3&R1\351\203\250\347\275\262\346\214\207\345\215\227.md" | 4 ++-- .../workspace/roles/prepare/files/lib/start_ds.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git "a/doc/deepseek/DeepSeek-V3&R1\351\203\250\347\275\262\346\214\207\345\215\227.md" "b/doc/deepseek/DeepSeek-V3&R1\351\203\250\347\275\262\346\214\207\345\215\227.md" index 1726edc..eddb984 100644 --- "a/doc/deepseek/DeepSeek-V3&R1\351\203\250\347\275\262\346\214\207\345\215\227.md" +++ "b/doc/deepseek/DeepSeek-V3&R1\351\203\250\347\275\262\346\214\207\345\215\227.md" @@ -135,8 +135,8 @@ all: vars: # 容器镜像 # 如果本地docker上已加载镜像,改成docker image的image_name和image_tag -image_name: hub.oepkgs.net/oedeploy/openeuler/aarch64/deepseek_hyperinfer - image_tag: openeuler22.03-py3.11 + image_name: hub.oepkgs.net/oedeploy/openeuler/aarch64/mindspore + image_tag: 20250411 # 单机部署时,建议使用tag为20250326的容器镜像 # 将要拉起的推理容器的名称 container_name: openeuler_ds # 启动之后的docker name,不能和已有镜像重名 # 模型路径 diff --git a/script/mindspore-deepseek/workspace/roles/prepare/files/lib/start_ds.sh b/script/mindspore-deepseek/workspace/roles/prepare/files/lib/start_ds.sh index 49a7bd2..89209c4 100644 --- a/script/mindspore-deepseek/workspace/roles/prepare/files/lib/start_ds.sh +++ b/script/mindspore-deepseek/workspace/roles/prepare/files/lib/start_ds.sh @@ -39,7 +39,7 @@ rm -rf ds.log if [ $NODE_NUM -ne 1 ]; then nohup python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model "$MODEL_PATH" --port=$LLM_PORT --trust_remote_code --tensor_parallel_size=$PARALLEL --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=128 --gpu-memory-utilization=0.93 --num-scheduler-steps=8 --disable-async-output-proc --distributed-executor-backend=ray &> ds.log & else - nohup python3 -m 
vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model "$MODEL_PATH" --port=$LLM_PORT --trust_remote_code --tensor_parallel_size=8 --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=128 --gpu-memory-utilization=0.93 --num-scheduler-steps=8 --disable-async-output-proc &> ds.log & + nohup python3 -m vllm_mindspore.entrypoints vllm.entrypoints.openai.api_server --model "$MODEL_PATH" --port=$LLM_PORT --trust_remote_code --tensor_parallel_size=8 --max-num-seqs=192 --max_model_len=32768 --max-num-batched-tokens=16384 --block-size=32 --gpu-memory-utilization=0.93 --num-scheduler-steps=8 --disable-async-output-proc &> ds.log & fi #检测推理服务是否拉起 -- Gitee