From 657cef8557d1080d5cb55d737a7de7e0c777ec37 Mon Sep 17 00:00:00 2001
From: icew1998
Date: Wed, 5 Jun 2024 09:36:15 +0000
Subject: [PATCH] Add ready-to-use containerized service images for large AI
 models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: icew1998
---
 llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile | 23 ++++++++++++
 llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile | 20 ++++++++++
 llm-server/README.md                          | 37 +++++++++++++++++++
 llm-server/meta.yml                           |  4 ++
 4 files changed, 84 insertions(+)
 create mode 100644 llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
 create mode 100644 llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
 create mode 100644 llm-server/README.md
 create mode 100644 llm-server/meta.yml

diff --git a/llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile b/llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
new file mode 100644
index 00000000..41173fe3
--- /dev/null
+++ b/llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
@@ -0,0 +1,23 @@
+FROM openeuler/openeuler:22.03-lts-sp3
+
+RUN sed -i 's|http://repo.openeuler.org/|https://mirrors.huaweicloud.com/openeuler/|g' /etc/yum.repos.d/openEuler.repo &&\
+    yum update -y &&\
+    yum install -y python3 python3-pip shadow-utils cmake gcc gcc-c++ git make &&\
+    yum clean all
+RUN git clone https://github.com/OpenMathLib/OpenBLAS.git && \
+    cd OpenBLAS && \
+    make && \
+    make install && \
+    cd .. && \
+    rm -rf OpenBLAS
+RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple --no-cache-dir guidance llama-cpp-python[server]
+
+ENV KEY=sk-123456
+ENV MODEL=/models/model.gguf
+ENV MODEL_NAME=qwen-1.5
+ENV THREADS=8
+ENV CONTEXT=8192
+
+EXPOSE 8000
+
+ENTRYPOINT python3 -m llama_cpp.server --host 0.0.0.0 --port 8000 --api_key $KEY --model $MODEL --model_alias $MODEL_NAME --n_threads $THREADS --n_ctx $CONTEXT
\ No newline at end of file
diff --git a/llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile b/llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
new file mode 100644
index 00000000..9a5485de
--- /dev/null
+++ b/llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
@@ -0,0 +1,20 @@
+FROM nvidia/cuda:12.1.1-devel-rockylinux9
+
+RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' \
+        -e 's|^#baseurl=http://dl.rockylinux.org/$contentdir|baseurl=https://mirror.nju.edu.cn/rocky|g' \
+        -i.bak \
+        /etc/yum.repos.d/rocky-extras.repo \
+        /etc/yum.repos.d/rocky.repo && \
+    dnf -y update && \
+    dnf -y install python3 python3-devel cmake gcc gcc-c++
+
+RUN CMAKE_ARGS="-DLLAMA_CUDA=on" pip3 install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple -v guidance llama-cpp-python[server]
+
+ENV KEY=sk-123456
+ENV MODEL=/models/model.gguf
+ENV MODEL_NAME=qwen-1.5
+ENV CONTEXT=8192
+
+EXPOSE 8000
+
+ENTRYPOINT python3 -m llama_cpp.server --host 0.0.0.0 --port 8000 --api_key $KEY --model $MODEL --model_alias $MODEL_NAME --n_gpu_layers -1 --n_ctx $CONTEXT
\ No newline at end of file
diff --git a/llm-server/README.md b/llm-server/README.md
new file mode 100644
index 00000000..2434435a
--- /dev/null
+++ b/llm-server/README.md
@@ -0,0 +1,37 @@
+# Containerized packaging of large AI models such as Baichuan, ChatGLM, and Spark
+
+The required dependencies are pre-configured, with separate CPU and GPU variants, so the images work out of the box and lower the barrier to use.
+
+### How to start
+docker-compose
+```
+version: '3'
+services:
+  model:
+    image: openeuler/llm-server:1.0.0.gpu-oe2203sp3 # image name and tag
+    restart: on-failure:5
+    ports:
+      - 10011:8000 # listening port; change "10011" to use a different host port
+    volumes:
+      - /root/models:/models
+    environment:
+      - MODEL=/models/qwen1_5-7b-chat-q4_k_m.gguf # path of the model file inside the container
+      - MODEL_NAME=qwen1.5 # model name
+      - KEY=sk-12345678 # API key
+      - CONTEXT=8192 # context size
+      - THREADS=8 # number of CPU threads; only needed for CPU deployments
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+```
+```
+docker-compose -f docker-compose.yaml up
+```
+or
+```
+docker run -d --gpus all --restart on-failure:5 -p <host_port>:8000 -v <host_model_dir>:/models -e MODEL=/models/<model_file>.gguf -e MODEL_NAME=baichuan-7b -e KEY=sk-12345678 <image_name>:<tag>
+```
\ No newline at end of file
diff --git a/llm-server/meta.yml b/llm-server/meta.yml
new file mode 100644
index 00000000..28dd2abf
--- /dev/null
+++ b/llm-server/meta.yml
@@ -0,0 +1,4 @@
+1.0.0.gpu-oe2203sp3:
+  llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
+1.0.0.cpu-oe2203sp3:
+  llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
\ No newline at end of file
--
Gitee
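Note on verifying a deployment: `llama-cpp-python[server]` exposes an OpenAI-compatible HTTP API, so a running container can be smoke-tested with a plain HTTP request. A minimal sketch with curl, assuming the values from the compose example in the README above (host port 10011, API key sk-12345678, model alias qwen1.5); the prompt text is just an illustration:
```
# Chat-completion request against the OpenAI-compatible endpoint served by
# llama_cpp.server. The port, API key, and model alias below are taken from
# the compose example above; adjust them to match the actual deployment.
curl http://localhost:10011/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-12345678" \
  -d '{
        "model": "qwen1.5",
        "messages": [{"role": "user", "content": "Hello!"}]
      }'
```
A JSON response containing a `choices` array indicates that the model loaded and the API key was accepted.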