diff --git a/llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile b/llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..119ba3736390156fe3aad1199ce56979eea3f39d
--- /dev/null
+++ b/llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
@@ -0,0 +1,23 @@
+FROM openeuler/openeuler:22.03-lts-sp3
+
+RUN sed -i 's|http://repo.openeuler.org/|https://mirrors.huaweicloud.com/openeuler/|g' /etc/yum.repos.d/openEuler.repo &&\
+    yum update -y &&\
+    yum install -y python3 python3-pip shadow-utils cmake gcc g++ git make &&\
+    yum clean all
+RUN git clone https://github.com/OpenMathLib/OpenBLAS.git && \
+    cd OpenBLAS && \
+    make && \
+    make install && \
+    cd .. && \
+    rm -rf OpenBLAS
+RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple --no-cache-dir guidance llama-cpp-python[server]
+
+ENV KEY=sk-123456
+ENV MODEL=/models/model.gguf
+ENV MODEL_NAME=qwen-1.5
+ENV THREADS=8
+ENV CONTEXT=8192
+
+EXPOSE 8000
+
+ENTRYPOINT python3 -m llama_cpp.server --host 0.0.0.0 --port 8000 --api_key $KEY --model $MODEL --model_alias $MODEL_NAME --n_threads $THREADS --n_ctx $CONTEXT
\ No newline at end of file
diff --git a/llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile b/llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..3b95b54d398563e0b75826373ffbca52239c562f
--- /dev/null
+++ b/llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
@@ -0,0 +1,20 @@
+FROM nvidia/cuda:12.1.1-devel-rockylinux9
+
+RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' \
+    -e 's|^#baseurl=http://dl.rockylinux.org/$contentdir|baseurl=https://mirror.nju.edu.cn/rocky|g' \
+    -i.bak \
+    /etc/yum.repos.d/rocky-extras.repo \
+    /etc/yum.repos.d/rocky.repo && \
+    dnf -y update && \
+    dnf -y install python3 python3-devel cmake gcc g++
+
+RUN CMAKE_ARGS="-DLLAMA_CUDA=on" pip3 install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple -v guidance llama-cpp-python[server]
+
+ENV KEY=sk-123456
+ENV MODEL=/models/model.gguf
+ENV MODEL_NAME=qwen-1.5
+ENV CONTEXT=8192
+
+EXPOSE 8000
+
+ENTRYPOINT python3 -m llama_cpp.server --host 0.0.0.0 --port 8000 --api_key $KEY --model $MODEL --model_alias $MODEL_NAME --n_gpu_layers -1 --n_ctx $CONTEXT
\ No newline at end of file
diff --git a/llm-server/README.md b/llm-server/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4e53b38db071751005658fb9a8248e5eb3edb2ab
--- /dev/null
+++ b/llm-server/README.md
@@ -0,0 +1,37 @@
+# Containerized packaging for large AI models such as Baichuan, ChatGLM, and Spark
+
+All required dependencies are preconfigured, with separate CPU and GPU variants, so the images lower the barrier to entry and work out of the box.
+
+### How to start
+docker-compose
+```
+version: '3'
+services:
+  model:
+    image: openeuler/llm-server:1.0.0.gpu-oe2203sp3 # image name and tag
+    restart: on-failure:5
+    ports:
+      - 10011:8000 # listening port; change "10011" to use a different host port
+    volumes:
+      - /root/models:/models
+    environment:
+      - MODEL=/models/qwen1_5-7b-chat-q4_k_m.gguf # model file path inside the container
+      - MODEL_NAME=qwen1.5 # model name
+      - KEY=sk-12345678 # API key
+      - CONTEXT=8192 # context size
+      - THREADS=8 # number of CPU threads; only needed for CPU deployments
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+```
+```
+docker-compose -f docker-compose.yaml up
+```
+or
+```
+docker run -d --gpus all --restart on-failure:5 -p host_port:container_port(default 8000) -v host_model_path:/models -e MODEL=/models/model_name(gguf format) -e MODEL_NAME=baichuan-7b(custom model name) -e KEY=sk-12345678(api key) image_name:tag
+```
\ No newline at end of file
diff --git a/llm-server/meta.yml b/llm-server/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a7c6d880c1b4e3dbeda651652fa523ef649dd352
--- /dev/null
+++ b/llm-server/meta.yml
@@ -0,0 +1,4 @@
+1.0.0.gpu-oe2203sp3:
+  llm-server/1.0.0.gpu/22.03-lts-sp3/Dockerfile
+1.0.0.cpu-oe2203sp3:
+  llm-server/1.0.0.cpu/22.03-lts-sp3/Dockerfile
\ No newline at end of file
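
Both images start `llama_cpp.server`, which serves an OpenAI-compatible HTTP API. As a minimal sanity check, assuming the `10011:8000` port mapping, the `KEY=sk-12345678` API key, and the `qwen1.5` model alias from the compose example above (adjust these to your own deployment), a request might look like:
```
curl http://localhost:10011/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-12345678" \
  -d '{
        "model": "qwen1.5",
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 64
      }'
```
Any OpenAI-compatible client SDK should also work by pointing its base URL at `http://localhost:10011/v1` and using the value of `KEY` as the API key.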