diff --git a/AI/image-list.yml b/AI/image-list.yml
index 78769a81a63dd22e235abff6ecb22d91d58343a8..ba5e7e6131957efc08be54e29d1fd8e8e8a849cd 100644
--- a/AI/image-list.yml
+++ b/AI/image-list.yml
@@ -29,6 +29,7 @@ images:
   llm-faqgen-tgi: opea/llm-faqgen-tgi
   llm-tgi: opea/llm-tgi
   llm-vllm: opea/llm-vllm
+  llm-textgen: opea/llm-textgen
   reranking-tei: opea/reranking-tei
   retriever-redis: opea/retriever-redis
   searchqna: opea/searchqna
diff --git a/AI/opea/codegen-ui/1.2/24.03-lts/Dockerfile b/AI/opea/codegen-ui/1.2/24.03-lts/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..5eb1a5ea78fcdb829cab6cb6bd83c48b999f0e98
--- /dev/null
+++ b/AI/opea/codegen-ui/1.2/24.03-lts/Dockerfile
@@ -0,0 +1,31 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Use openEuler 24.03 LTS as the base image
+FROM openeuler/openeuler:24.03-lts
+
+# Update the package manager and install npm and Git
+RUN yum update -y && \
+    yum install -y \
+    npm \
+    git
+
+WORKDIR /home/user/
+
+# Clone the front-end code repository
+RUN git clone -b v1.2 https://github.com/opea-project/GenAIExamples.git
+
+# Set the working directory
+WORKDIR /home/user/GenAIExamples/CodeGen/ui/svelte
+
+# Install front-end dependencies
+RUN npm install
+
+# Build the front-end application
+RUN npm run build
+
+# Expose the port of the front-end application
+EXPOSE 5173
+
+# Run the front-end application in preview mode
+CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"]
\ No newline at end of file
diff --git a/AI/opea/codegen-ui/meta.yml b/AI/opea/codegen-ui/meta.yml
index 4cae4dd4a3ff05f2037aaa54695b1cc970e797f2..0e6dfdc00ec532336b9942f33ee6565fe22109d0 100644
--- a/AI/opea/codegen-ui/meta.yml
+++ b/AI/opea/codegen-ui/meta.yml
@@ -1,3 +1,7 @@
 1.0-oe2403lts:
   path: 1.0/24.03-lts/Dockerfile
+  arch: x86_64
+
+1.2-oe2403lts:
+  path: 1.2/24.03-lts/Dockerfile
   arch: x86_64
\ No newline at end of file
diff --git a/AI/opea/codegen/1.2/24.03-lts/Dockerfile b/AI/opea/codegen/1.2/24.03-lts/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..f93be2b75459af0e3184975d9ea5ee7f74bbb27f
--- /dev/null
+++ b/AI/opea/codegen/1.2/24.03-lts/Dockerfile
@@ -0,0 +1,36 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Stage 1: base setup used by other stages
+FROM openeuler/openeuler:24.03-lts AS base
+
+# Get security updates and install base dependencies
+RUN yum update -y && \
+    yum install -y \
+    git \
+    python python-pip
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user
+
+WORKDIR /home/user/
+
+RUN git clone -b v1.2 https://github.com/opea-project/GenAIComps.git
+
+WORKDIR /home/user/GenAIComps
+
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
+
+WORKDIR /home/user/
+
+RUN git clone -b v1.2 https://github.com/opea-project/GenAIExamples.git && \
+    cp GenAIExamples/CodeGen/codegen.py . && \
+    rm -rf GenAIExamples
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
+
+USER user
+
+ENTRYPOINT ["python", "codegen.py"]
diff --git a/AI/opea/codegen/meta.yml b/AI/opea/codegen/meta.yml
index 4cae4dd4a3ff05f2037aaa54695b1cc970e797f2..0e6dfdc00ec532336b9942f33ee6565fe22109d0 100644
--- a/AI/opea/codegen/meta.yml
+++ b/AI/opea/codegen/meta.yml
@@ -1,3 +1,7 @@
 1.0-oe2403lts:
   path: 1.0/24.03-lts/Dockerfile
+  arch: x86_64
+
+1.2-oe2403lts:
+  path: 1.2/24.03-lts/Dockerfile
   arch: x86_64
\ No newline at end of file
diff --git a/AI/opea/llm-textgen/1.2/24.03-lts/Dockerfile b/AI/opea/llm-textgen/1.2/24.03-lts/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..a68580f26119aaf38f33a9e0978e4c7b30ad3cf9
--- /dev/null
+++ b/AI/opea/llm-textgen/1.2/24.03-lts/Dockerfile
@@ -0,0 +1,31 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM openeuler/openeuler:24.03-lts
+
+RUN yum update -y && \
+    yum install -y \
+    python python-pip \
+    mesa-libGL \
+    jemalloc-devel \
+    git
+
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+
+USER user
+WORKDIR /home/user/
+
+RUN git clone -b v1.2 https://github.com/opea-project/GenAIComps.git && \
+    cp -r GenAIComps/comps /home/user/comps && \
+    rm -rf GenAIComps
+
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+    pip install --no-cache-dir -r /home/user/comps/llms/src/text-generation/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user
+
+WORKDIR /home/user/comps/llms/src/text-generation
+
+ENTRYPOINT ["bash", "entrypoint.sh"]
\ No newline at end of file
diff --git a/AI/opea/llm-textgen/meta.yml b/AI/opea/llm-textgen/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ee4b49e9afa63ff59b2371473ff499d093b30722
--- /dev/null
+++ b/AI/opea/llm-textgen/meta.yml
@@ -0,0 +1,3 @@
+1.2-oe2403lts:
+  path: 1.2/24.03-lts/Dockerfile
+  arch: x86_64
\ No newline at end of file
diff --git a/AI/vllm/0.8.3/24.03-lts/Dockerfile b/AI/vllm/0.8.3/24.03-lts/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..8ed829b031c478012ca84441b919b0b2ae73ecb6
--- /dev/null
+++ b/AI/vllm/0.8.3/24.03-lts/Dockerfile
@@ -0,0 +1,19 @@
+# This vLLM Dockerfile is used to construct an image that can build and run vLLM on an ARM CPU platform.
+
+FROM openeuler/openeuler:24.03-lts
+
+RUN yum update -y && \
+    yum install -y make gcc gcc-c++ python python-pip python3-devel git vim wget net-tools numactl-devel && \
+    rm -rf /var/cache/yum
+
+WORKDIR /workspace
+
+RUN git clone -b v0.8.3 https://github.com/vllm-project/vllm.git
+
+WORKDIR /workspace/vllm
+
+RUN pip install "cmake>=3.26" wheel packaging ninja "setuptools-scm>=8" numpy
+
+RUN VLLM_TARGET_DEVICE="cpu" pip install -e . --extra-index-url https://download.pytorch.org/whl/cpu
+
+ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]
diff --git a/AI/vllm/README.md b/AI/vllm/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bd7623aecd2dba1a98aa56f8ac3982191025957e
--- /dev/null
+++ b/AI/vllm/README.md
@@ -0,0 +1,86 @@
+# Quick reference
+
+- The official vLLM Docker images
+
+- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative)
+
+- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community)
+
+# vLLM | openEuler
+
+Current vLLM Docker images are built on [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits.
+
+vLLM is a fast and easy-to-use library for LLM inference and serving.
+
+Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu/) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
+
+vLLM is fast with:
+
+- State-of-the-art serving throughput
+- Efficient management of attention key and value memory with [PagedAttention](https://blog.vllm.ai/2023/06/20/vllm.html)
+- Continuous batching of incoming requests
+- Fast model execution with CUDA/HIP graph
+- Quantizations: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8
+- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer
+- Speculative decoding
+- Chunked prefill
+
+Read more about vLLM in the [vLLM paper](https://arxiv.org/abs/2309.06180) (SOSP 2023) and explore the vLLM technical documentation at [docs.vllm.ai](https://docs.vllm.ai/).
+
+# Supported tags and respective Dockerfile links
+
+The tag of each vLLM Docker image consists of the vLLM version and the version of the base image. The details are as follows:
+
+| Tag | Currently | Architectures |
+|--|--|--|
+|[0.6.3-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/vllm/0.6.3/24.03-lts/Dockerfile)| vLLM 0.6.3 on openEuler 24.03-LTS | amd64 |
+|[0.8.3-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/vllm/0.8.3/24.03-lts/Dockerfile)| vLLM 0.8.3 on openEuler 24.03-LTS | amd64, arm64 |
+|[0.8.5-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/vllm/0.8.5/24.03-lts/Dockerfile)| vLLM 0.8.5 on openEuler 24.03-LTS | amd64 |
+
+# Usage
+
+## Quick start 1: supported devices
+
+- Intel/AMD x86
+- ARM AArch64
+
+## Quick start 2: set up the environment using a container
+
+```bash
+# Start an interactive shell in the vLLM image
+docker run --rm --name vllm -p 8000:8000 -it --entrypoint bash openeuler/vllm-cpu:latest
+```
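+
+With the 0.8.3 image, whose entrypoint is `python -m vllm.entrypoints.openai.api_server`, the container can also serve a model directly instead of dropping into a shell. A minimal sketch (the image tag and model name below are only examples; substitute the model you actually want to serve):
+
+```bash
+# Serve an example model on port 8000; arguments after the image name are passed to the API server
+docker run --rm --name vllm-server -p 8000:8000 openeuler/vllm-cpu:latest \
+    --model Qwen/Qwen2.5-0.5B-Instruct --host 0.0.0.0 --port 8000
+
+# From another shell, query the OpenAI-compatible completions endpoint
+curl http://localhost:8000/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{"model": "Qwen/Qwen2.5-0.5B-Instruct", "prompt": "Hello, my name is", "max_tokens": 32}'
+```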
+## Quick start 3: offline inference
+
+You can use the ModelScope mirror to speed up model downloads:
+
+```bash
+export VLLM_USE_MODELSCOPE=true
+```
+
+With vLLM installed, you can start generating text for a list of input prompts (i.e. offline batch inference).
+
+Run the Python script below directly, or paste it into a `python3` shell, to generate text:
+
+```python
+from vllm import LLM, SamplingParams
+
+prompts = [
+    "Hello, my name is",
+    "The future of AI is",
+]
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+# The first run will take about 3-5 mins (at ~10 MB/s) to download the model
+llm = LLM(model="Qwen/Qwen3-8B")
+
+outputs = llm.generate(prompts, sampling_params)
+
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs[0].text
+    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+```
+
+# Question and answering
+
+If you have any questions or want to use some special features, please submit an issue or a pull request on [openeuler-docker-images](https://gitee.com/openeuler/openeuler-docker-images).