From 2055fb5ee037d4d084db4098161ac3784c3e6899 Mon Sep 17 00:00:00 2001
From: zhihang
Date: Thu, 7 Aug 2025 02:38:05 +0000
Subject: [PATCH] update README.md for KServe

Signed-off-by: zhihang
---
 AI/kserve/controller/README.md            | 179 ++++++++++++++++++++++
 AI/kserve/controller/doc/quick_install.sh | 179 ++++++++++++++++++++++
 2 files changed, 358 insertions(+)
 create mode 100644 AI/kserve/controller/README.md
 create mode 100644 AI/kserve/controller/doc/quick_install.sh

diff --git a/AI/kserve/controller/README.md b/AI/kserve/controller/README.md
new file mode 100644
index 00000000..342f236d
--- /dev/null
+++ b/AI/kserve/controller/README.md
@@ -0,0 +1,179 @@
# Quick reference

- The official KServe docker image.

- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative).

- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community).

# KServe | openEuler
Current KServe docker images are built on [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits.

KServe provides a Kubernetes [Custom Resource Definition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) for serving predictive and generative machine learning (ML) models.
It aims to solve production model serving use cases by providing high-level abstraction interfaces for TensorFlow, XGBoost, scikit-learn, PyTorch, and Hugging Face Transformer/LLM models using standardized data plane protocols.

It encapsulates the complexity of autoscaling, networking, health checking, and server configuration to bring cutting-edge serving features like GPU Autoscaling, Scale to Zero, and Canary Rollouts to your ML deployments.
It enables a simple, pluggable, and complete story for production ML serving, including prediction, pre-processing, post-processing, and explainability.
KServe is [used across various organizations](https://kserve.github.io/website/master/community/adopters/).

For more details, visit the [KServe website](https://kserve.github.io/website/).

# Supported tags and respective Dockerfile links
The tag of each `KServe` docker image consists of the complete software stack version. The details are as follows:
| Tag | Currently | Architectures |
|----------|-------------|------------------|
|[0.15.2-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/kserve/controller/0.15.2/24.03-lts/Dockerfile)| KServe controller 0.15.2 on openEuler 24.03-LTS | amd64 |

# Usage

## Before you begin

> KServe Quickstart Environments are for experimentation use only. For production installation, see our [Administrator's Guide](https://kserve.github.io/website/latest/admin/serverless/serverless/).

Before you can get started with a KServe Quickstart deployment, you must install kind, the Kubernetes CLI, and Helm.

### Install Kind (Kubernetes in Docker)

You can use [kind](https://kind.sigs.k8s.io/docs/user/quick-start) (Kubernetes in Docker) to run a local Kubernetes cluster with Docker container nodes.

### Install the Kubernetes CLI

The [Kubernetes CLI (kubectl)](https://kubernetes.io/docs/tasks/tools/install-kubectl) allows you to run commands against Kubernetes clusters. You can use kubectl to deploy applications, inspect and manage cluster resources, and view logs.
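
For example, on a Linux amd64 host you can install both tools with the commands below (the kind version and platform are illustrative; see the linked pages for the latest releases):
```shell
# Install kind (pick the release and platform that match your system)
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-amd64
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind

# Install kubectl (latest stable release)
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x ./kubectl
sudo mv ./kubectl /usr/local/bin/kubectl
```
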
### Install Helm

The [Helm](https://helm.sh/docs/intro/install/) package manager for Kubernetes helps you define, install, and upgrade software built for Kubernetes.

## Install the KServe environment

Once kind is installed, create a kind cluster:
```shell
kind create cluster
```

Then run:
```shell
kubectl config get-contexts
```

This lists the contexts you have; one of them should be `kind-kind`. Then run:
```shell
kubectl config use-context kind-kind
```
to use this context.

You can then get started with a local deployment of KServe by using the KServe quick installation script on kind:
```shell
curl -s "https://gitee.com/openeuler/openeuler-docker-images/raw/master/AI/kserve/controller/doc/quick_install.sh" | bash
```

## Deploy the Llama3 model for a text generation task with the Hugging Face LLM Serving Runtime

In this example, we demonstrate how to deploy the Llama3 model from Hugging Face for a text generation task by deploying an InferenceService with the [Hugging Face Serving runtime](https://github.com/kserve/kserve/tree/master/python/huggingfaceserver).

## Serve the Hugging Face LLM model using the vLLM backend

By default, the KServe Hugging Face runtime uses vLLM to serve LLM models, achieving faster time-to-first-token (TTFT) and higher token generation throughput than the Hugging Face API.
vLLM implements common inference optimization techniques, such as paged attention, continuous batching, and optimized CUDA kernels.
If the model is not supported by vLLM, KServe falls back to the Hugging Face backend as a failsafe.

> The Llama3 model requires a Hugging Face Hub token to download the model.
> You can set the token using the `HF_TOKEN` environment variable.

Create a secret with the Hugging Face token.
```yaml
apiVersion: v1
kind: Secret
metadata:
  name: hf-secret
type: Opaque
stringData:
  HF_TOKEN: <token>
```

Then create the inference service.
```yaml
kubectl apply -f - <<EOF
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: huggingface-llama3
spec:
  predictor:
    model:
      modelFormat:
        name: huggingface
      args:
        - --model_name=llama3
        - --model_id=meta-llama/meta-llama-3-8b-instruct
      env:
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              name: hf-secret
              key: HF_TOKEN
              optional: false
      resources:
        limits:
          cpu: "6"
          memory: 24Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "6"
          memory: 24Gi
          nvidia.com/gpu: "1"
EOF
```

diff --git a/AI/kserve/controller/doc/quick_install.sh b/AI/kserve/controller/doc/quick_install.sh
new file mode 100644
--- /dev/null
+++ b/AI/kserve/controller/doc/quick_install.sh
@@ -0,0 +1,179 @@
#!/bin/bash
set -e

# Pinned component versions (illustrative; adjust to the releases you need)
ISTIO_VERSION="1.23.2"
GATEWAY_API_VERSION="v1.2.1"
CERT_MANAGER_VERSION="v1.16.1"
KEDA_VERSION="2.16.0"
KNATIVE_OPERATOR_VERSION="v1.15.0"
KSERVE_VERSION="v0.15.2"

Help() {
  # Display the available command line options.
  echo "Usage: quick_install.sh [-h|-s|-r|-u|-d|-k]"
  echo "  -h  print this help"
  echo "  -s  Serverless mode, install Knative (default)"
  echo "  -r  RawDeployment mode, skip the Knative install"
  echo "  -u  uninstall the components installed by this script"
  echo "  -d  install only the dependencies, not KServe itself"
  echo "  -k  additionally install KEDA"
}

uninstall() {
  # Best-effort removal of everything installed below.
  helm uninstall kserve -n kserve || true
  helm uninstall kserve-crd -n kserve || true
  helm uninstall knative-operator -n knative-serving || true
  helm uninstall keda -n keda || true
  helm uninstall cert-manager -n cert-manager || true
  helm uninstall istio-ingressgateway -n istio-system || true
  helm uninstall istiod -n istio-system || true
  helm uninstall istio-base -n istio-system || true
}

if ! command -v helm &> /dev/null; then
  echo "😱 Helm command not found. Please install Helm."
  exit 1
fi

deploymentMode="Serverless"
installKserve=true
installKeda=false
while getopts ":hsrudk" option; do
  case $option in
    h) # display Help
      Help
      exit
      ;;
    r) # skip knative install
      deploymentMode="RawDeployment" ;;
    s) # install knative
      deploymentMode="Serverless" ;;
    u) # uninstall
      uninstall
      exit
      ;;
    d) # install only dependencies
      installKserve=false ;;
    k) # install KEDA
      installKeda=true ;;
    \?) # Invalid option
      echo "Error: Invalid option"
      exit 1
      ;;
  esac
done

get_kube_version() {
  # Print the server's minor version, e.g. "28" for v1.28.x.
  (kubectl version --short=true 2>/dev/null || kubectl version 2>/dev/null) | awk -F '.' '/Server Version/ {print $2}'
}

if [ "$(get_kube_version)" -lt 24 ]; then
  echo "😱 install requires at least Kubernetes 1.24"
  exit 1
fi

echo "Installing Gateway API CRDs ..."
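# The standard-install manifest provides the stable Gateway API resources
# (GatewayClass, Gateway, HTTPRoute, ReferenceGrant) used by ingress
# implementations such as the Istio installation below.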
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/standard-install.yaml

helm repo add istio https://istio-release.storage.googleapis.com/charts --force-update
helm install istio-base istio/base -n istio-system --wait --set defaultRevision=default --create-namespace --version ${ISTIO_VERSION}
helm install istiod istio/istiod -n istio-system --wait --version ${ISTIO_VERSION} \
  --set proxy.autoInject=disabled \
  --set-string pilot.podAnnotations."cluster-autoscaler\.kubernetes\.io/safe-to-evict"=true
helm install istio-ingressgateway istio/gateway -n istio-system --version ${ISTIO_VERSION} \
  --set-string podAnnotations."cluster-autoscaler\.kubernetes\.io/safe-to-evict"=true

# Wait for the istio ingressgateway pod to be created
sleep 10
# Wait for istio ingressgateway to be ready
kubectl wait --for=condition=Ready pod -l app=istio-ingressgateway -n istio-system --timeout=600s
echo "😀 Successfully installed Istio"

# Install Cert Manager
helm repo add jetstack https://charts.jetstack.io --force-update
helm install \
  cert-manager jetstack/cert-manager \
  --namespace cert-manager \
  --create-namespace \
  --version ${CERT_MANAGER_VERSION} \
  --set crds.enabled=true
echo "😀 Successfully installed Cert Manager"

if [ "$installKeda" = true ]; then
  # Install KEDA
  helm repo add kedacore https://kedacore.github.io/charts
  helm install keda kedacore/keda --version ${KEDA_VERSION} --namespace keda --create-namespace --wait
  echo "😀 Successfully installed KEDA"

  kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml

  helm upgrade -i kedify-otel oci://ghcr.io/kedify/charts/otel-add-on --version=v0.0.6 --namespace keda --wait --set validatingAdmissionPolicy.enabled=false
  echo "😀 Successfully installed the KEDA OTel add-on"
fi


# Install Knative
if [ "${deploymentMode}" = "Serverless" ]; then
  helm install knative-operator --namespace knative-serving --create-namespace --wait \
    https://github.com/knative/operator/releases/download/knative-${KNATIVE_OPERATOR_VERSION}/knative-operator-${KNATIVE_OPERATOR_VERSION}.tgz
  # Have the operator deploy Knative Serving (minimal spec; customize as needed)
  kubectl apply -f - <<EOF
apiVersion: operator.knative.dev/v1beta1
kind: KnativeServing
metadata:
  name: knative-serving
  namespace: knative-serving
EOF
  kubectl wait --for=condition=Ready knativeserving/knative-serving -n knative-serving --timeout=600s
  echo "😀 Successfully installed Knative"
fi
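
# Finally, install KServe itself (skipped when -d is given). This block is a
# sketch: the chart locations follow the upstream KServe Helm charts
# (oci://ghcr.io/kserve/charts), and KSERVE_VERSION and the values may need
# adjusting for your environment.
if [ "$installKserve" = true ]; then
  helm install kserve-crd oci://ghcr.io/kserve/charts/kserve-crd --version ${KSERVE_VERSION} --namespace kserve --create-namespace --wait
  helm install kserve oci://ghcr.io/kserve/charts/kserve --version ${KSERVE_VERSION} --namespace kserve --wait \
    --set kserve.controller.deploymentMode="${deploymentMode}"
  echo "😀 Successfully installed KServe"
fi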