From 2055fb5ee037d4d084db4098161ac3784c3e6899 Mon Sep 17 00:00:00 2001
From: zhihang
Date: Thu, 7 Aug 2025 02:38:05 +0000
Subject: [PATCH] update README.md for KServe

Signed-off-by: zhihang
---
 AI/kserve/controller/README.md            | 179 ++++++++++++++++++++++
 AI/kserve/controller/doc/quick_install.sh | 179 ++++++++++++++++++++++
 2 files changed, 358 insertions(+)
 create mode 100644 AI/kserve/controller/README.md
 create mode 100644 AI/kserve/controller/doc/quick_install.sh

diff --git a/AI/kserve/controller/README.md b/AI/kserve/controller/README.md
new file mode 100644
index 00000000..342f236d
--- /dev/null
+++ b/AI/kserve/controller/README.md
@@ -0,0 +1,179 @@
# Quick reference

- The official KServe docker image.

- Maintained by: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative).

- Where to get help: [openEuler CloudNative SIG](https://gitee.com/openeuler/cloudnative), [openEuler](https://gitee.com/openeuler/community).

# KServe | openEuler
Current KServe docker images are built on [openEuler](https://repo.openeuler.org/). This repository is free to use and exempted from per-user rate limits.

KServe provides a Kubernetes [Custom Resource Definition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) for serving predictive and generative machine learning (ML) models.
It aims to solve production model serving use cases by providing high-level abstraction interfaces for TensorFlow, XGBoost, scikit-learn, PyTorch, and Hugging Face Transformer/LLM models using standardized data plane protocols.

It encapsulates the complexity of autoscaling, networking, health checking, and server configuration to bring cutting-edge serving features like GPU Autoscaling, Scale to Zero, and Canary Rollouts to your ML deployments.
It enables a simple, pluggable, and complete story for production ML serving, including prediction, pre-processing, post-processing, and explainability.
KServe is [used across various organizations](https://kserve.github.io/website/master/community/adopters/).

For more details, visit the [KServe website](https://kserve.github.io/website/).

# Supported tags and respective Dockerfile links
The tag of each `KServe` docker image consists of the complete software stack version. The details are as follows:
| Tag | Currently | Architectures |
|----------|-------------|------------------|
|[0.15.2-oe2403lts](https://gitee.com/openeuler/openeuler-docker-images/blob/master/AI/kserve/controller/0.15.2/24.03-lts/Dockerfile)| KServe controller 0.15.2 on openEuler 24.03-LTS | amd64 |

# Usage

## Before you begin

> KServe Quickstart Environments are for experimentation use only. For production installation, see our [Administrator's Guide](https://kserve.github.io/website/latest/admin/serverless/serverless/).

Before you can get started with a KServe Quickstart deployment, you must install kind, the Kubernetes CLI, and Helm.

### Install Kind (Kubernetes in Docker)

You can use [kind](https://kind.sigs.k8s.io/docs/user/quick-start) (Kubernetes in Docker) to run a local Kubernetes cluster with Docker container nodes.

### Install the Kubernetes CLI

The [Kubernetes CLI (kubectl)](https://kubernetes.io/docs/tasks/tools/install-kubectl) allows you to run commands against Kubernetes clusters. You can use kubectl to deploy applications, inspect and manage cluster resources, and view logs.
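
For example, on a Linux amd64 host you can install both tools with the commands below (the kind version and platform are illustrative; see the linked pages for the latest releases):
```shell
# Install kind (pick the release and platform that match your system)
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.23.0/kind-linux-amd64
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind

# Install kubectl (latest stable release)
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
chmod +x ./kubectl
sudo mv ./kubectl /usr/local/bin/kubectl
```
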
### Install Helm

The [Helm](https://helm.sh/docs/intro/install/) package manager for Kubernetes helps you define, install, and upgrade software built for Kubernetes.

## Install the KServe environment

Once kind is installed, create a kind cluster:
```shell
kind create cluster
```

Then run:
```shell
kubectl config get-contexts
```

This lists the contexts you have; one of them should be `kind-kind`. Then run:
```shell
kubectl config use-context kind-kind
```
to use this context.

You can then get started with a local deployment of KServe by using the KServe quick installation script on kind:
```shell
curl -s "https://gitee.com/openeuler/openeuler-docker-images/raw/master/AI/kserve/controller/doc/quick_install.sh" | bash
```

## Deploy the Llama3 model for a text generation task with the Hugging Face LLM Serving Runtime

In this example, we demonstrate how to deploy the Llama3 model from Hugging Face for a text generation task by deploying an InferenceService with the [Hugging Face Serving runtime](https://github.com/kserve/kserve/tree/master/python/huggingfaceserver).

## Serve the Hugging Face LLM model using the vLLM backend

By default, the KServe Hugging Face runtime uses vLLM to serve LLM models, achieving faster time-to-first-token (TTFT) and higher token generation throughput than the Hugging Face API.
vLLM implements common inference optimization techniques, such as paged attention, continuous batching, and optimized CUDA kernels.
If the model is not supported by vLLM, KServe falls back to the Hugging Face backend as a failsafe.

> The Llama3 model requires a Hugging Face Hub token to download the model.
> You can set the token using the `HF_TOKEN` environment variable.

Create a secret with the Hugging Face token.
```yaml
apiVersion: v1
kind: Secret
metadata:
  name: hf-secret
type: Opaque
stringData:
  HF_TOKEN: <token>
```

Then create the inference service.
```yaml
kubectl apply -f - <<EOF
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: huggingface-llama3
spec:
  predictor:
    model:
      modelFormat:
        name: huggingface
      args:
        - --model_name=llama3
        - --model_id=meta-llama/meta-llama-3-8b-instruct
      env:
        - name: HF_TOKEN
          valueFrom:
            secretKeyRef:
              name: hf-secret
              key: HF_TOKEN
              optional: false
      resources:
        limits:
          cpu: "6"
          memory: 24Gi
          nvidia.com/gpu: "1"
        requests:
          cpu: "6"
          memory: 24Gi
          nvidia.com/gpu: "1"
EOF
```

diff --git a/AI/kserve/controller/doc/quick_install.sh b/AI/kserve/controller/doc/quick_install.sh
new file mode 100644
--- /dev/null
+++ b/AI/kserve/controller/doc/quick_install.sh
@@ -0,0 +1,179 @@
#!/bin/bash
set -e

# Pinned component versions (illustrative; adjust to the releases you need)
ISTIO_VERSION="1.23.2"
GATEWAY_API_VERSION="v1.2.1"
CERT_MANAGER_VERSION="v1.16.1"
KEDA_VERSION="2.16.0"
KNATIVE_OPERATOR_VERSION="v1.15.0"
KSERVE_VERSION="v0.15.2"

Help() {
  # Display the available command line options.
  echo "Usage: quick_install.sh [-h|-s|-r|-u|-d|-k]"
  echo "  -h  print this help"
  echo "  -s  Serverless mode, install Knative (default)"
  echo "  -r  RawDeployment mode, skip the Knative install"
  echo "  -u  uninstall the components installed by this script"
  echo "  -d  install only the dependencies, not KServe itself"
  echo "  -k  additionally install KEDA"
}

uninstall() {
  # Best-effort removal of everything installed below.
  helm uninstall kserve -n kserve || true
  helm uninstall kserve-crd -n kserve || true
  helm uninstall knative-operator -n knative-serving || true
  helm uninstall keda -n keda || true
  helm uninstall cert-manager -n cert-manager || true
  helm uninstall istio-ingressgateway -n istio-system || true
  helm uninstall istiod -n istio-system || true
  helm uninstall istio-base -n istio-system || true
}

if ! command -v helm &> /dev/null; then
  echo "😱 Helm command not found. Please install Helm."
  exit 1
fi

deploymentMode="Serverless"
installKserve=true
installKeda=false
while getopts ":hsrudk" option; do
  case $option in
    h) # display Help
      Help
      exit
      ;;
    r) # skip knative install
      deploymentMode="RawDeployment" ;;
    s) # install knative
      deploymentMode="Serverless" ;;
    u) # uninstall
      uninstall
      exit
      ;;
    d) # install only dependencies
      installKserve=false ;;
    k) # install KEDA
      installKeda=true ;;
    \?) # Invalid option
      echo "Error: Invalid option"
      exit 1
      ;;
  esac
done

get_kube_version() {
  # Print the server's minor version, e.g. "28" for v1.28.x.
  (kubectl version --short=true 2>/dev/null || kubectl version 2>/dev/null) | awk -F '.' '/Server Version/ {print $2}'
}

if [ "$(get_kube_version)" -lt 24 ]; then
  echo "😱 install requires at least Kubernetes 1.24"
  exit 1
fi

echo "Installing Gateway API CRDs ..."
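# The standard-install manifest provides the stable Gateway API resources
# (GatewayClass, Gateway, HTTPRoute, ReferenceGrant) used by ingress
# implementations such as the Istio installation below.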
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/${GATEWAY_API_VERSION}/standard-install.yaml

helm repo add istio https://istio-release.storage.googleapis.com/charts --force-update
helm install istio-base istio/base -n istio-system --wait --set defaultRevision=default --create-namespace --version ${ISTIO_VERSION}
helm install istiod istio/istiod -n istio-system --wait --version ${ISTIO_VERSION} \
  --set proxy.autoInject=disabled \
  --set-string pilot.podAnnotations."cluster-autoscaler\.kubernetes\.io/safe-to-evict"=true
helm install istio-ingressgateway istio/gateway -n istio-system --version ${ISTIO_VERSION} \
  --set-string podAnnotations."cluster-autoscaler\.kubernetes\.io/safe-to-evict"=true

# Wait for the istio ingressgateway pod to be created
sleep 10
# Wait for istio ingressgateway to be ready
kubectl wait --for=condition=Ready pod -l app=istio-ingressgateway -n istio-system --timeout=600s
echo "😀 Successfully installed Istio"

# Install Cert Manager
helm repo add jetstack https://charts.jetstack.io --force-update
helm install \
  cert-manager jetstack/cert-manager \
  --namespace cert-manager \
  --create-namespace \
  --version ${CERT_MANAGER_VERSION} \
  --set crds.enabled=true
echo "😀 Successfully installed Cert Manager"

if [ "$installKeda" = true ]; then
  # Install KEDA
  helm repo add kedacore https://kedacore.github.io/charts
  helm install keda kedacore/keda --version ${KEDA_VERSION} --namespace keda --create-namespace --wait
  echo "😀 Successfully installed KEDA"

  kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml

  helm upgrade -i kedify-otel oci://ghcr.io/kedify/charts/otel-add-on --version=v0.0.6 --namespace keda --wait --set validatingAdmissionPolicy.enabled=false
  echo "😀 Successfully installed the KEDA OTel add-on"
fi


# Install Knative
if [ "${deploymentMode}" = "Serverless" ]; then
  helm install knative-operator --namespace knative-serving --create-namespace --wait \
    https://github.com/knative/operator/releases/download/knative-${KNATIVE_OPERATOR_VERSION}/knative-operator-${KNATIVE_OPERATOR_VERSION}.tgz
  # Have the operator deploy Knative Serving (minimal spec; customize as needed)
  kubectl apply -f - <<EOF
apiVersion: operator.knative.dev/v1beta1
kind: KnativeServing
metadata:
  name: knative-serving
  namespace: knative-serving
EOF
  kubectl wait --for=condition=Ready knativeserving/knative-serving -n knative-serving --timeout=600s
  echo "😀 Successfully installed Knative"
fi
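
# Finally, install KServe itself (skipped when -d is given). This block is a
# sketch: the chart locations follow the upstream KServe Helm charts
# (oci://ghcr.io/kserve/charts), and KSERVE_VERSION and the values may need
# adjusting for your environment.
if [ "$installKserve" = true ]; then
  helm install kserve-crd oci://ghcr.io/kserve/charts/kserve-crd --version ${KSERVE_VERSION} --namespace kserve --create-namespace --wait
  helm install kserve oci://ghcr.io/kserve/charts/kserve --version ${KSERVE_VERSION} --namespace kserve --wait \
    --set kserve.controller.deploymentMode="${deploymentMode}"
  echo "😀 Successfully installed KServe"
fi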