From 28639aee908624a74c184c767f6173487aae4254 Mon Sep 17 00:00:00 2001
From: Dingjiahui
Date: Fri, 21 Feb 2025 18:36:22 +0800
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E6=94=AF=E6=8C=81=E5=9F=BA?=
 =?UTF-8?q?=E4=BA=8Ek8s=E7=9A=84Pytorch=E7=AE=80=E5=8D=95=E5=9C=BA?=
 =?UTF-8?q?=E6=99=AF=E9=83=A8=E7=BD=B2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the diff is not part of the
commit message):
- The line breaks and indentation of this series were reconstructed; YAML
  nesting follows the variable paths used by pytorch-deployment.yaml
  (containers.http.name, containers.workspace_mount, service.*).
- install.yaml / uninstall.yaml: removed `warn: false` from the `command`
  tasks. The parameter was removed from the command module in
  ansible-core 2.14 and makes the task fail there; kubectl never triggered
  the warning it used to silence, so older releases behave the same.
  Both files now end with a newline.
- readme.md: the sample `kubectl get svc` output showed `80:30699/TCP`
  although the Service port is 8080, and the EXTERNAL-IP value was missing.
- config.yaml: comments translated to English.
- The blob hashes on the `index` lines come from the original series and do
  not match the edited files; regenerate the series with
  `git format-patch` after applying.

 plugins/pytorch/config.yaml                   | 28 ++++++
 plugins/pytorch/doc/readme.md                 | 93 +++++++++++++++++++
 plugins/pytorch/main.yaml                     | 16 ++++
 plugins/pytorch/workspace/install.yaml        | 18 ++++
 .../pytorch/workspace/pytorch-deployment.yaml | 61 ++++++++++++
 plugins/pytorch/workspace/uninstall.yaml      |  7 ++
 6 files changed, 223 insertions(+)
 create mode 100644 plugins/pytorch/config.yaml
 create mode 100644 plugins/pytorch/doc/readme.md
 create mode 100644 plugins/pytorch/main.yaml
 create mode 100644 plugins/pytorch/workspace/install.yaml
 create mode 100644 plugins/pytorch/workspace/pytorch-deployment.yaml
 create mode 100644 plugins/pytorch/workspace/uninstall.yaml

diff --git a/plugins/pytorch/config.yaml b/plugins/pytorch/config.yaml
new file mode 100644
index 0000000..1a8cf2d
--- /dev/null
+++ b/plugins/pytorch/config.yaml
@@ -0,0 +1,28 @@
+all:
+  hosts:
+    # k8s master node
+    # ================ Local deployment =====================
+    localhost:
+      ansible_connection: local
+    # ================ Remote deployment =====================
+    # host1:
+    #   ansible_host: 127.0.0.1     # remote IP
+    #   ansible_port: 22            # SSH port
+    #   ansible_user: root          # user name
+    #   ansible_password: PASSWORD  # password
+
+  vars:
+    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+
+    # ================ demo1: pytorch with http.server =====================
+    kubectl_apply: pytorch-deployment.yaml
+    namespace: pytorch-namespace
+    replicas: 1
+    containers:
+      http:
+        name: http-container
+        image: hub.oepkgs.net/oedeploy/pytorch/pytorch:latest  # amd64
+        # image: hub.oepkgs.net/oedeploy/pytorch/torchserve:latest-arm64  # arm64
+      workspace_mount: /tmp
+    service:
+      http_port: 30699
diff --git a/plugins/pytorch/doc/readme.md b/plugins/pytorch/doc/readme.md
new file mode 100644
index 0000000..df63c4a
--- /dev/null
+++ b/plugins/pytorch/doc/readme.md
@@ -0,0 +1,93 @@
+# 使用 oeDeploy 基于 k8s 集群部署Pytorch
+
+
+1. 准备一个k8s集群
+
+2. 下载oedp命令行工具,并用yum安装。如有更新的oedp版本,可以选择新版本。
+
+   ````bash
+   # x86_64:
+   wget https://repo.oepkgs.net/openEuler/rpm/openEuler-24.03-LTS/contrib/oedp/x86_64/Packages/oedp-1.0.0-20250208.x86_64.rpm
+   yum install -y oedp-1.0.0-20250208.x86_64.rpm
+   # aarch64:
+   wget https://repo.oepkgs.net/openEuler/rpm/openEuler-24.03-LTS/contrib/oedp/aarch64/Packages/oedp-1.0.0-20250208.aarch64.rpm
+   yum install -y oedp-1.0.0-20250208.aarch64.rpm
+   ````
+
+3. 根据实际情况,修改config.yaml
+   请确保目标节点为k8s的master节点
+   `kubectl_apply`需要与workspace下的playbook对应。
+
+4. 一键部署
+   ````bash
+   oedp run install -p pytorch  # -p <插件目录>
+   ````
+
+5. 一键卸载
+   ````bash
+   oedp run uninstall -p pytorch  # -p <插件目录>
+   ````
+
+
+
+## demo
+
+- config.yaml
+  ````yaml
+  all:
+    hosts:
+      localhost:
+        ansible_connection: local
+
+    vars:
+      ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+      kubectl_apply: pytorch-deployment.yaml
+      namespace: pytorch-namespace
+      replicas: 1
+      containers:
+        http:
+          name: http-container
+          image: hub.oepkgs.net/oedeploy/pytorch/pytorch:latest  # amd64 only
+        workspace_mount: /tmp
+      service:
+        http_port: 30699
+  ````
+
+- 查看pod
+
+  ````bash
+  kubectl get pods -n pytorch-namespace
+  ````
+
+  ````
+  NAME                                 READY   STATUS    RESTARTS   AGE
+  pytorch-deployment-db5d59bcb-ptqnp   1/1     Running   0          15m
+  ````
+
+- 查看端口映射,并访问
+
+  ````bash
+  kubectl get svc -n pytorch-namespace
+  ````
+
+  ````
+  NAME              TYPE       CLUSTER-IP     EXTERNAL-IP   PORT(S)          AGE
+  pytorch-service   NodePort   10.96.50.156   <none>        8080:30699/TCP   15m
+  ````
+
+  ````
+  http://x.x.x.x:30699/  # master所在节点
+  ````
+
+- 进入pod
+
+  ````bash
+  kubectl exec -n pytorch-namespace -it pytorch-deployment-db5d59bcb-ptqnp -- /bin/bash
+  ````
+
+- 打印PyTorch信息
+
+  ````bash
+  python -c "import torch; print(torch.__version__); print(torch.tensor([1.0, 2.0, 3.0]) + torch.tensor([4.0, 5.0, 6.0]))"
+  ````
+
diff --git a/plugins/pytorch/main.yaml b/plugins/pytorch/main.yaml
new file mode 100644
index 0000000..f4005c9
--- /dev/null
+++ b/plugins/pytorch/main.yaml
@@ -0,0 +1,16 @@
+name: pytorch
+version: 1.0.0
+description: pytorch
+action:
+  install:
+    description: install pytorch
+    tasks:
+      - name: install pytorch
+        playbook: install.yaml
+        scope: all
+  uninstall:
+    description: uninstall pytorch
+    tasks:
+      - name: uninstall pytorch
+        playbook: uninstall.yaml
+        scope: all
\ No newline at end of file
diff --git a/plugins/pytorch/workspace/install.yaml b/plugins/pytorch/workspace/install.yaml
new file mode 100644
index 0000000..1b6f390
--- /dev/null
+++ b/plugins/pytorch/workspace/install.yaml
@@ -0,0 +1,18 @@
+---
+- name: Deploy Pytorch on Kubernetes
+  hosts: all
+  tasks:
+    - name: Ensure /tmp exists
+      file:
+        path: /tmp
+        state: directory
+
+    - name: Copy and render the kubectl_apply file with variables from config.yaml
+      template:
+        src: "{{ kubectl_apply }}"
+        dest: "/tmp/{{ kubectl_apply }}"
+        mode: '0644'
+
+    - name: Apply the Kubernetes deployment using kubectl
+      command:
+        cmd: kubectl apply -f "/tmp/{{ kubectl_apply }}"
diff --git a/plugins/pytorch/workspace/pytorch-deployment.yaml b/plugins/pytorch/workspace/pytorch-deployment.yaml
new file mode 100644
index 0000000..7c88a75
--- /dev/null
+++ b/plugins/pytorch/workspace/pytorch-deployment.yaml
@@ -0,0 +1,61 @@
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ namespace }}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: pytorch-deployment
+  namespace: {{ namespace }}
+  labels:
+    app: pytorch
+spec:
+  replicas: {{ replicas }}
+  selector:
+    matchLabels:
+      app: pytorch
+  template:
+    metadata:
+      labels:
+        app: pytorch
+    spec:
+      containers:
+        - name: {{ containers.http.name }}
+          image: {{ containers.http.image }}
+          ports:
+            - containerPort: 8080
+          volumeMounts:
+            - mountPath: /workspace
+              name: pytorch-workspace
+          command: ["python"]
+          args: ["-m", "http.server", "8080"]
+          resources:
+            requests:
+              cpu: "500m"
+              memory: "512Mi"
+            limits:
+              cpu: "1"
+              memory: "1Gi"
+      volumes:
+        - name: pytorch-workspace
+          hostPath:
+            path: {{ containers.workspace_mount }}
+            type: Directory
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: pytorch-service
+  namespace: {{ namespace }}
+spec:
+  type: NodePort
+  selector:
+    app: pytorch
+  ports:
+    - name: http-port
+      protocol: TCP
+      port: 8080
+      targetPort: 8080
+      nodePort: {{ service.http_port }}
\ No newline at end of file
diff --git a/plugins/pytorch/workspace/uninstall.yaml b/plugins/pytorch/workspace/uninstall.yaml
new file mode 100644
index 0000000..269d7fb
--- /dev/null
+++ b/plugins/pytorch/workspace/uninstall.yaml
@@ -0,0 +1,7 @@
+---
+- name: Uninstall Pytorch on Kubernetes
+  hosts: all
+  tasks:
+    - name: uninstall the Pytorch using kubectl
+      command:
+        cmd: kubectl delete -f "/tmp/{{ kubectl_apply }}"
-- 
Gitee

From 7c92d61c0dff3443653a03c92aca9ebede08dc7a Mon Sep 17 00:00:00 2001
From: Dingjiahui
Date: Mon, 24 Feb 2025 14:51:22 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E6=94=AF=E6=8C=81port=E8=87=AA=E5=AE=9A?=
 =?UTF-8?q?=E4=B9=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the diff is not part of the
commit message):
- pytorch-deployment.yaml: `containerPort` and the http.server listen port
  now use `service.target_port` instead of `service.port`. The Service
  forwards traffic to `targetPort`, so the container has to listen on that
  port; with `service.port` the deployment only worked while both values
  happened to be equal.
- config.yaml: added inline comments describing the three port settings.
- readme.md: dropped the whitespace-only change to the blank line that
  follows the demo config block.

 plugins/pytorch/config.yaml                       |  4 +++-
 plugins/pytorch/doc/readme.md                     |  9 ++++++---
 plugins/pytorch/workspace/pytorch-deployment.yaml | 10 +++++-----
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/plugins/pytorch/config.yaml b/plugins/pytorch/config.yaml
index 1a8cf2d..ce3465e 100644
--- a/plugins/pytorch/config.yaml
+++ b/plugins/pytorch/config.yaml
@@ -25,4 +25,6 @@ all:
         # image: hub.oepkgs.net/oedeploy/pytorch/torchserve:latest-arm64  # arm64
       workspace_mount: /tmp
     service:
-      http_port: 30699
+      port: 8080         # Service port (rendered into `port`)
+      target_port: 8080  # port http.server listens on inside the container
+      node_port: 30699   # rendered into `nodePort` of the NodePort Service
diff --git a/plugins/pytorch/doc/readme.md b/plugins/pytorch/doc/readme.md
index df63c4a..18ec0f0 100644
--- a/plugins/pytorch/doc/readme.md
+++ b/plugins/pytorch/doc/readme.md
@@ -38,21 +38,24 @@
     hosts:
       localhost:
         ansible_connection: local
-
     vars:
       ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+      # ================ demo1: pytorch with http.server =====================
       kubectl_apply: pytorch-deployment.yaml
       namespace: pytorch-namespace
       replicas: 1
       containers:
         http:
           name: http-container
-          image: hub.oepkgs.net/oedeploy/pytorch/pytorch:latest  # amd64 only
+          image: hub.oepkgs.net/oedeploy/pytorch/pytorch:latest  # amd64
+          # image: hub.oepkgs.net/oedeploy/pytorch/torchserve:latest-arm64  # arm64
         workspace_mount: /tmp
       service:
-        http_port: 30699
+        port: 8080
+        target_port: 8080
+        node_port: 30699
   ````
 
 - 查看pod
 
   ````bash
diff --git a/plugins/pytorch/workspace/pytorch-deployment.yaml b/plugins/pytorch/workspace/pytorch-deployment.yaml
index 7c88a75..fdd4d3c 100644
--- a/plugins/pytorch/workspace/pytorch-deployment.yaml
+++ b/plugins/pytorch/workspace/pytorch-deployment.yaml
@@ -25,12 +25,12 @@ spec:
         - name: {{ containers.http.name }}
           image: {{ containers.http.image }}
           ports:
-            - containerPort: 8080
+            - containerPort: {{ service.target_port }}
           volumeMounts:
             - mountPath: /workspace
               name: pytorch-workspace
           command: ["python"]
-          args: ["-m", "http.server", "8080"]
+          args: ["-m", "http.server", "{{ service.target_port }}"]
           resources:
             requests:
               cpu: "500m"
@@ -56,6 +56,6 @@ spec:
   ports:
     - name: http-port
       protocol: TCP
-      port: 8080
-      targetPort: 8080
-      nodePort: {{ service.http_port }}
\ No newline at end of file
+      port: {{ service.port }}
+      targetPort: {{ service.target_port }}
+      nodePort: {{ service.node_port }}
\ No newline at end of file
-- 
Gitee