diff --git a/plugins/pytorch/config.yaml b/plugins/pytorch/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..ce3465e833b0bb56df6cc4aeba1973a8dd030af7
--- /dev/null
+++ b/plugins/pytorch/config.yaml
@@ -0,0 +1,30 @@
+all:
+  hosts:
+    # k8s master node
+    # ================ local deployment =====================
+    localhost:
+      ansible_connection: local
+    # ================ remote deployment =====================
+    # host1:
+    #   ansible_host: 127.0.0.1       # remote IP
+    #   ansible_port: 22              # port
+    #   ansible_user: root            # user name
+    #   ansible_password: PASSWORD    # password
+
+  vars:
+    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+
+    # ================ demo1: pytorch with http.server =====================
+    kubectl_apply: pytorch-deployment.yaml  # manifest template in workspace/; rendered to /tmp and applied by install.yaml
+    namespace: pytorch-namespace  # namespace created by the manifest and used by its Deployment and Service
+    replicas: 1  # Deployment replica count
+    containers:
+      http:
+        name: http-container
+        image: hub.oepkgs.net/oedeploy/pytorch/pytorch:latest  # amd64
+        # image: hub.oepkgs.net/oedeploy/pytorch/torchserve:latest-arm64  # arm64 (NOTE(review): torchserve image, not pytorch - confirm)
+      workspace_mount: /tmp  # hostPath directory mounted into the container at /workspace
+    service:
+      port: 8080  # Service port
+      target_port: 8080  # Service targetPort
+      node_port: 30699  # Service nodePort
diff --git a/plugins/pytorch/doc/readme.md b/plugins/pytorch/doc/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..18ec0f08ee6e7665f914565f82ff53d68e47db75
--- /dev/null
+++ b/plugins/pytorch/doc/readme.md
@@ -0,0 +1,96 @@
+# 使用 oeDeploy 基于 k8s 集群部署Pytorch
+
+
+1. 准备一个k8s集群
+
+2. 下载oedp命令行工具,并用yum安装。如有更新的oedp版本,可以选择新版本。
+
+   ````bash
+   # x86_64:
+   wget https://repo.oepkgs.net/openEuler/rpm/openEuler-24.03-LTS/contrib/oedp/x86_64/Packages/oedp-1.0.0-20250208.x86_64.rpm
+   yum install -y oedp-1.0.0-20250208.x86_64.rpm
+   # aarch64:
+   wget https://repo.oepkgs.net/openEuler/rpm/openEuler-24.03-LTS/contrib/oedp/aarch64/Packages/oedp-1.0.0-20250208.aarch64.rpm
+   yum install -y oedp-1.0.0-20250208.aarch64.rpm
+   ````
+
+3. 根据实际情况,修改config.yaml
+   请确保目标节点为k8s的master节点
+   `kubectl_apply`需要与workspace下的playbook对应。
+
+4. 一键部署
+   ````bash
+   oedp run install -p pytorch  # -p <插件目录>
+   ````
+
+5.
一键卸载
+   ````bash
+   oedp run uninstall -p pytorch  # -p <插件目录>
+   ````
+
+
+
+## demo
+
+- config.yaml
+  ````yaml
+  all:
+    hosts:
+      localhost:
+        ansible_connection: local
+    vars:
+      ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
+      # ================ demo1: pytorch with http.server =====================
+      kubectl_apply: pytorch-deployment.yaml
+      namespace: pytorch-namespace
+      replicas: 1
+      containers:
+        http:
+          name: http-container
+          image: hub.oepkgs.net/oedeploy/pytorch/pytorch:latest  # amd64
+          # image: hub.oepkgs.net/oedeploy/pytorch/torchserve:latest-arm64  # arm64
+        workspace_mount: /tmp
+      service:
+        port: 8080
+        target_port: 8080
+        node_port: 30699
+  ````
+
+- 查看pod
+
+  ````bash
+  kubectl get pods -n pytorch-namespace
+  ````
+
+  ````
+  NAME                                 READY   STATUS    RESTARTS   AGE
+  pytorch-deployment-db5d59bcb-ptqnp   1/1     Running   0          15m
+  ````
+
+- 查看端口映射,并访问
+
+  ````bash
+  kubectl get svc -n pytorch-namespace
+  ````
+
+  ````
+  NAME              TYPE       CLUSTER-IP     EXTERNAL-IP   PORT(S)          AGE
+  pytorch-service   NodePort   10.96.50.156   <none>        8080:30699/TCP   15m
+  ````
+
+  ````
+  http://x.x.x.x:30699/  # master所在节点
+  ````
+
+- 进入pod
+
+  ````bash
+  kubectl exec -n pytorch-namespace -it pytorch-deployment-db5d59bcb-ptqnp -- /bin/bash
+  ````
+
+- 打印PyTorch信息
+
+  ````bash
+  python -c "import torch; print(torch.__version__); print(torch.tensor([1.0, 2.0, 3.0]) + torch.tensor([4.0, 5.0, 6.0]))"
+  ````
+
diff --git a/plugins/pytorch/main.yaml b/plugins/pytorch/main.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f4005c94f6ce56fd2145d523782d8accbba3a316
--- /dev/null
+++ b/plugins/pytorch/main.yaml
@@ -0,0 +1,16 @@
+name: pytorch
+version: 1.0.0
+description: pytorch
+action:
+  install:
+    description: install pytorch
+    tasks:
+      - name: install pytorch
+        playbook: install.yaml
+        scope: all
+  uninstall:
+    description: uninstall pytorch
+    tasks:
+      - name: uninstall pytorch
+        playbook: uninstall.yaml
+        scope: all
\ No newline at end of file
diff --git a/plugins/pytorch/workspace/install.yaml
b/plugins/pytorch/workspace/install.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b6f39007ac48cfda9a9bfe629b9318f3d9b62a6
--- /dev/null
+++ b/plugins/pytorch/workspace/install.yaml
@@ -0,0 +1,22 @@
+---
+# Renders the manifest template named by kubectl_apply into /tmp and applies
+# it with kubectl on every targeted host.
+- name: Deploy Pytorch on Kubernetes
+  hosts: all
+  tasks:
+    - name: Ensure /tmp exists
+      file:
+        path: /tmp
+        state: directory
+
+    - name: Copy and render the kubectl_apply file with variables from config.yaml
+      template:
+        src: "{{ kubectl_apply }}"
+        dest: "/tmp/{{ kubectl_apply }}"
+        mode: '0644'
+
+    # `warn` is not passed: the command module removed that option in
+    # ansible-core 2.14 and rejects it as an unsupported parameter.
+    - name: Apply the Kubernetes deployment using kubectl
+      command:
+        cmd: kubectl apply -f "/tmp/{{ kubectl_apply }}"
diff --git a/plugins/pytorch/workspace/pytorch-deployment.yaml b/plugins/pytorch/workspace/pytorch-deployment.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdd4d3cbd457813d04236da0a3248bd893933ac5
--- /dev/null
+++ b/plugins/pytorch/workspace/pytorch-deployment.yaml
@@ -0,0 +1,65 @@
+---
+# Jinja2 template rendered by the install playbook: defines a Namespace, a
+# Deployment running python -m http.server, and a NodePort Service.
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ namespace }}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: pytorch-deployment
+  namespace: {{ namespace }}
+  labels:
+    app: pytorch
+spec:
+  replicas: {{ replicas }}
+  selector:
+    matchLabels:
+      app: pytorch
+  template:
+    metadata:
+      labels:
+        app: pytorch
+    spec:
+      containers:
+        - name: {{ containers.http.name }}
+          image: {{ containers.http.image }}
+          # Listen on service.target_port: that is the port the Service below
+          # forwards to (its targetPort), so the two must be the same value.
+          ports:
+            - containerPort: {{ service.target_port }}
+          volumeMounts:
+            - mountPath: /workspace
+              name: pytorch-workspace
+          command: ["python"]
+          args: ["-m", "http.server", "{{ service.target_port }}"]
+          resources:
+            requests:
+              cpu: "500m"
+              memory: "512Mi"
+            limits:
+              cpu: "1"
+              memory: "1Gi"
+      volumes:
+        - name: pytorch-workspace
+          hostPath:
+            path: {{ containers.workspace_mount }}
+            type: Directory
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: pytorch-service
+  namespace: {{ namespace }}
+spec:
+  type: NodePort
+  selector:
+    app: pytorch
+  ports:
+    -
name: http-port
+      protocol: TCP
+      port: {{ service.port }}
+      targetPort: {{ service.target_port }}
+      nodePort: {{ service.node_port }}
\ No newline at end of file
diff --git a/plugins/pytorch/workspace/uninstall.yaml b/plugins/pytorch/workspace/uninstall.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..269d7fb5c1c162d1352098ea5eed3e65ec2f7f3b
--- /dev/null
+++ b/plugins/pytorch/workspace/uninstall.yaml
@@ -0,0 +1,19 @@
+---
+# Deletes every resource declared in the rendered kubectl_apply manifest.
+- name: Uninstall Pytorch on Kubernetes
+  hosts: all
+  tasks:
+    # /tmp may have been cleaned since install. force: false keeps an existing
+    # rendered manifest and only renders a new one when the file is missing.
+    - name: Render the kubectl_apply file if it is missing from /tmp
+      template:
+        src: "{{ kubectl_apply }}"
+        dest: "/tmp/{{ kubectl_apply }}"
+        mode: '0644'
+        force: false
+
+    # `warn` is not passed: the command module removed it in ansible-core 2.14.
+    # --ignore-not-found lets a repeated uninstall succeed.
+    - name: uninstall the Pytorch using kubectl
+      command:
+        cmd: kubectl delete --ignore-not-found -f "/tmp/{{ kubectl_apply }}"