From b219e413dbc45fabc5cc1681d3407612f125a7ea Mon Sep 17 00:00:00 2001 From: fangzehua Date: Fri, 6 May 2022 17:05:32 +0800 Subject: [PATCH] add ms node id --- README.md | 2 +- pkg/controllers/v1/msjob_controller.go | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a5ebdab..372c990 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ kubectl apply -f deploy/v1/ms-operator.yaml ``` 安装后: 使用`kubectl get pods --all-namespaces`,即可看到namespace为ms-operator-system的部署任务。 -使用`kubectl describe pod ms-operator-controller-manager-xxx-xxx -n ms-operator-sysytem`,可查看pod的详细信息。 +使用`kubectl describe pod ms-operator-controller-manager-xxx-xxx -n ms-operator-system`,可查看pod的详细信息。 ## 2. 使用make deploy安装 ``` make deploy IMG=swr.cn-south-1.myhuaweicloud.com/mindspore/ms-operator:latest diff --git a/pkg/controllers/v1/msjob_controller.go b/pkg/controllers/v1/msjob_controller.go index f0666f9..4544e79 100644 --- a/pkg/controllers/v1/msjob_controller.go +++ b/pkg/controllers/v1/msjob_controller.go @@ -94,6 +94,7 @@ const ( msSchedHost = "MS_SCHED_HOST" msSchedPort = "MS_SCHED_PORT" msRole = "MS_ROLE" + msNodeId = "MS_NODE_ID" // exitedWithCodeReason is the normal reason when the pod is exited because of the exit code. exitedWithCodeReason = "ExitedWithCode" @@ -863,6 +864,14 @@ func (r *MSJobReconciler) SetClusterSpec(job interface{}, podTemplate *corev1.Po Value: msSchedHostStr, }) } + podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ + Name: msNodeId, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }) podTemplate.Spec.Containers[i].Env = append(podTemplate.Spec.Containers[i].Env, corev1.EnvVar{ Name: msSchedPort, Value: msSchedPortStr, -- Gitee