From 9908706b3420f34b9d86c319a58c7506f117ba24 Mon Sep 17 00:00:00 2001 From: chengjunhua Date: Wed, 15 Jan 2025 20:00:24 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=9C=AA=E6=9F=A5=E8=AF=A2?= =?UTF-8?q?=E5=88=B0=E6=AF=8F=E4=B8=AApod=E7=9A=84device=20num=E6=97=B6?= =?UTF-8?q?=EF=BC=8Clabel=20pod=E5=AF=BC=E8=87=B4clusterD=20panic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/clusterd/pkg/domain/pod/pod_util.go | 1 + component/clusterd/pkg/interface/grpc/common/utils.go | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/component/clusterd/pkg/domain/pod/pod_util.go b/component/clusterd/pkg/domain/pod/pod_util.go index 05bfde2d0..921edb32b 100644 --- a/component/clusterd/pkg/domain/pod/pod_util.go +++ b/component/clusterd/pkg/domain/pod/pod_util.go @@ -214,6 +214,7 @@ func GetPodDeviceNumByJobId(jobKey string) int { return len(podDevice.Devices) } } + hwlog.RunLog.Warnf("failed get pod device num, job key: %s, len(podJobMap): %d", jobKey, len(podJobMap)) return 0 } diff --git a/component/clusterd/pkg/interface/grpc/common/utils.go b/component/clusterd/pkg/interface/grpc/common/utils.go index 954c30baf..a7da46271 100644 --- a/component/clusterd/pkg/interface/grpc/common/utils.go +++ b/component/clusterd/pkg/interface/grpc/common/utils.go @@ -297,6 +297,10 @@ func RemoveSliceDuplicateFaults(faults []*pb.FaultRank) []*pb.FaultRank { func LabelFaultPod(jobId string, rankList []string, labeledMap map[string]string) (map[string]string, error) { var faultPodRankList []string devicePerNode := pod.GetPodDeviceNumByJobId(jobId) + if devicePerNode == 0 { + hwlog.RunLog.Errorf("get device num per pod failed, task id: %s", jobId) + return nil, fmt.Errorf("get device num per pod failed, task id: %s", jobId) + } for _, rank := range rankList { faultRank, err := strconv.Atoi(rank) if err != nil { -- Gitee