diff --git a/component/clusterd/pkg/application/profiling/grpc_service.go b/component/clusterd/pkg/application/profiling/grpc_service.go
index 6ea03f4d66479c4f7f611f84bd495531d542a914..b6db1a1f300817254fbbaae1032a5bb0caaff581 100644
--- a/component/clusterd/pkg/application/profiling/grpc_service.go
+++ b/component/clusterd/pkg/application/profiling/grpc_service.go
@@ -76,7 +76,12 @@ func (ps *SwitchManager) ModifyTrainingDataTraceSwitch(ctx context.Context,
 			Code: ErrInvalidParam}, fmt.Errorf("the format of jobNsName is not namespace/jobName")
 	}
 	jobNs, jobName := jobNameInfo[0], jobNameInfo[1]
-	owner := getPGOwner(jobNs, jobName)
+	owner, err := getPGOwner(jobNs, jobName)
+	if err != nil {
+		// Return early when the owner reference cannot be resolved; continuing would
+		// bypass the lifecycle control for this cm.
+		return &profiling.DataTypeRes{Message: err.Error(), Code: ErrServerFault}, err
+	}
 	dtc := profile.NewDataTraceController(jobNs, jobName)
 
 	if cm, err := kube.GetConfigMap(profile.DataTraceCmPrefix+dtc.JobName, dtc.JobNamespace); cm == nil || err != nil {
@@ -108,15 +113,21 @@ func (ps *SwitchManager) ModifyTrainingDataTraceSwitch(ctx context.Context,
 	return response, nil
 }
 
-func getPGOwner(jobNs, jobName string) v1.OwnerReference {
+// getPGOwner looks up the job jobNs/jobName and returns the owner reference of its pod group.
+// It returns an error when the looked-up job has an empty Key or the owner reference lookup fails.
+func getPGOwner(jobNs, jobName string) (v1.OwnerReference, error) {
 	jobInfo := job.GetJobByNameSpaceAndName(jobName, jobNs)
+	if jobInfo.Key == "" {
+		return v1.OwnerReference{}, fmt.Errorf("job %s/%s does not exist", jobNs, jobName)
+	}
 	pgInfo := podgroup.GetPodGroup(jobInfo.Key)
 	owner, err := podgroup.GetOwnerRefByPG(&pgInfo)
 	if err != nil {
-		hwlog.RunLog.Errorf("get owner from pg failed, error: %v", err)
-		return v1.OwnerReference{}
+		hwlog.RunLog.Errorf("get owner reference from pg failed, error: %v", err)
+		return v1.OwnerReference{}, fmt.Errorf("get owner reference from pg of job %s/%s failed: %w",
+			jobNs, jobName, err)
 	}
-	return owner
+	return owner, nil
 }
 
 func (ps *SwitchManager) notifySubscriber(jobName string, jobNs string, dtc *profile.DataTraceController,
diff --git a/component/clusterd/pkg/application/profiling/grpc_service_test.go b/component/clusterd/pkg/application/profiling/grpc_service_test.go
index e2334066f54e32dec2e9b603b6f13063dab9b0c8..b17d7b632a520b474ae4cd2f42536f8bb8e2e0ee 100644
--- a/component/clusterd/pkg/application/profiling/grpc_service_test.go
+++ b/component/clusterd/pkg/application/profiling/grpc_service_test.go
@@ -48,6 +48,9 @@ var publisher = config.NewConfigPublisher[*profiling.DataStatusRes](
 	jobName, context.Background(), "", nil)
 
 func TestModifyTrainingDataTraceSwitch(t *testing.T) {
+	// Stub getPGOwner so the cases below do not depend on the job and pod group lookups it performs.
+	ownerPatch := gomonkey.ApplyFuncReturn(getPGOwner, metav1.OwnerReference{}, nil)
+	defer ownerPatch.Reset()
 	testInvalidJobNsNameFormat(t)
 	testConfigMapNotExistAndCreateSuccess(t)
 	testConfigMapNotExistAndCreateFail(t)