From 41f688973d0e9df4cf2dfdd4656465158301ea7f Mon Sep 17 00:00:00 2001 From: chengjunhua Date: Mon, 23 Jun 2025 20:29:09 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AA=AC?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E4=BA=9E=E5=81=A5=E5=BA=B7?= =?UTF-8?q?=E6=95=85=E9=9A=9C=E8=99=95=E7=90=86=E7=9A=84=E5=95=8F=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/clusterd/pkg/application/recover/controller.go | 6 +++--- .../clusterd/pkg/application/recover/controller_test.go | 4 ++-- .../pkg/application/recover/fault_recover_service.go | 3 +-- .../pkg/application/recover/fault_recover_service_test.go | 2 +- component/clusterd/pkg/domain/common/type.go | 2 +- component/clusterd/pkg/domain/common/utils.go | 7 +++++-- component/clusterd/pkg/domain/common/utils_test.go | 4 ++-- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/component/clusterd/pkg/application/recover/controller.go b/component/clusterd/pkg/application/recover/controller.go index e4606914e..908867367 100644 --- a/component/clusterd/pkg/application/recover/controller.go +++ b/component/clusterd/pkg/application/recover/controller.go @@ -324,11 +324,11 @@ func (ctl *EventController) shouldDumpWhenOccurFault() bool { return false } if ctl.healthState == constant.UnHealthyState || - (ctl.healthState == constant.SubHealthyState && ctl.jobInfo.GraceExit) { + (ctl.healthState == constant.SubHealthyState && ctl.jobInfo.SubHealthyStrategy == "graceExit") { return true } - hwlog.RunLog.Infof("jobId=%s healthState=%v graceExit=%v, should not dump", - ctl.jobInfo.JobId, ctl.healthState, ctl.jobInfo.GraceExit) + hwlog.RunLog.Infof("jobId=%s healthState=%v subHealthy strategy=%v, should not dump", + ctl.jobInfo.JobId, ctl.healthState, ctl.jobInfo.SubHealthyStrategy) return false } diff --git a/component/clusterd/pkg/application/recover/controller_test.go b/component/clusterd/pkg/application/recover/controller_test.go index d50b025ae..315c0a794 100644 --- a/component/clusterd/pkg/application/recover/controller_test.go +++ b/component/clusterd/pkg/application/recover/controller_test.go @@ -278,11 +278,11 @@ func TestShouldDumpWhenOccurFault(t *testing.T) { }) ctl.healthState = constant.SubHealthyState convey.Convey("06-healthState is subHealthy and graceExit is false, should return false", func() { - ctl.jobInfo.GraceExit = false + ctl.jobInfo.SubHealthyStrategy = "" convey.So(ctl.shouldDumpWhenOccurFault(), convey.ShouldBeFalse) }) convey.Convey("07-healthState is subHealthy and graceExit is true, should return true", func() { - ctl.jobInfo.GraceExit = true + ctl.jobInfo.SubHealthyStrategy = "graceExit" convey.So(ctl.shouldDumpWhenOccurFault(), convey.ShouldBeTrue) }) }) diff --git a/component/clusterd/pkg/application/recover/fault_recover_service.go b/component/clusterd/pkg/application/recover/fault_recover_service.go index c00a7ca32..ef20b4eab 100644 --- a/component/clusterd/pkg/application/recover/fault_recover_service.go +++ b/component/clusterd/pkg/application/recover/fault_recover_service.go @@ -60,8 +60,7 @@ func (s *FaultRecoverService) notifyFaultInfoForJob(faultInfo constant.JobFaultI return } hwlog.RunLog.Infof("get fault info from fault center=%v", faultInfo) - if faultInfo.HealthyState == constant.SubHealthyState && - (!controller.jobInfo.GraceExit || !controller.onlySupportDumpStrategy()) { + if faultInfo.HealthyState == constant.SubHealthyState && controller.jobInfo.SubHealthyStrategy == "ignore" { hwlog.RunLog.Infof("jobId=%s skip handle subHealthy faults", faultInfo.JobId) return } diff --git a/component/clusterd/pkg/application/recover/fault_recover_service_test.go b/component/clusterd/pkg/application/recover/fault_recover_service_test.go index f44fa2f56..dcc4fd078 100644 --- a/component/clusterd/pkg/application/recover/fault_recover_service_test.go +++ b/component/clusterd/pkg/application/recover/fault_recover_service_test.go @@ -31,7 +31,7 @@ func TestNotifyFaultInfoForJob(t *testing.T) { svr := &FaultRecoverService{ eventCtl: map[string]*EventController{ fakeJobID1: {jobInfo: common.JobBaseInfo{ - RecoverConfig: common.RecoverConfig{GraceExit: false}}, + RecoverConfig: common.RecoverConfig{SubHealthyStrategy: ""}}, }, }, } diff --git a/component/clusterd/pkg/domain/common/type.go b/component/clusterd/pkg/domain/common/type.go index 583af9d17..1c801ac75 100644 --- a/component/clusterd/pkg/domain/common/type.go +++ b/component/clusterd/pkg/domain/common/type.go @@ -42,7 +42,7 @@ type RecoverConfig struct { ProcessRecoverEnable bool MindXConfigStrategies []string PlatFormMode bool - GraceExit bool + SubHealthyStrategy string } // JobBaseInfo job base info diff --git a/component/clusterd/pkg/domain/common/utils.go b/component/clusterd/pkg/domain/common/utils.go index 7f72eb232..ba6669289 100644 --- a/component/clusterd/pkg/domain/common/utils.go +++ b/component/clusterd/pkg/domain/common/utils.go @@ -118,9 +118,12 @@ func GetRecoverBaseInfo(name, namespace string) (RecoverConfig, RespCode, error) strategy, ok := pg.Labels[constant.SubHealthyStrategy] if !ok { hwlog.RunLog.Debugf("can not find subHealthyStrategy label") - config.GraceExit = false + config.SubHealthyStrategy = "" } - config.GraceExit = strategy == constant.SubHealthyGraceExit + if strategy != "forceExit" && strategy != "graceExit" && strategy != "ignore" { + strategy = "ignore" + } + config.SubHealthyStrategy = strategy return config, OK, nil } diff --git a/component/clusterd/pkg/domain/common/utils_test.go b/component/clusterd/pkg/domain/common/utils_test.go index bd1464376..efdf232d4 100644 --- a/component/clusterd/pkg/domain/common/utils_test.go +++ b/component/clusterd/pkg/domain/common/utils_test.go @@ -309,7 +309,7 @@ func TestGetRecoverBaseInfo(t *testing.T) { convey.So(err, convey.ShouldBeNil) convey.So(code, convey.ShouldEqual, OK) convey.So(config.ProcessRecoverEnable, convey.ShouldBeFalse) - convey.So(config.GraceExit, convey.ShouldBeFalse) + convey.So(config.SubHealthyStrategy, convey.ShouldEqual, "") convey.So(config.PlatFormMode, convey.ShouldBeFalse) }) convey.Convey("case get pod group success, and process-recover-enable on", func() { @@ -325,7 +325,7 @@ func TestGetRecoverBaseInfo(t *testing.T) { convey.So(err, convey.ShouldBeNil) convey.So(code, convey.ShouldEqual, OK) convey.So(config.ProcessRecoverEnable, convey.ShouldBeTrue) - convey.So(config.GraceExit, convey.ShouldBeTrue) + convey.So(config.SubHealthyStrategy, convey.ShouldEqual, "graceExit") convey.So(config.PlatFormMode, convey.ShouldBeFalse) }) addTestCaseForLabelNotExist(info.PgName, info.Namespace) -- Gitee From 067b461aec8ae35b239d08e0d84fe14e4b4edb07 Mon Sep 17 00:00:00 2001 From: chengjunhua Date: Tue, 24 Jun 2025 18:08:58 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AA=AC?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E4=BA=9E=E5=81=A5=E5=BA=B7?= =?UTF-8?q?=E6=95=85=E9=9A=9C=E8=99=95=E7=90=86=E7=9A=84=E5=95=8F=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pkg/application/recover/fault_recover_service.go | 3 ++- component/clusterd/pkg/common/constant/const.go | 4 ++++ component/clusterd/pkg/domain/common/utils.go | 5 +++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/component/clusterd/pkg/application/recover/fault_recover_service.go b/component/clusterd/pkg/application/recover/fault_recover_service.go index ef20b4eab..f1d145f66 100644 --- a/component/clusterd/pkg/application/recover/fault_recover_service.go +++ b/component/clusterd/pkg/application/recover/fault_recover_service.go @@ -60,7 +60,8 @@ func (s *FaultRecoverService) notifyFaultInfoForJob(faultInfo constant.JobFaultI return } hwlog.RunLog.Infof("get fault info from fault center=%v", faultInfo) - if faultInfo.HealthyState == constant.SubHealthyState && controller.jobInfo.SubHealthyStrategy == "ignore" { + if faultInfo.HealthyState == constant.SubHealthyState && + controller.jobInfo.SubHealthyStrategy == constant.SubHealthyIgnore { hwlog.RunLog.Infof("jobId=%s skip handle subHealthy faults", faultInfo.JobId) return } diff --git a/component/clusterd/pkg/common/constant/const.go b/component/clusterd/pkg/common/constant/const.go index e67f0e2f1..301790ca4 100644 --- a/component/clusterd/pkg/common/constant/const.go +++ b/component/clusterd/pkg/common/constant/const.go @@ -38,6 +38,10 @@ const ( SubHealthyStrategy = "subHealthyStrategy" // SubHealthyGraceExit strategy name of grace exit SubHealthyGraceExit = "graceExit" + // SubHealthyIgnore strategy name of ignore + SubHealthyIgnore = "ignore" + // SubHealthyForceExit strategy name of force exit + SubHealthyForceExit = "forceExit" // HealthyState state of Healthy HealthyState = "Healthy" // UnHealthyState state of unHealthy diff --git a/component/clusterd/pkg/domain/common/utils.go b/component/clusterd/pkg/domain/common/utils.go index ba6669289..e818b1aca 100644 --- a/component/clusterd/pkg/domain/common/utils.go +++ b/component/clusterd/pkg/domain/common/utils.go @@ -120,8 +120,9 @@ func GetRecoverBaseInfo(name, namespace string) (RecoverConfig, RespCode, error) hwlog.RunLog.Debugf("can not find subHealthyStrategy label") config.SubHealthyStrategy = "" } - if strategy != "forceExit" && strategy != "graceExit" && strategy != "ignore" { - strategy = "ignore" + if strategy != constant.SubHealthyIgnore && strategy != constant.SubHealthyGraceExit && + strategy != constant.SubHealthyForceExit { + strategy = constant.SubHealthyIgnore } config.SubHealthyStrategy = strategy return config, OK, nil -- Gitee From 202edcbe0f2bdb5a7b85c3ba1642523c893626c5 Mon Sep 17 00:00:00 2001 From: chengjunhua Date: Tue, 24 Jun 2025 18:35:22 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AA=AC?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E4=BA=9E=E5=81=A5=E5=BA=B7?= =?UTF-8?q?=E6=95=85=E9=9A=9C=E8=99=95=E7=90=86=E7=9A=84=E5=95=8F=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pkg/application/recover/fault_recover_service_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/component/clusterd/pkg/application/recover/fault_recover_service_test.go b/component/clusterd/pkg/application/recover/fault_recover_service_test.go index dcc4fd078..257cda95e 100644 --- a/component/clusterd/pkg/application/recover/fault_recover_service_test.go +++ b/component/clusterd/pkg/application/recover/fault_recover_service_test.go @@ -31,7 +31,7 @@ func TestNotifyFaultInfoForJob(t *testing.T) { svr := &FaultRecoverService{ eventCtl: map[string]*EventController{ fakeJobID1: {jobInfo: common.JobBaseInfo{ - RecoverConfig: common.RecoverConfig{SubHealthyStrategy: ""}}, + RecoverConfig: common.RecoverConfig{SubHealthyStrategy: constant.SubHealthyIgnore}}, }, }, } @@ -41,7 +41,7 @@ func TestNotifyFaultInfoForJob(t *testing.T) { svr.notifyFaultInfoForJob(info) convey.So(svr.eventCtl[mockJob], convey.ShouldBeNil) }) - convey.Convey("02-subHealthy fault and not graceExit, should not add event", func() { + convey.Convey("02-subHealthy fault and ignore, should not add event", func() { mockJob := fakeJobID1 info := constant.JobFaultInfo{ JobId: mockJob, -- Gitee From 0cee18922a1f786f0e3e44655ffcece46f4226e5 Mon Sep 17 00:00:00 2001 From: chengjunhua Date: Tue, 24 Jun 2025 18:42:47 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AA=AC?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E4=BA=9E=E5=81=A5=E5=BA=B7?= =?UTF-8?q?=E6=95=85=E9=9A=9C=E8=99=95=E7=90=86=E7=9A=84=E5=95=8F=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/clusterd/pkg/domain/common/utils_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/clusterd/pkg/domain/common/utils_test.go b/component/clusterd/pkg/domain/common/utils_test.go index efdf232d4..f92c320ba 100644 --- a/component/clusterd/pkg/domain/common/utils_test.go +++ b/component/clusterd/pkg/domain/common/utils_test.go @@ -309,7 +309,7 @@ func TestGetRecoverBaseInfo(t *testing.T) { convey.So(err, convey.ShouldBeNil) convey.So(code, convey.ShouldEqual, OK) convey.So(config.ProcessRecoverEnable, convey.ShouldBeFalse) - convey.So(config.SubHealthyStrategy, convey.ShouldEqual, "") + convey.So(config.SubHealthyStrategy, convey.ShouldEqual, constant.SubHealthyIgnore) convey.So(config.PlatFormMode, convey.ShouldBeFalse) }) convey.Convey("case get pod group success, and process-recover-enable on", func() { -- Gitee From 4aec47c6e5d627c79a180a3ba394a85f800f4ef2 Mon Sep 17 00:00:00 2001 From: chengjunhua Date: Tue, 24 Jun 2025 19:13:57 +0800 Subject: [PATCH 5/5] =?UTF-8?q?=E3=80=90=E4=BF=AE=E6=94=B9=E8=AA=AC?= =?UTF-8?q?=E6=98=8E=E3=80=91=E4=BF=AE=E6=94=B9=E4=BA=9E=E5=81=A5=E5=BA=B7?= =?UTF-8?q?=E6=95=85=E9=9A=9C=E8=99=95=E7=90=86=E7=9A=84=E5=95=8F=E9=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/clusterd/pkg/domain/common/utils_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/clusterd/pkg/domain/common/utils_test.go b/component/clusterd/pkg/domain/common/utils_test.go index f92c320ba..211b07937 100644 --- a/component/clusterd/pkg/domain/common/utils_test.go +++ b/component/clusterd/pkg/domain/common/utils_test.go @@ -325,7 +325,7 @@ func TestGetRecoverBaseInfo(t *testing.T) { convey.So(err, convey.ShouldBeNil) convey.So(code, convey.ShouldEqual, OK) convey.So(config.ProcessRecoverEnable, convey.ShouldBeTrue) - convey.So(config.SubHealthyStrategy, convey.ShouldEqual, "graceExit") + convey.So(config.SubHealthyStrategy, convey.ShouldEqual, constant.SubHealthyGraceExit) convey.So(config.PlatFormMode, convey.ShouldBeFalse) }) addTestCaseForLabelNotExist(info.PgName, info.Namespace) -- Gitee