From f872b7787ee421e58f3bde91761df35c4e43810f Mon Sep 17 00:00:00 2001 From: Lianjun Zhang Atlas Date: Wed, 15 Jan 2025 15:29:48 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=95=85=E9=9A=9C?= =?UTF-8?q?=E4=B8=8A=E6=8A=A5=E9=98=BB=E5=A1=9E=E7=82=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/ascend-device-plugin/pkg/device/ascendcommon.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/ascend-device-plugin/pkg/device/ascendcommon.go b/component/ascend-device-plugin/pkg/device/ascendcommon.go index 212f16fab..e5cbcdd8b 100644 --- a/component/ascend-device-plugin/pkg/device/ascendcommon.go +++ b/component/ascend-device-plugin/pkg/device/ascendcommon.go @@ -1020,7 +1020,7 @@ func (tool *AscendTools) writeNewFaultCode(deviceMap map[string][]*common.NpuDev func (tool *AscendTools) flushFaultCodesWithInit(device *common.NpuDevice, devFaultInfoMap map[int32][]npuCommon.DevFaultInfo) { if devFaultInfo, ok := devFaultInfoMap[device.LogicID]; ok { - tool.writeFaultToEvent(devFaultInfo) + go tool.writeFaultToEvent(devFaultInfo) } common.SetNewFaultAndCacheOnceRecoverFault(device.LogicID, devFaultInfoMap[device.LogicID], device) common.SetNetworkNewFaultAndCacheOnceRecoverFault(device.LogicID, devFaultInfoMap[device.LogicID], device) -- Gitee From 4c154bbe80e65c4c87637a3409151d205171403b Mon Sep 17 00:00:00 2001 From: Lianjun Zhang Atlas Date: Wed, 15 Jan 2025 15:32:45 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=95=85=E9=9A=9C?= =?UTF-8?q?=E4=B8=8A=E6=8A=A5=E9=98=BB=E5=A1=9E=E7=82=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/ascend-device-plugin/pkg/common/constants.go | 5 +++++ component/ascend-device-plugin/pkg/device/ascendcommon.go | 1 + 2 files changed, 6 insertions(+) diff --git a/component/ascend-device-plugin/pkg/common/constants.go b/component/ascend-device-plugin/pkg/common/constants.go 
index e6fd3b3d1..3d7ff4735 100644 --- a/component/ascend-device-plugin/pkg/common/constants.go +++ b/component/ascend-device-plugin/pkg/common/constants.go @@ -840,3 +840,8 @@ const ( // TimeMilliseconds indicate how many milliseconds are there in a second TimeMilliseconds = 1000 ) + +const ( + // WriteK8sEventIntervalInMillSec is the interval, in milliseconds, between writes of fault events to k8s events + WriteK8sEventIntervalInMillSec = 100 +) diff --git a/component/ascend-device-plugin/pkg/device/ascendcommon.go b/component/ascend-device-plugin/pkg/device/ascendcommon.go index e5cbcdd8b..066c3e96b 100644 --- a/component/ascend-device-plugin/pkg/device/ascendcommon.go +++ b/component/ascend-device-plugin/pkg/device/ascendcommon.go @@ -1182,6 +1182,7 @@ func (tool *AscendTools) writeFaultToEvent(devFaultInfo []npuCommon.DevFaultInfo hwlog.RunLog.Errorf("failed to write device fault to event, %v", err) continue } + time.Sleep(common.WriteK8sEventIntervalInMillSec * time.Millisecond) } } -- Gitee