From b05b7d5b2643f4952f57d66387eee2bbc4560d0c Mon Sep 17 00:00:00 2001 From: wangjun Date: Sun, 17 Aug 2025 10:40:35 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E3=80=90clusterd=E3=80=91=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=E3=80=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?L2=E7=BA=A7=E5=88=AB=E6=95=85=E9=9A=9C=E4=B8=8A=E6=8A=A5?= =?UTF-8?q?=E5=92=8C=E5=90=8E=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/clusterd/pkg/interface/grpc/fault/fault.proto | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/component/clusterd/pkg/interface/grpc/fault/fault.proto b/component/clusterd/pkg/interface/grpc/fault/fault.proto index 44529d40c..0f55ed05f 100644 --- a/component/clusterd/pkg/interface/grpc/fault/fault.proto +++ b/component/clusterd/pkg/interface/grpc/fault/fault.proto @@ -34,6 +34,12 @@ message NodeFaultInfo { repeated DeviceFaultInfo faultDevice = 5; } +message DeviceFaultCodeInfo { + string faultCode = 1; + string chipId = 2; + string portId = 3; +} + message DeviceFaultInfo { string deviceId = 1; string deviceType = 2; @@ -41,6 +47,7 @@ message DeviceFaultInfo { string faultLevel = 4; repeated string faultType = 5; repeated string faultReason = 6; + repeated DeviceFaultCodeInfo faultCodeInfo = 7; } service Fault { -- Gitee From 6165af16187389acf9e4f28f0f9c2889bdbda3da Mon Sep 17 00:00:00 2001 From: wangjun Date: Sun, 17 Aug 2025 17:21:06 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E3=80=90clusterd=E3=80=91=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=E3=80=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?L2=E7=BA=A7=E5=88=AB=E6=95=85=E9=9A=9C=E4=B8=8A=E6=8A=A5?= =?UTF-8?q?=E5=92=8C=E5=90=8E=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../faultrank/job_fault_rank_processor.go | 74 ++++++++++++++----- .../relationfault/relation_fault_processor.go | 8 +- .../clusterd/pkg/common/constant/constants.go | 4 + .../clusterd/pkg/common/constant/type.go | 14 ++++ .../pkg/interface/grpc/fault/fault.pb.go | 10 +++ 5 files changed, 90 insertions(+), 20 deletions(-) diff --git a/component/clusterd/pkg/application/faultmanager/jobprocess/faultrank/job_fault_rank_processor.go b/component/clusterd/pkg/application/faultmanager/jobprocess/faultrank/job_fault_rank_processor.go index 1df9eafe1..eb03b97f0 100644 --- a/component/clusterd/pkg/application/faultmanager/jobprocess/faultrank/job_fault_rank_processor.go +++ b/component/clusterd/pkg/application/faultmanager/jobprocess/faultrank/job_fault_rank_processor.go @@ -297,8 +297,15 @@ func (processor *jobRankFaultInfoProcessor) findNodeDeviceAndSwitchFault( if node == nil || !faultdomain.IsNodeReady(node) { hwlog.RunLog.Debugf("node %s is not ready", nodeName) faultList = append(faultList, serverHcclToFaultRank(server, info, "")...) - faultDeviceList = append(faultDeviceList, convertToFaultDevice(&server, "", - constant.SeparateNPU, constant.EmptyDeviceId, constant.FaultTypeNode)) + param := constant.FaultCovertParam{ + DeviceId: constant.EmptyDeviceId, + FaultCode: "", + FaultLevel: constant.SeparateNPU, + DeviceType: constant.FaultTypeNode, + ChipId: constant.EmptyChipId, + PortId: constant.EmptyPortId, + } + faultDeviceList = append(faultDeviceList, convertToFaultDevice(&server, param)) } advanceDeviceInfo := deviceCmForNodeMap[nodeName] faultRankList := processor.findFaultRankForJob(advanceDeviceInfo, nodeName, serverList, info) @@ -314,9 +321,10 @@ func getFaultDeviceInfoByRelationFault(jobId, nodeName string, server *constant. hwlog.RunLog.Debugf("jobId: %s, nodeName: %s, relationFaultList: %v", jobId, nodeName, relationFaultList) faultList := make([]constant.FaultDevice, 0) for _, fault := range relationFaultList { - faultType, deviceId := "", "" + faultType, deviceId, chipId, portId := "", "", constant.EmptyChipId, constant.EmptyPortId if fault.FaultType == constant.SwitchFaultType { - faultType, deviceId = constant.FaultTypeSwitch, constant.EmptyDeviceId + faultType, deviceId, chipId, portId = constant.FaultTypeSwitch, constant.EmptyDeviceId, + fault.FaultChipId, fault.FaultPortId } else if fault.FaultType == constant.DeviceFaultType { targetLength := 2 if fields := strings.Split(fault.NPUName, constant.Minus); len(fields) == targetLength { @@ -331,8 +339,15 @@ func getFaultDeviceInfoByRelationFault(jobId, nodeName string, server *constant. hwlog.RunLog.Warnf("relation fault type:[%s] is unknown", fault.FaultType) continue } - faultList = append(faultList, convertToFaultDevice(server, fault.FaultCode, fault.ExecutedStrategy, - deviceId, faultType)) + param := constant.FaultCovertParam{ + DeviceId: deviceId, + FaultCode: fault.FaultCode, + FaultLevel: fault.ExecutedStrategy, + DeviceType: faultType, + ChipId: chipId, + PortId: portId, + } + faultList = append(faultList, convertToFaultDevice(server, param)) } return faultList } @@ -344,8 +359,15 @@ func getFautDeviceInfoByFaultRank(server *constant.ServerHccl, } faultList := make([]constant.FaultDevice, 0) for _, faultRank := range faultRankList { - faultList = append(faultList, convertToFaultDevice(server, faultRank.FaultCode, faultRank.FaultLevel, - faultRank.DeviceId, constant.FaultTypeNPU)) + param := constant.FaultCovertParam{ + DeviceId: faultRank.DeviceId, + FaultCode: faultRank.FaultCode, + FaultLevel: faultRank.FaultLevel, + DeviceType: constant.FaultTypeNPU, + ChipId: constant.EmptyChipId, + PortId: constant.EmptyPortId, + } + faultList = append(faultList, convertToFaultDevice(server, param)) } return faultList } @@ -357,9 +379,15 @@ func getFaultDeviceInfoByNodeInfo(server *constant.ServerHccl, nodeInfo *constan faultList := make([]constant.FaultDevice, 0) for _, faultDev := range nodeInfo.FaultDevList { for _, faultCode := range faultDev.FaultCode { - deviceId := strconv.FormatInt(faultDev.DeviceId, constant.FormatBase) - faultList = append(faultList, convertToFaultDevice(server, faultCode, faultDev.FaultLevel, - deviceId, faultDev.DeviceType)) + param := constant.FaultCovertParam{ + DeviceId: strconv.FormatInt(faultDev.DeviceId, constant.FormatBase), + FaultCode: faultCode, + FaultLevel: faultDev.FaultLevel, + DeviceType: faultDev.DeviceType, + ChipId: constant.EmptyChipId, + PortId: constant.EmptyPortId, + } + faultList = append(faultList, convertToFaultDevice(server, param)) } } return faultList @@ -372,22 +400,30 @@ func getFaultDeviceInfoBySwitchInfo(server *constant.ServerHccl, } faultList := make([]constant.FaultDevice, 0) for _, faultInfo := range switchInfo.SwitchFaultInfo.FaultInfo { - faultList = append(faultList, convertToFaultDevice(server, faultInfo.AssembledFaultCode, - switchInfo.SwitchFaultInfo.FaultLevel, constant.EmptyDeviceId, constant.FaultTypeSwitch)) + param := constant.FaultCovertParam{ + DeviceId: constant.EmptyDeviceId, + FaultCode: faultInfo.AssembledFaultCode, + FaultLevel: switchInfo.SwitchFaultInfo.FaultLevel, + DeviceType: constant.FaultTypeSwitch, + ChipId: strconv.FormatUint(uint64(faultInfo.SwitchChipId), constant.FormatBase), + PortId: strconv.FormatUint(uint64(faultInfo.SwitchPortId), constant.FormatBase), + } + faultList = append(faultList, convertToFaultDevice(server, param)) } return faultList } -func convertToFaultDevice(server *constant.ServerHccl, faultCode, - faultLevel, deviceId, deviceType string) constant.FaultDevice { +func convertToFaultDevice(server *constant.ServerHccl, param constant.FaultCovertParam) constant.FaultDevice { return constant.FaultDevice{ ServerName: server.ServerName, ServerSN: server.ServerSN, ServerId: server.ServerID, - DeviceId: deviceId, - FaultCode: faultCode, - FaultLevel: faultLevel, - DeviceType: deviceType, + DeviceId: param.DeviceId, + FaultCode: param.FaultCode, + FaultLevel: param.FaultLevel, + DeviceType: param.DeviceType, + ChipId: param.ChipId, + PortId: param.PortId, } } diff --git a/component/clusterd/pkg/application/faultmanager/jobprocess/relationfault/relation_fault_processor.go b/component/clusterd/pkg/application/faultmanager/jobprocess/relationfault/relation_fault_processor.go index 7192b5b65..ab7314bff 100644 --- a/component/clusterd/pkg/application/faultmanager/jobprocess/relationfault/relation_fault_processor.go +++ b/component/clusterd/pkg/application/faultmanager/jobprocess/relationfault/relation_fault_processor.go @@ -6,6 +6,7 @@ package relationfault import ( "encoding/json" "fmt" + "strconv" "sync" "time" @@ -88,6 +89,7 @@ func (processor *relationFaultProcessor) InitFaultJobs() { tmpFaultJob.NameSpace = serverList.PodNameSpace switchInfo, ok := processor.switchInfoCm[constant.SwitchInfoPrefix+nodeName] if ok { + //todo if issubscribe filter L2 fault tmpFaultJob.initBySwitchFault(switchInfo, serverList) } deviceInfo, ok := processor.deviceInfoCm[nodeName] @@ -407,6 +409,8 @@ func (fJob *FaultJob) initFaultInfoByDeviceFault( DealMaxTime: getFaultCodeDelMaxTime(faultCode), FaultUid: nodeName + "-" + fault.NPUName + "-" + faultCode, ForceAdd: fault.ForceAdd, + FaultChipId: constant.EmptyChipId, + FaultPortId: constant.EmptyPortId, } fJob.AllFaultCode.Insert(tmpFaultInfo.FaultUid) fJob.addFaultInfoByCodeType(&tmpFaultInfo) @@ -461,7 +465,9 @@ func (fJob *FaultJob) initBySwitchFault(switchInfo *constant.SwitchInfo, serverL FaultLevel: switchInfo.FaultLevel, FaultUid: serverList.ServerName + "-" + constant.AllCardId + "-" + faultInfo.AssembledFaultCode, - ForceAdd: faultInfo.ForceAdd, + ForceAdd: faultInfo.ForceAdd, + FaultChipId: strconv.FormatUint(uint64(faultInfo.SwitchChipId), constant.FormatBase), + FaultPortId: strconv.FormatUint(uint64(faultInfo.SwitchPortId), constant.FormatBase), } fJob.AllFaultCode.Insert(tmpFaultInfo.FaultUid) fJob.addFaultInfoByCodeType(&tmpFaultInfo) diff --git a/component/clusterd/pkg/common/constant/constants.go b/component/clusterd/pkg/common/constant/constants.go index 835ab116c..63e072894 100644 --- a/component/clusterd/pkg/common/constant/constants.go +++ b/component/clusterd/pkg/common/constant/constants.go @@ -226,6 +226,10 @@ const ( EmptyDeviceId = "-1" // FormatBase The base number used to convert int to string FormatBase = 10 + // EmptyChipId chip id for node or device fault + EmptyChipId = "-1" + // EmptyPortId port id for node or device fault + EmptyPortId ) // ras feature const diff --git a/component/clusterd/pkg/common/constant/type.go b/component/clusterd/pkg/common/constant/type.go index 1c3d480cf..814eb13a9 100644 --- a/component/clusterd/pkg/common/constant/type.go +++ b/component/clusterd/pkg/common/constant/type.go @@ -309,6 +309,8 @@ type FaultDevice struct { FaultCode string FaultLevel string DeviceType string + ChipId string + PortId string } // FaultStrategy fault strategies @@ -335,6 +337,8 @@ type FaultInfo struct { ExecutedStrategy string DealMaxTime int64 ForceAdd bool + FaultChipId string + FaultPortId string } // FaultDuration fault duration config @@ -441,3 +445,13 @@ type SimplePodInfo struct { PodUid string PodRank string } + +// FaultCovertParam fault convert param +type FaultCovertParam struct { + DeviceId string + FaultCode string + FaultLevel string + DeviceType string + ChipId string + PortId string +} diff --git a/component/clusterd/pkg/interface/grpc/fault/fault.pb.go b/component/clusterd/pkg/interface/grpc/fault/fault.pb.go index 424655f2c..aaa43d78f 100644 --- a/component/clusterd/pkg/interface/grpc/fault/fault.pb.go +++ b/component/clusterd/pkg/interface/grpc/fault/fault.pb.go @@ -303,6 +303,15 @@ func (m *NodeFaultInfo) GetFaultDevice() []*DeviceFaultInfo { return nil } +type DeviceFaultCodeInfo struct { + FaultCode string `protobuf:"bytes,1,opt,name=faultCode,proto3" json:"faultCode,omitempty"` + ChipId string `protobuf:"bytes,2,opt,name=chipId,proto3" json:"chipId,omitempty"` + PortId string `protobuf:"bytes,3,opt,name=portId,proto3" json:"portId,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + type DeviceFaultInfo struct { DeviceId string `protobuf:"bytes,1,opt,name=deviceId,proto3" json:"deviceId,omitempty"` DeviceType string `protobuf:"bytes,2,opt,name=deviceType,proto3" json:"deviceType,omitempty"` @@ -310,6 +319,7 @@ type DeviceFaultInfo struct { FaultLevel string `protobuf:"bytes,4,opt,name=faultLevel,proto3" json:"faultLevel,omitempty"` FaultType []string `protobuf:"bytes,5,rep,name=faultType,proto3" json:"faultType,omitempty"` FaultReason []string `protobuf:"bytes,6,rep,name=faultReason,proto3" json:"faultReason,omitempty"` + FaultCodeInfo []DeviceFaultCodeInfo `protobuf:"bytes,7,rep,name=faultCodeInfo,proto3" json:"faultCodeInfo,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` -- Gitee From a3b896abe9390559d26fcc7346bc9e0246892aa0 Mon Sep 17 00:00:00 2001 From: wangjun Date: Mon, 18 Aug 2025 17:22:24 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E3=80=90clusterd=E3=80=91=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=E3=80=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?L2=E7=BA=A7=E5=88=AB=E6=95=85=E9=9A=9C=E4=B8=8A=E6=8A=A5?= =?UTF-8?q?=E5=92=8C=E5=90=8E=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- component/clusterd/pkg/common/constant/constants.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/component/clusterd/pkg/common/constant/constants.go b/component/clusterd/pkg/common/constant/constants.go index 63e072894..fa0f5d8f3 100644 --- a/component/clusterd/pkg/common/constant/constants.go +++ b/component/clusterd/pkg/common/constant/constants.go @@ -229,7 +229,7 @@ const ( // EmptyChipId chip id for node or device fault EmptyChipId = "-1" // EmptyPortId port id for node or device fault - EmptyPortId + EmptyPortId = "-1" ) // ras feature const -- Gitee From 601adae1955abd5e471e5481cbdd867a79fc3272 Mon Sep 17 00:00:00 2001 From: wangjun Date: Mon, 18 Aug 2025 20:02:13 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E3=80=90clusterd=E3=80=91=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=E3=80=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?L2=E7=BA=A7=E5=88=AB=E6=95=85=E9=9A=9C=E4=B8=8A=E6=8A=A5?= =?UTF-8?q?=E5=92=8C=E5=90=8E=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pkg/interface/grpc/fault/fault.pb.go | 151 ++++++++++++------ 1 file changed, 104 insertions(+), 47 deletions(-) diff --git a/component/clusterd/pkg/interface/grpc/fault/fault.pb.go b/component/clusterd/pkg/interface/grpc/fault/fault.pb.go index aaa43d78f..6cfadd13c 100644 --- a/component/clusterd/pkg/interface/grpc/fault/fault.pb.go +++ b/component/clusterd/pkg/interface/grpc/fault/fault.pb.go @@ -304,32 +304,78 @@ func (m *NodeFaultInfo) GetFaultDevice() []*DeviceFaultInfo { } type DeviceFaultCodeInfo struct { - FaultCode string `protobuf:"bytes,1,opt,name=faultCode,proto3" json:"faultCode,omitempty"` - ChipId string `protobuf:"bytes,2,opt,name=chipId,proto3" json:"chipId,omitempty"` - PortId string `protobuf:"bytes,3,opt,name=portId,proto3" json:"portId,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` -} - -type DeviceFaultInfo struct { - DeviceId string `protobuf:"bytes,1,opt,name=deviceId,proto3" json:"deviceId,omitempty"` - DeviceType string `protobuf:"bytes,2,opt,name=deviceType,proto3" json:"deviceType,omitempty"` - FaultCodes []string `protobuf:"bytes,3,rep,name=faultCodes,proto3" json:"faultCodes,omitempty"` - FaultLevel string `protobuf:"bytes,4,opt,name=faultLevel,proto3" json:"faultLevel,omitempty"` - FaultType []string `protobuf:"bytes,5,rep,name=faultType,proto3" json:"faultType,omitempty"` - FaultReason []string `protobuf:"bytes,6,rep,name=faultReason,proto3" json:"faultReason,omitempty"` - FaultCodeInfo []DeviceFaultCodeInfo `protobuf:"bytes,7,rep,name=faultCodeInfo,proto3" json:"faultCodeInfo,omitempty"` + FaultCode string `protobuf:"bytes,1,opt,name=faultCode,proto3" json:"faultCode,omitempty"` + ChipId string `protobuf:"bytes,2,opt,name=chipId,proto3" json:"chipId,omitempty"` + PortId string `protobuf:"bytes,3,opt,name=portId,proto3" json:"portId,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` } +func (m *DeviceFaultCodeInfo) Reset() { *m = DeviceFaultCodeInfo{} } +func (m *DeviceFaultCodeInfo) String() string { return proto.CompactTextString(m) } +func (*DeviceFaultCodeInfo) ProtoMessage() {} +func (*DeviceFaultCodeInfo) Descriptor() ([]byte, []int) { + return fileDescriptor_1f6b57b59ad5d7d5, []int{5} +} + +func (m *DeviceFaultCodeInfo) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_DeviceFaultCodeInfo.Unmarshal(m, b) +} +func (m *DeviceFaultCodeInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_DeviceFaultCodeInfo.Marshal(b, m, deterministic) +} +func (m *DeviceFaultCodeInfo) XXX_Merge(src proto.Message) { + xxx_messageInfo_DeviceFaultCodeInfo.Merge(m, src) +} +func (m *DeviceFaultCodeInfo) XXX_Size() int { + return xxx_messageInfo_DeviceFaultCodeInfo.Size(m) +} +func (m *DeviceFaultCodeInfo) XXX_DiscardUnknown() { + xxx_messageInfo_DeviceFaultCodeInfo.DiscardUnknown(m) +} + +var xxx_messageInfo_DeviceFaultCodeInfo proto.InternalMessageInfo + +func (m *DeviceFaultCodeInfo) GetFaultCode() string { + if m != nil { + return m.FaultCode + } + return "" +} + +func (m *DeviceFaultCodeInfo) GetChipId() string { + if m != nil { + return m.ChipId + } + return "" +} + +func (m *DeviceFaultCodeInfo) GetPortId() string { + if m != nil { + return m.PortId + } + return "" +} + +type DeviceFaultInfo struct { + DeviceId string `protobuf:"bytes,1,opt,name=deviceId,proto3" json:"deviceId,omitempty"` + DeviceType string `protobuf:"bytes,2,opt,name=deviceType,proto3" json:"deviceType,omitempty"` + FaultCodes []string `protobuf:"bytes,3,rep,name=faultCodes,proto3" json:"faultCodes,omitempty"` + FaultLevel string `protobuf:"bytes,4,opt,name=faultLevel,proto3" json:"faultLevel,omitempty"` + FaultType []string `protobuf:"bytes,5,rep,name=faultType,proto3" json:"faultType,omitempty"` + FaultReason []string `protobuf:"bytes,6,rep,name=faultReason,proto3" json:"faultReason,omitempty"` + FaultCodeInfo []*DeviceFaultCodeInfo `protobuf:"bytes,7,rep,name=faultCodeInfo,proto3" json:"faultCodeInfo,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + func (m *DeviceFaultInfo) Reset() { *m = DeviceFaultInfo{} } func (m *DeviceFaultInfo) String() string { return proto.CompactTextString(m) } func (*DeviceFaultInfo) ProtoMessage() {} func (*DeviceFaultInfo) Descriptor() ([]byte, []int) { - return fileDescriptor_1f6b57b59ad5d7d5, []int{5} + return fileDescriptor_1f6b57b59ad5d7d5, []int{6} } func (m *DeviceFaultInfo) XXX_Unmarshal(b []byte) error { @@ -392,12 +438,20 @@ func (m *DeviceFaultInfo) GetFaultReason() []string { return nil } +func (m *DeviceFaultInfo) GetFaultCodeInfo() []*DeviceFaultCodeInfo { + if m != nil { + return m.FaultCodeInfo + } + return nil +} + func init() { proto.RegisterType((*FaultQueryResult)(nil), "fault.FaultQueryResult") proto.RegisterType((*Status)(nil), "fault.Status") proto.RegisterType((*ClientInfo)(nil), "fault.ClientInfo") proto.RegisterType((*FaultMsgSignal)(nil), "fault.FaultMsgSignal") proto.RegisterType((*NodeFaultInfo)(nil), "fault.NodeFaultInfo") + proto.RegisterType((*DeviceFaultCodeInfo)(nil), "fault.DeviceFaultCodeInfo") proto.RegisterType((*DeviceFaultInfo)(nil), "fault.DeviceFaultInfo") } @@ -406,34 +460,37 @@ func init() { } var fileDescriptor_1f6b57b59ad5d7d5 = []byte{ - // 454 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x93, 0xcf, 0x6e, 0x13, 0x31, - 0x10, 0xc6, 0xe3, 0x26, 0x1b, 0xba, 0x13, 0x05, 0xa8, 0x55, 0xda, 0x55, 0x85, 0xd0, 0xca, 0xa7, - 0x9c, 0xa2, 0x2a, 0x48, 0x80, 0xe0, 0x04, 0x45, 0x54, 0x91, 0x20, 0x02, 0x87, 0x13, 0xb7, 0x4d, - 0x76, 0x1a, 0x2d, 0x5a, 0xd6, 0xd5, 0xda, 0x5b, 0xa9, 0x2f, 0xc2, 0x9b, 0xf0, 0x04, 0x9c, 0x79, - 0x27, 0xe4, 0xb1, 0xb3, 0xeb, 0x54, 0x95, 0xd2, 0xdb, 0xcc, 0xe7, 0x99, 0xcf, 0xbf, 0xf1, 0x1f, - 0x18, 0x5d, 0x65, 0x4d, 0x69, 0xa6, 0xd7, 0xb5, 0x32, 0x8a, 0x47, 0x94, 0x08, 0x0d, 0x4f, 0x3f, - 0xd9, 0xe0, 0x5b, 0x83, 0xf5, 0xad, 0x44, 0xdd, 0x94, 0x86, 0x73, 0x18, 0xac, 0x55, 0x8e, 0x09, - 0x4b, 0xd9, 0x24, 0x92, 0x14, 0x5b, 0xad, 0xa8, 0xae, 0x54, 0x72, 0x90, 0xb2, 0x49, 0x2c, 0x29, - 0xe6, 0xaf, 0xbd, 0xe3, 0xb2, 0xd8, 0x54, 0x59, 0x99, 0xf4, 0x53, 0x36, 0x19, 0xcd, 0x9e, 0x4d, - 0xdd, 0x2e, 0xe4, 0xfa, 0x45, 0x6f, 0xdc, 0xa2, 0x0c, 0x2b, 0xc5, 0x39, 0x0c, 0x97, 0x26, 0x33, - 0x8d, 0x7e, 0xe8, 0x56, 0xe2, 0x15, 0xc0, 0x45, 0x59, 0x60, 0x65, 0xe6, 0x76, 0xe3, 0x63, 0x88, - 0x7e, 0xaa, 0xd5, 0x3c, 0xa7, 0xb6, 0x58, 0xba, 0xc4, 0xf6, 0xd5, 0xaa, 0xc4, 0x6d, 0x9f, 0x8d, - 0xc5, 0x6f, 0x06, 0x8f, 0x77, 0x49, 0x6c, 0x59, 0xd3, 0x14, 0xdb, 0x5e, 0x8a, 0x3b, 0xc3, 0x83, - 0xd0, 0xf0, 0x05, 0x80, 0xa6, 0x9e, 0xef, 0xb7, 0xd7, 0x48, 0xe3, 0xc5, 0x32, 0x50, 0xf8, 0x5b, - 0x18, 0x57, 0x2a, 0x47, 0xf2, 0xb7, 0x5c, 0xc9, 0x20, 0xed, 0x4f, 0x46, 0xb3, 0x63, 0x7f, 0x02, - 0x8b, 0x70, 0x4d, 0xee, 0x96, 0x8a, 0x3f, 0x0c, 0xc6, 0x3b, 0x05, 0xfc, 0x0c, 0x0e, 0x6d, 0xc9, - 0x22, 0xfb, 0x85, 0x9e, 0xad, 0xcd, 0xf9, 0x09, 0x0c, 0x6d, 0x3c, 0xff, 0xea, 0x01, 0x7d, 0xb6, - 0xd5, 0x97, 0x0b, 0x4f, 0xe7, 0x33, 0x4b, 0x4e, 0x0c, 0x9f, 0xf1, 0x06, 0xcb, 0x64, 0xe0, 0xc8, - 0x3b, 0x85, 0xbf, 0xf1, 0x37, 0xf7, 0x11, 0x6f, 0x8a, 0x35, 0x26, 0x11, 0x71, 0x9f, 0x78, 0x6e, - 0x27, 0x76, 0xe4, 0x61, 0xa9, 0xf8, 0xc7, 0xe0, 0xc9, 0x9d, 0x02, 0x4b, 0x9e, 0x93, 0xd4, 0xde, - 0x48, 0x9b, 0x5b, 0x12, 0x17, 0xd3, 0x19, 0x3a, 0xfa, 0x40, 0x69, 0x49, 0x2f, 0x54, 0x8e, 0x3a, - 0xe9, 0xa7, 0xfd, 0x96, 0x94, 0x94, 0xbd, 0x93, 0x3c, 0x87, 0x98, 0x32, 0xb2, 0x8f, 0xa8, 0xbd, - 0x13, 0x78, 0xea, 0xe7, 0x94, 0x98, 0x69, 0x55, 0x25, 0x43, 0x5a, 0x0f, 0xa5, 0xd9, 0x5f, 0x06, - 0x11, 0x4d, 0xc2, 0xa7, 0x70, 0x28, 0x71, 0x53, 0x68, 0x83, 0x35, 0x3f, 0xf2, 0x47, 0xd1, 0xbd, - 0xb9, 0xb3, 0xb1, 0x97, 0xdc, 0xc3, 0x15, 0x3d, 0x7e, 0x09, 0xa7, 0xcb, 0x66, 0xa5, 0xd7, 0x75, - 0xb1, 0xc2, 0x3b, 0x4f, 0xec, 0x9e, 0xf6, 0xfb, 0xbf, 0x85, 0xe8, 0x9d, 0x33, 0xfe, 0x1e, 0x8e, - 0x2e, 0xd1, 0xec, 0xb7, 0x38, 0x0d, 0x2d, 0x82, 0xff, 0x2a, 0x7a, 0x1f, 0xe2, 0x1f, 0x8f, 0xa6, - 0xef, 0x68, 0x75, 0x35, 0xa4, 0xef, 0xfd, 0xf2, 0x7f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xd4, 0x25, - 0x61, 0x7b, 0xed, 0x03, 0x00, 0x00, + // 505 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x94, 0xdf, 0x6e, 0xd3, 0x30, + 0x14, 0xc6, 0x97, 0xb6, 0xe9, 0x96, 0x53, 0x15, 0x98, 0x19, 0x9b, 0x55, 0x21, 0x14, 0xf9, 0xaa, + 0x57, 0xd5, 0x54, 0x24, 0x40, 0x70, 0x03, 0x0c, 0x31, 0x55, 0x82, 0x0a, 0x5c, 0xae, 0xb8, 0x4b, + 0x13, 0xb7, 0x04, 0x85, 0xb8, 0x8a, 0x9d, 0x49, 0x7b, 0x0a, 0xee, 0x78, 0x13, 0x9e, 0x80, 0x17, + 0x43, 0x3e, 0x76, 0x12, 0x77, 0x4c, 0x1a, 0x77, 0xe7, 0x7c, 0xe7, 0x8f, 0x7f, 0xf9, 0x2c, 0x07, + 0x46, 0x9b, 0xa4, 0x2e, 0xf4, 0x6c, 0x57, 0x49, 0x2d, 0x49, 0x88, 0x09, 0x53, 0xf0, 0xe0, 0xbd, + 0x09, 0x3e, 0xd7, 0xa2, 0xba, 0xe6, 0x42, 0xd5, 0x85, 0x26, 0x04, 0x06, 0xa9, 0xcc, 0x04, 0x0d, + 0xe2, 0x60, 0x1a, 0x72, 0x8c, 0x8d, 0x96, 0x97, 0x1b, 0x49, 0x7b, 0x71, 0x30, 0x8d, 0x38, 0xc6, + 0xe4, 0xb9, 0xdb, 0xb8, 0xca, 0xb7, 0x65, 0x52, 0xd0, 0x7e, 0x1c, 0x4c, 0x47, 0xf3, 0x47, 0x33, + 0x7b, 0x0a, 0x6e, 0xfd, 0xa8, 0xb6, 0xb6, 0xc8, 0xfd, 0x4e, 0x76, 0x0e, 0xc3, 0x95, 0x4e, 0x74, + 0xad, 0xfe, 0xf7, 0x28, 0xf6, 0x0c, 0xe0, 0xa2, 0xc8, 0x45, 0xa9, 0x17, 0xe6, 0xe0, 0x13, 0x08, + 0xbf, 0xcb, 0xf5, 0x22, 0xc3, 0xb1, 0x88, 0xdb, 0xc4, 0xcc, 0x55, 0xb2, 0x10, 0xcd, 0x9c, 0x89, + 0xd9, 0xaf, 0x00, 0xee, 0xed, 0x93, 0x98, 0xb6, 0xba, 0xce, 0x9b, 0x59, 0x8c, 0xbb, 0x85, 0x3d, + 0x7f, 0xe1, 0x13, 0x00, 0x85, 0x33, 0x5f, 0xae, 0x77, 0x02, 0x3f, 0x2f, 0xe2, 0x9e, 0x42, 0x5e, + 0xc2, 0xb8, 0x94, 0x99, 0xc0, 0xfd, 0x86, 0x8b, 0x0e, 0xe2, 0xfe, 0x74, 0x34, 0x3f, 0x71, 0x0e, + 0x2c, 0xfd, 0x1a, 0xdf, 0x6f, 0x65, 0xbf, 0x03, 0x18, 0xef, 0x35, 0x90, 0x09, 0x1c, 0x99, 0x96, + 0x65, 0xf2, 0x43, 0x38, 0xb6, 0x36, 0x27, 0xa7, 0x30, 0x34, 0xf1, 0xe2, 0x93, 0x03, 0x74, 0x59, + 0xa3, 0xaf, 0x96, 0x8e, 0xce, 0x65, 0x86, 0x1c, 0x19, 0x3e, 0x88, 0x2b, 0x51, 0xd0, 0x81, 0x25, + 0xef, 0x14, 0xf2, 0xc2, 0xdd, 0xdc, 0x3b, 0x71, 0x95, 0xa7, 0x82, 0x86, 0xc8, 0x7d, 0xea, 0xb8, + 0xad, 0xd8, 0x91, 0xfb, 0xad, 0x2c, 0x85, 0x87, 0x5e, 0xfd, 0xc2, 0x60, 0x18, 0xf8, 0xc7, 0x10, + 0x6d, 0x1a, 0xc1, 0xd1, 0x77, 0x82, 0xc1, 0x4c, 0xbf, 0xe5, 0xbb, 0xd6, 0x5f, 0x97, 0x19, 0x7d, + 0x27, 0x2b, 0xbd, 0xc8, 0x1a, 0x7c, 0x9b, 0xb1, 0x9f, 0x3d, 0xb8, 0x7f, 0x83, 0xc2, 0xd8, 0x93, + 0xa1, 0xd4, 0x5e, 0x7b, 0x9b, 0x9b, 0xcf, 0xb5, 0x31, 0x5e, 0x94, 0x3d, 0xc3, 0x53, 0x5a, 0x3b, + 0x0c, 0x8c, 0xa2, 0xfd, 0xb8, 0xdf, 0xda, 0x81, 0xca, 0x9d, 0x76, 0x35, 0x5f, 0x87, 0xeb, 0x43, + 0x1c, 0xef, 0x04, 0x12, 0x3b, 0x33, 0xb9, 0x48, 0x94, 0x2c, 0xe9, 0x10, 0xeb, 0xbe, 0x44, 0x5e, + 0xc3, 0x78, 0xe3, 0xdb, 0x45, 0x0f, 0xd1, 0xf0, 0xc9, 0xbf, 0x86, 0x37, 0x1d, 0x7c, 0x7f, 0x60, + 0xfe, 0x27, 0x80, 0x10, 0x1b, 0xc8, 0x0c, 0x8e, 0xb8, 0xd8, 0xe6, 0x4a, 0x8b, 0x8a, 0x1c, 0xbb, + 0x05, 0xdd, 0xd3, 0x98, 0x8c, 0x9d, 0x64, 0xdf, 0x17, 0x3b, 0x20, 0x97, 0x70, 0xb6, 0xaa, 0xd7, + 0x2a, 0xad, 0xf2, 0xb5, 0xb8, 0xf1, 0x12, 0x6e, 0x19, 0xbf, 0xfd, 0xf5, 0xb2, 0x83, 0xf3, 0x80, + 0xbc, 0x81, 0xe3, 0x4b, 0xa1, 0xef, 0x5e, 0x71, 0xe6, 0xaf, 0xf0, 0x7e, 0x2b, 0xec, 0xe0, 0x6d, + 0xf4, 0xf5, 0x70, 0xf6, 0x0a, 0xab, 0xeb, 0x21, 0xfe, 0x85, 0x9e, 0xfe, 0x0d, 0x00, 0x00, 0xff, + 0xff, 0x7f, 0x6c, 0x68, 0x5e, 0x94, 0x04, 0x00, 0x00, } -- Gitee From cc6cc0ef6e8a8938ab88cee26f4750e46014182b Mon Sep 17 00:00:00 2001 From: wangjun Date: Tue, 19 Aug 2025 16:41:50 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E3=80=90clusterd=E3=80=91=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=E3=80=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?L2=E7=BA=A7=E5=88=AB=E6=95=85=E9=9A=9C=E4=B8=8A=E6=8A=A5?= =?UTF-8?q?=E5=92=8C=E5=90=8E=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../application/fault/publish_fault_plugin.go | 1 + .../cluster_fault_processor.go | 32 ++++++-- .../pkg/interface/grpc/fault/fault.pb.go | 81 ++++++++++--------- .../pkg/interface/grpc/fault/fault.proto | 3 +- 4 files changed, 73 insertions(+), 44 deletions(-) diff --git a/component/clusterd/pkg/application/fault/publish_fault_plugin.go b/component/clusterd/pkg/application/fault/publish_fault_plugin.go index c73dfeb4d..99c8ede09 100644 --- a/component/clusterd/pkg/application/fault/publish_fault_plugin.go +++ b/component/clusterd/pkg/application/fault/publish_fault_plugin.go @@ -206,6 +206,7 @@ func getFaultDeviceInfo(faultList []constant.FaultDevice) (*fault.DeviceFaultInf } } info.FaultLevel = getStateByLevel(maxLevel) + //todo info.FaultCodeInfos RemoveDuplicates ? if len(info.FaultCodes) > 1 { info.FaultCodes = util.RemoveDuplicates(info.FaultCodes) sort.Strings(info.FaultCodes) diff --git a/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go b/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go index 678c8f041..f0cbfadba 100644 --- a/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go +++ b/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go @@ -147,10 +147,11 @@ func getNodeReadyFaultInfo(nodeName string) *fault.DeviceFaultInfo { nodeStatus := kube.GetNode(nodeName) if nodeStatus == nil || !faultdomain.IsNodeReady(nodeStatus) { return &fault.DeviceFaultInfo{ - DeviceId: constant.EmptyDeviceId, - DeviceType: constant.FaultTypeNode, - FaultCodes: nil, - FaultLevel: constant.UnHealthyState, + DeviceId: constant.EmptyDeviceId, + DeviceType: constant.FaultTypeNode, + FaultCodes: nil, + FaultLevel: constant.UnHealthyState, + FaultCodeInfos: nil, } } return nil @@ -191,11 +192,19 @@ func getSwitchFaultInfo(switchInfo *constant.SwitchInfo) []*fault.DeviceFaultInf DeviceType: constant.FaultTypeSwitch, } faultCodes := make([]string, 0) + faultCodeInfos := make([]*fault.DeviceFaultCodeInfo, 0) for _, device := range switchInfo.SwitchFaultInfo.FaultInfo { faultCodes = append(faultCodes, device.AssembledFaultCode) + faultCodeInfos = append(faultCodeInfos, &fault.DeviceFaultCodeInfo{ + FaultCode: device.AssembledFaultCode, + ChipId: strconv.FormatUint(uint64(device.SwitchChipId), constant.FormatBase), + PortId: strconv.FormatUint(uint64(device.SwitchPortId), constant.FormatBase), + FaultTime: strconv.FormatInt(device.AlarmRaisedTime, constant.FormatBase), + }) } allFault.FaultCodes = faultCodes allFault.FaultLevel = switchInfo.NodeStatus + allFault.FaultCodeInfos = faultCodeInfos return []*fault.DeviceFaultInfo{&allFault} } @@ -216,19 +225,28 @@ func getNpuDeviceFaultInfo(deviceCm *constant.AdvanceDeviceFaultCm) []*fault.Dev FaultReason: nil, } - deviceFaultCodes, maxFaultLevl := getDeviceFaultInfo(deviceFaults) + deviceFaultCodes, maxFaultLevl, deviceFaultCodeInfos := getDeviceFaultInfo(deviceFaults) deviceFault.FaultCodes = deviceFaultCodes + deviceFault.FaultCodeInfos = deviceFaultCodeInfos deviceFault.FaultLevel = maxFaultLevl faultsOnDevice = append(faultsOnDevice, &deviceFault) } return faultsOnDevice } -func getDeviceFaultInfo(deviceFaults []constant.DeviceFault) ([]string, string) { +func getDeviceFaultInfo(deviceFaults []constant.DeviceFault) ([]string, string, []*fault.DeviceFaultCodeInfo) { maxFaultLevel := constant.HealthyState faultCode := make([]string, 0) + faultCodeInfos := make([]*fault.DeviceFaultCodeInfo, 0) for _, faultMsg := range deviceFaults { faultCode = append(faultCode, faultMsg.FaultCode) + faultTime := faultMsg.FaultTimeAndLevelMap[faultMsg.FaultCode].FaultTime + faultCodeInfos = append(faultCodeInfos, &fault.DeviceFaultCodeInfo{ + FaultCode: faultMsg.FaultCode, + ChipId: constant.EmptyChipId, + PortId: constant.EmptyPortId, + FaultTime: strconv.FormatInt(faultTime, constant.FormatBase), + }) faultLevel, ok := faultLevelMap[faultMsg.FaultLevel] if !ok { maxFaultLevel = constant.UnHealthyState @@ -237,5 +255,5 @@ func getDeviceFaultInfo(deviceFaults []constant.DeviceFault) ([]string, string) maxFaultLevel = constant.SubHealthyState } } - return faultCode, maxFaultLevel + return faultCode, maxFaultLevel, faultCodeInfos } diff --git a/component/clusterd/pkg/interface/grpc/fault/fault.pb.go b/component/clusterd/pkg/interface/grpc/fault/fault.pb.go index 6cfadd13c..aed093a3e 100644 --- a/component/clusterd/pkg/interface/grpc/fault/fault.pb.go +++ b/component/clusterd/pkg/interface/grpc/fault/fault.pb.go @@ -307,6 +307,7 @@ type DeviceFaultCodeInfo struct { FaultCode string `protobuf:"bytes,1,opt,name=faultCode,proto3" json:"faultCode,omitempty"` ChipId string `protobuf:"bytes,2,opt,name=chipId,proto3" json:"chipId,omitempty"` PortId string `protobuf:"bytes,3,opt,name=portId,proto3" json:"portId,omitempty"` + FaultTime string `protobuf:"bytes,4,opt,name=faultTime,proto3" json:"faultTime,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -358,6 +359,13 @@ func (m *DeviceFaultCodeInfo) GetPortId() string { return "" } +func (m *DeviceFaultCodeInfo) GetFaultTime() string { + if m != nil { + return m.FaultTime + } + return "" +} + type DeviceFaultInfo struct { DeviceId string `protobuf:"bytes,1,opt,name=deviceId,proto3" json:"deviceId,omitempty"` DeviceType string `protobuf:"bytes,2,opt,name=deviceType,proto3" json:"deviceType,omitempty"` @@ -365,7 +373,7 @@ type DeviceFaultInfo struct { FaultLevel string `protobuf:"bytes,4,opt,name=faultLevel,proto3" json:"faultLevel,omitempty"` FaultType []string `protobuf:"bytes,5,rep,name=faultType,proto3" json:"faultType,omitempty"` FaultReason []string `protobuf:"bytes,6,rep,name=faultReason,proto3" json:"faultReason,omitempty"` - FaultCodeInfo []*DeviceFaultCodeInfo `protobuf:"bytes,7,rep,name=faultCodeInfo,proto3" json:"faultCodeInfo,omitempty"` + FaultCodeInfos []*DeviceFaultCodeInfo `protobuf:"bytes,7,rep,name=faultCodeInfos,proto3" json:"faultCodeInfos,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` XXX_sizecache int32 `json:"-"` @@ -438,9 +446,9 @@ func (m *DeviceFaultInfo) GetFaultReason() []string { return nil } -func (m *DeviceFaultInfo) GetFaultCodeInfo() []*DeviceFaultCodeInfo { +func (m *DeviceFaultInfo) GetFaultCodeInfos() []*DeviceFaultCodeInfo { if m != nil { - return m.FaultCodeInfo + return m.FaultCodeInfos } return nil } @@ -460,37 +468,38 @@ func init() { } var fileDescriptor_1f6b57b59ad5d7d5 = []byte{ - // 505 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x94, 0xdf, 0x6e, 0xd3, 0x30, - 0x14, 0xc6, 0x97, 0xb6, 0xe9, 0x96, 0x53, 0x15, 0x98, 0x19, 0x9b, 0x55, 0x21, 0x14, 0xf9, 0xaa, - 0x57, 0xd5, 0x54, 0x24, 0x40, 0x70, 0x03, 0x0c, 0x31, 0x55, 0x82, 0x0a, 0x5c, 0xae, 0xb8, 0x4b, - 0x13, 0xb7, 0x04, 0x85, 0xb8, 0x8a, 0x9d, 0x49, 0x7b, 0x0a, 0xee, 0x78, 0x13, 0x9e, 0x80, 0x17, - 0x43, 0x3e, 0x76, 0x12, 0x77, 0x4c, 0x1a, 0x77, 0xe7, 0x7c, 0xe7, 0x8f, 0x7f, 0xf9, 0x2c, 0x07, - 0x46, 0x9b, 0xa4, 0x2e, 0xf4, 0x6c, 0x57, 0x49, 0x2d, 0x49, 0x88, 0x09, 0x53, 0xf0, 0xe0, 0xbd, - 0x09, 0x3e, 0xd7, 0xa2, 0xba, 0xe6, 0x42, 0xd5, 0x85, 0x26, 0x04, 0x06, 0xa9, 0xcc, 0x04, 0x0d, - 0xe2, 0x60, 0x1a, 0x72, 0x8c, 0x8d, 0x96, 0x97, 0x1b, 0x49, 0x7b, 0x71, 0x30, 0x8d, 0x38, 0xc6, - 0xe4, 0xb9, 0xdb, 0xb8, 0xca, 0xb7, 0x65, 0x52, 0xd0, 0x7e, 0x1c, 0x4c, 0x47, 0xf3, 0x47, 0x33, - 0x7b, 0x0a, 0x6e, 0xfd, 0xa8, 0xb6, 0xb6, 0xc8, 0xfd, 0x4e, 0x76, 0x0e, 0xc3, 0x95, 0x4e, 0x74, - 0xad, 0xfe, 0xf7, 0x28, 0xf6, 0x0c, 0xe0, 0xa2, 0xc8, 0x45, 0xa9, 0x17, 0xe6, 0xe0, 0x13, 0x08, - 0xbf, 0xcb, 0xf5, 0x22, 0xc3, 0xb1, 0x88, 0xdb, 0xc4, 0xcc, 0x55, 0xb2, 0x10, 0xcd, 0x9c, 0x89, - 0xd9, 0xaf, 0x00, 0xee, 0xed, 0x93, 0x98, 0xb6, 0xba, 0xce, 0x9b, 0x59, 0x8c, 0xbb, 0x85, 0x3d, - 0x7f, 0xe1, 0x13, 0x00, 0x85, 0x33, 0x5f, 0xae, 0x77, 0x02, 0x3f, 0x2f, 0xe2, 0x9e, 0x42, 0x5e, - 0xc2, 0xb8, 0x94, 0x99, 0xc0, 0xfd, 0x86, 0x8b, 0x0e, 0xe2, 0xfe, 0x74, 0x34, 0x3f, 0x71, 0x0e, - 0x2c, 0xfd, 0x1a, 0xdf, 0x6f, 0x65, 0xbf, 0x03, 0x18, 0xef, 0x35, 0x90, 0x09, 0x1c, 0x99, 0x96, - 0x65, 0xf2, 0x43, 0x38, 0xb6, 0x36, 0x27, 0xa7, 0x30, 0x34, 0xf1, 0xe2, 0x93, 0x03, 0x74, 0x59, - 0xa3, 0xaf, 0x96, 0x8e, 0xce, 0x65, 0x86, 0x1c, 0x19, 0x3e, 0x88, 0x2b, 0x51, 0xd0, 0x81, 0x25, - 0xef, 0x14, 0xf2, 0xc2, 0xdd, 0xdc, 0x3b, 0x71, 0x95, 0xa7, 0x82, 0x86, 0xc8, 0x7d, 0xea, 0xb8, - 0xad, 0xd8, 0x91, 0xfb, 0xad, 0x2c, 0x85, 0x87, 0x5e, 0xfd, 0xc2, 0x60, 0x18, 0xf8, 0xc7, 0x10, - 0x6d, 0x1a, 0xc1, 0xd1, 0x77, 0x82, 0xc1, 0x4c, 0xbf, 0xe5, 0xbb, 0xd6, 0x5f, 0x97, 0x19, 0x7d, - 0x27, 0x2b, 0xbd, 0xc8, 0x1a, 0x7c, 0x9b, 0xb1, 0x9f, 0x3d, 0xb8, 0x7f, 0x83, 0xc2, 0xd8, 0x93, - 0xa1, 0xd4, 0x5e, 0x7b, 0x9b, 0x9b, 0xcf, 0xb5, 0x31, 0x5e, 0x94, 0x3d, 0xc3, 0x53, 0x5a, 0x3b, - 0x0c, 0x8c, 0xa2, 0xfd, 0xb8, 0xdf, 0xda, 0x81, 0xca, 0x9d, 0x76, 0x35, 0x5f, 0x87, 0xeb, 0x43, - 0x1c, 0xef, 0x04, 0x12, 0x3b, 0x33, 0xb9, 0x48, 0x94, 0x2c, 0xe9, 0x10, 0xeb, 0xbe, 0x44, 0x5e, - 0xc3, 0x78, 0xe3, 0xdb, 0x45, 0x0f, 0xd1, 0xf0, 0xc9, 0xbf, 0x86, 0x37, 0x1d, 0x7c, 0x7f, 0x60, - 0xfe, 0x27, 0x80, 0x10, 0x1b, 0xc8, 0x0c, 0x8e, 0xb8, 0xd8, 0xe6, 0x4a, 0x8b, 0x8a, 0x1c, 0xbb, - 0x05, 0xdd, 0xd3, 0x98, 0x8c, 0x9d, 0x64, 0xdf, 0x17, 0x3b, 0x20, 0x97, 0x70, 0xb6, 0xaa, 0xd7, - 0x2a, 0xad, 0xf2, 0xb5, 0xb8, 0xf1, 0x12, 0x6e, 0x19, 0xbf, 0xfd, 0xf5, 0xb2, 0x83, 0xf3, 0x80, - 0xbc, 0x81, 0xe3, 0x4b, 0xa1, 0xef, 0x5e, 0x71, 0xe6, 0xaf, 0xf0, 0x7e, 0x2b, 0xec, 0xe0, 0x6d, - 0xf4, 0xf5, 0x70, 0xf6, 0x0a, 0xab, 0xeb, 0x21, 0xfe, 0x85, 0x9e, 0xfe, 0x0d, 0x00, 0x00, 0xff, - 0xff, 0x7f, 0x6c, 0x68, 0x5e, 0x94, 0x04, 0x00, 0x00, + // 517 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x94, 0xc1, 0x6e, 0xd3, 0x40, + 0x10, 0x86, 0xe3, 0x24, 0x4e, 0x9b, 0x89, 0x52, 0xe8, 0x52, 0x5a, 0x2b, 0x42, 0xc8, 0xda, 0x53, + 0x4e, 0x51, 0x15, 0x24, 0x40, 0x70, 0xa2, 0x45, 0x54, 0x91, 0x20, 0x82, 0x4d, 0x4f, 0xdc, 0x9c, + 0x78, 0x12, 0x8c, 0x5c, 0x3b, 0xf2, 0xda, 0x95, 0x7a, 0xe4, 0x11, 0xb8, 0xf0, 0x26, 0x3c, 0x01, + 0x2f, 0x86, 0x66, 0xbc, 0xb6, 0x37, 0xa1, 0x52, 0xb9, 0xcd, 0xfc, 0x3b, 0xf3, 0xef, 0x97, 0xd9, + 0x8c, 0x61, 0xb0, 0x0e, 0x8a, 0x38, 0x9f, 0x6c, 0xb3, 0x34, 0x4f, 0x85, 0xcb, 0x89, 0xd4, 0xf0, + 0xf8, 0x03, 0x05, 0x5f, 0x0a, 0xcc, 0xee, 0x14, 0xea, 0x22, 0xce, 0x85, 0x80, 0xee, 0x2a, 0x0d, + 0xd1, 0x73, 0x7c, 0x67, 0xec, 0x2a, 0x8e, 0x49, 0x8b, 0x92, 0x75, 0xea, 0xb5, 0x7d, 0x67, 0xdc, + 0x57, 0x1c, 0x8b, 0x57, 0xc6, 0x71, 0x11, 0x6d, 0x92, 0x20, 0xf6, 0x3a, 0xbe, 0x33, 0x1e, 0x4c, + 0x9f, 0x4e, 0xca, 0x5b, 0xd8, 0xf5, 0x93, 0xde, 0x94, 0x87, 0xca, 0xae, 0x94, 0xe7, 0xd0, 0x5b, + 0xe4, 0x41, 0x5e, 0xe8, 0xff, 0xbd, 0x4a, 0xbe, 0x04, 0xb8, 0x8c, 0x23, 0x4c, 0xf2, 0x19, 0x5d, + 0x7c, 0x02, 0xee, 0xf7, 0x74, 0x39, 0x0b, 0xb9, 0xad, 0xaf, 0xca, 0x84, 0xfa, 0xb2, 0x34, 0xc6, + 0xaa, 0x8f, 0x62, 0xf9, 0xcb, 0x81, 0xa3, 0x5d, 0x12, 0x2a, 0x2b, 0x8a, 0xa8, 0xea, 0xe5, 0xb8, + 0x31, 0x6c, 0xdb, 0x86, 0xcf, 0x01, 0x34, 0xf7, 0x5c, 0xdf, 0x6d, 0x91, 0x7f, 0x5e, 0x5f, 0x59, + 0x8a, 0x78, 0x03, 0xc3, 0x24, 0x0d, 0x91, 0xfd, 0x89, 0xcb, 0xeb, 0xfa, 0x9d, 0xf1, 0x60, 0x7a, + 0x62, 0x26, 0x30, 0xb7, 0xcf, 0xd4, 0x6e, 0xa9, 0xfc, 0xed, 0xc0, 0x70, 0xa7, 0x40, 0x8c, 0xe0, + 0x90, 0x4a, 0xe6, 0xc1, 0x0d, 0x1a, 0xb6, 0x3a, 0x17, 0xa7, 0xd0, 0xa3, 0x78, 0xf6, 0xd9, 0x00, + 0x9a, 0xac, 0xd2, 0x17, 0x73, 0x43, 0x67, 0x32, 0x22, 0x67, 0x86, 0x8f, 0x78, 0x8b, 0xb1, 0xd7, + 0x2d, 0xc9, 0x1b, 0x45, 0xbc, 0x36, 0x2f, 0xf7, 0x1e, 0x6f, 0xa3, 0x15, 0x7a, 0x2e, 0x73, 0x9f, + 0x1a, 0xee, 0x52, 0x6c, 0xc8, 0xed, 0x52, 0xf9, 0xc3, 0x81, 0x27, 0x56, 0xc1, 0x25, 0x71, 0x10, + 0xfd, 0x33, 0xe8, 0xaf, 0x2b, 0xc1, 0xe0, 0x37, 0x02, 0x71, 0xae, 0xbe, 0x45, 0xdb, 0x7a, 0xc0, + 0x26, 0x23, 0x7d, 0x9b, 0x66, 0xf9, 0x2c, 0xac, 0xf8, 0xcb, 0xac, 0x76, 0xbb, 0x8e, 0x6e, 0xd0, + 0xe0, 0x37, 0x82, 0xfc, 0xd9, 0x86, 0x47, 0x7b, 0x90, 0x34, 0xbd, 0x90, 0xa5, 0xfa, 0x5f, 0x51, + 0xe7, 0x34, 0x8d, 0x32, 0xe6, 0x77, 0x2c, 0x09, 0x2c, 0xa5, 0x9e, 0x16, 0xa1, 0x6a, 0xaf, 0xe3, + 0x77, 0xea, 0x69, 0xb1, 0xf2, 0xe0, 0x34, 0x6b, 0x5a, 0xb2, 0x77, 0xb9, 0xbd, 0x11, 0x84, 0x6f, + 0x66, 0xad, 0x30, 0xd0, 0x69, 0xe2, 0xf5, 0xf8, 0xdc, 0x96, 0xc4, 0x05, 0x1c, 0xad, 0xed, 0x61, + 0x6a, 0xef, 0x80, 0x1f, 0x64, 0xf4, 0xef, 0x83, 0x54, 0x25, 0x6a, 0xaf, 0x63, 0xfa, 0xc7, 0x01, + 0x97, 0x2b, 0xc4, 0x04, 0x0e, 0x15, 0x6e, 0x22, 0x9d, 0x63, 0x26, 0x8e, 0x8d, 0x43, 0xb3, 0x3b, + 0xa3, 0xa1, 0x91, 0xca, 0x05, 0x94, 0x2d, 0x71, 0x05, 0x67, 0x8b, 0x62, 0xa9, 0x57, 0x59, 0xb4, + 0xc4, 0xbd, 0x55, 0xb9, 0xa7, 0xfd, 0xfe, 0xf5, 0x96, 0xad, 0x73, 0x47, 0xbc, 0x83, 0xe3, 0x2b, + 0xcc, 0x1f, 0xb6, 0x38, 0xb3, 0x2d, 0xac, 0xef, 0x8e, 0x6c, 0x5d, 0xf4, 0xbf, 0x1e, 0x4c, 0xde, + 0xf2, 0xe9, 0xb2, 0xc7, 0x9f, 0xa9, 0x17, 0x7f, 0x03, 0x00, 0x00, 0xff, 0xff, 0x05, 0xc3, 0xc3, + 0x11, 0xb5, 0x04, 0x00, 0x00, } diff --git a/component/clusterd/pkg/interface/grpc/fault/fault.proto b/component/clusterd/pkg/interface/grpc/fault/fault.proto index 0f55ed05f..69a64d3be 100644 --- a/component/clusterd/pkg/interface/grpc/fault/fault.proto +++ b/component/clusterd/pkg/interface/grpc/fault/fault.proto @@ -38,6 +38,7 @@ message DeviceFaultCodeInfo { string faultCode = 1; string chipId = 2; string portId = 3; + string faultTime = 4; } message DeviceFaultInfo { @@ -47,7 +48,7 @@ message DeviceFaultInfo { string faultLevel = 4; repeated string faultType = 5; repeated string faultReason = 6; - repeated DeviceFaultCodeInfo faultCodeInfo = 7; + repeated DeviceFaultCodeInfo faultCodeInfos = 7; } service Fault { -- Gitee From 4f662a10e01733644874923865a9ed7af7829e0d Mon Sep 17 00:00:00 2001 From: wangjun Date: Mon, 25 Aug 2025 21:54:37 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E3=80=90clusterd=E3=80=91=E3=80=90?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=AF=B4=E6=98=8E=E3=80=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?L2=E7=BA=A7=E5=88=AB=E6=95=85=E9=9A=9C=E4=B8=8A=E6=8A=A5?= =?UTF-8?q?=E5=92=8C=E5=90=8E=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../cluster_fault_processor.go | 16 +++++++++++++--- .../clusterd/pkg/common/constant/constants.go | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go b/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go index f0cbfadba..58292e5d6 100644 --- a/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go +++ b/component/clusterd/pkg/application/faultmanager/faultclusterprocess/cluster_fault_processor.go @@ -163,10 +163,20 @@ func getNodeFaultInfo(nodeInfo *constant.NodeInfo) []*fault.DeviceFaultInfo { } faultsOnNode := make([]*fault.DeviceFaultInfo, 0) for _, device := range nodeInfo.FaultDevList { + faultCodeInfos := make([]*fault.DeviceFaultCodeInfo, 0) + for _, faultCode := range device.FaultCode { + faultCodeInfos = append(faultCodeInfos, &fault.DeviceFaultCodeInfo{ + FaultCode: faultCode, + ChipId: constant.EmptyChipId, + PortId: constant.EmptyPortId, + FaultTime: constant.EmptyFaultTime, + }) + } deviceFault := fault.DeviceFaultInfo{ - DeviceId: strconv.Itoa(int(device.DeviceId)), - DeviceType: device.DeviceType, - FaultCodes: device.FaultCode, + DeviceId: strconv.Itoa(int(device.DeviceId)), + DeviceType: device.DeviceType, + FaultCodes: device.FaultCode, + FaultCodeInfos: faultCodeInfos, } switch nodeInfo.NodeStatus { case constant.NotHandleFaultLevelStr: diff --git a/component/clusterd/pkg/common/constant/constants.go b/component/clusterd/pkg/common/constant/constants.go index fa0f5d8f3..b87e4edcd 100644 --- a/component/clusterd/pkg/common/constant/constants.go +++ b/component/clusterd/pkg/common/constant/constants.go @@ -230,6 +230,8 @@ const ( EmptyChipId = "-1" // EmptyPortId port id for node or device fault EmptyPortId = "-1" + // EmptyFaultTime fault time for node fault + EmptyFaultTime = "-1" ) // ras feature const -- Gitee