diff --git a/0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch b/0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch new file mode 100644 index 0000000000000000000000000000000000000000..d15a7142eb2cf604a988c46217c33253d7143f54 --- /dev/null +++ b/0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch @@ -0,0 +1,224 @@ +From 62218a9c3aec44330ce3b77f3634c788b6e6f60c Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 2 Mar 2022 12:20:40 +0000 +Subject: [PATCH 1/6] rasdaemon: Modify recording Hisilicon common error data + +The error statistics for the Hisilicon common +error need to do based on module, error severity etc. + +Modify recording Hisilicon common error data as separate fields +in the sql db table instead of the combined single field. + +Signed-off-by: Shiju Jose +--- + non-standard-hisilicon.c | 122 ++++++++++++++++++++++++++++++++------- + 1 file changed, 102 insertions(+), 20 deletions(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 1432163..dc69d46 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -17,6 +17,7 @@ + #include "non-standard-hisilicon.h" + + #define HISI_BUF_LEN 2048 ++#define HISI_PCIE_INFO_BUF_LEN 256 + + struct hisi_common_error_section { + uint32_t val_bits; +@@ -63,12 +64,25 @@ enum { + enum { + HISI_COMMON_FIELD_ID, + HISI_COMMON_FIELD_TIMESTAMP, +- HISI_COMMON_FIELD_ERR_INFO, ++ HISI_COMMON_FIELD_VERSION, ++ HISI_COMMON_FIELD_SOC_ID, ++ HISI_COMMON_FIELD_SOCKET_ID, ++ HISI_COMMON_FIELD_TOTEM_ID, ++ HISI_COMMON_FIELD_NIMBUS_ID, ++ HISI_COMMON_FIELD_SUB_SYSTEM_ID, ++ HISI_COMMON_FIELD_MODULE_ID, ++ HISI_COMMON_FIELD_SUB_MODULE_ID, ++ HISI_COMMON_FIELD_CORE_ID, ++ HISI_COMMON_FIELD_PORT_ID, ++ HISI_COMMON_FIELD_ERR_TYPE, ++ HISI_COMMON_FIELD_PCIE_INFO, ++ HISI_COMMON_FIELD_ERR_SEVERITY, + HISI_COMMON_FIELD_REGS_DUMP, + }; + + struct hisi_event { + char error_msg[HISI_BUF_LEN]; ++ char pcie_info[HISI_PCIE_INFO_BUF_LEN]; + char 
reg_msg[HISI_BUF_LEN]; + }; + +@@ -134,12 +148,24 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) + static const struct db_fields hisi_common_section_fields[] = { + { .name = "id", .type = "INTEGER PRIMARY KEY" }, + { .name = "timestamp", .type = "TEXT" }, +- { .name = "err_info", .type = "TEXT" }, ++ { .name = "version", .type = "INTEGER" }, ++ { .name = "soc_id", .type = "INTEGER" }, ++ { .name = "socket_id", .type = "INTEGER" }, ++ { .name = "totem_id", .type = "INTEGER" }, ++ { .name = "nimbus_id", .type = "INTEGER" }, ++ { .name = "sub_system_id", .type = "INTEGER" }, ++ { .name = "module_id", .type = "TEXT" }, ++ { .name = "sub_module_id", .type = "INTEGER" }, ++ { .name = "core_id", .type = "INTEGER" }, ++ { .name = "port_id", .type = "INTEGER" }, ++ { .name = "err_type", .type = "INTEGER" }, ++ { .name = "pcie_info", .type = "TEXT" }, ++ { .name = "err_severity", .type = "TEXT" }, + { .name = "regs_dump", .type = "TEXT" }, + }; + + static const struct db_table_descriptor hisi_common_section_tab = { +- .name = "hisi_common_section", ++ .name = "hisi_common_section_v2", + .fields = hisi_common_section_fields, + .num_fields = ARRAY_SIZE(hisi_common_section_fields), + }; +@@ -199,12 +225,20 @@ static const char* get_soc_desc(uint8_t soc_id) + return soc_desc[soc_id]; + } + +-static void decode_module(struct hisi_event *event, uint8_t module_id) ++static void decode_module(struct ras_ns_ev_decoder *ev_decoder, ++ struct hisi_event *event, uint8_t module_id) + { +- if (module_id >= sizeof(module_name)/sizeof(char *)) ++ if (module_id >= sizeof(module_name)/sizeof(char *)) { + HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id); +- else ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_MODULE_ID, ++ 0, "unknown"); ++ } else { + HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ 
HISI_COMMON_FIELD_MODULE_ID, ++ 0, module_name[module_id]); ++ } + } + + static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, +@@ -212,43 +246,93 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, + struct hisi_event *event) + { + HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version); +- if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_VERSION, ++ err->version, NULL); ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) { + HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id)); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SOC_ID, ++ err->soc_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) { + HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SOCKET_ID, ++ err->socket_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) { + HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_TOTEM_ID, ++ err->totem_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) { + HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_NIMBUS_ID, ++ err->nimbus_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) { + HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SUB_SYSTEM_ID, ++ 
err->subsystem_id, NULL); ++ } + + if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID)) +- decode_module(event, err->module_id); ++ decode_module(ev_decoder, event, err->module_id); + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) { + HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SUB_MODULE_ID, ++ err->submodule_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) { + HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_CORE_ID, ++ err->core_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) { + HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_PORT_ID, ++ err->port_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) { + HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_ERR_TYPE, ++ err->err_type, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) { + HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x", + err->pcie_info.segment, err->pcie_info.bus, + err->pcie_info.device, err->pcie_info.function); ++ HISI_SNPRINTF(event->pcie_info, "%04x:%02x:%02x.%x", ++ err->pcie_info.segment, err->pcie_info.bus, ++ err->pcie_info.device, err->pcie_info.function); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_PCIE_INFO, ++ 0, event->pcie_info); ++ } + +- if (err->val_bits & 
BIT(HISI_COMMON_VALID_ERR_SEVERITY)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) { + HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity)); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_ERR_SEVERITY, ++ 0, err_severity(err->err_severity)); ++ } + + HISI_SNPRINTF(event->error_msg, "]"); + } +@@ -293,8 +377,6 @@ static int decode_hisi_common_section(struct ras_events *ras, + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_TIMESTAMP, + 0, event->timestamp); +- record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, +- HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg); + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg); + step_vendor_data_tab(ev_decoder, "hisi_common_section_tab"); +-- +2.25.1 + diff --git a/0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch b/0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f7eb2406fa143e08079930c2316c1e55168f866 --- /dev/null +++ b/0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch @@ -0,0 +1,97 @@ +From 4d9f297028ce3116eaf574b2570d71a4ed666b7d Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 24 Feb 2022 18:02:14 +0000 +Subject: [PATCH 2/6] rasdaemon: ras-mc-ctl: Modify error statistics for + HiSilicon Kunpeng9xx common errors + +Modify the error statistics for the HiSilicon Kunpeng9xx platforms common errors +to display the statistics and error info based on the module and the error severity. 
+ +Signed-off-by: Shiju Jose +--- + util/ras-mc-ctl.in | 40 +++++++++++++++++++++++++++++----------- + 1 file changed, 29 insertions(+), 11 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 1e3aeb7..22ba1fd 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1535,7 +1535,7 @@ sub vendor_errors_summary + require DBI; + my ($num_args, $platform_id); + my ($query, $query_handle, $count, $out); +- my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info); ++ my ($module_id, $sub_module_id, $err_severity, $err_sev); + + $num_args = $#ARGV + 1; + $platform_id = 0; +@@ -1612,13 +1612,18 @@ sub vendor_errors_summary + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $query = "select err_info, count(*) from hisi_common_section"; ++ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($err_info, $count)); ++ $query_handle->bind_columns(\($err_severity, $module_id, $count)); + $out = ""; ++ $err_sev = ""; + while($query_handle->fetch()) { +- $out .= "\terrors: $count\n"; ++ if ($err_severity ne $err_sev) { ++ $out .= "$err_severity errors:\n"; ++ $err_sev = $err_severity; ++ } ++ $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events summary:\n$out\n"; +@@ -1636,8 +1641,8 @@ sub vendor_errors + require DBI; + my ($num_args, $platform_id); + my ($query, $query_handle, $id, $timestamp, $out); +- my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id); +- my ($module_id, $sub_module_id, $err_severity, $err_type, $err_info, $regs); ++ my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id); ++ my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs); + + $num_args = $#ARGV + 1; + $platform_id = 0; +@@ -1725,15 
+1730,28 @@ sub vendor_errors + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $query = "select id, timestamp, err_info, regs_dump from hisi_common_section order by id"; ++ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $timestamp, $err_info, $regs)); ++ $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp "; +- $out .= "Error Info:$err_info \n" if ($err_info); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "totem_id=$totem_id, " if ($totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "pcie_info=$pcie_info, " if ($pcie_info); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs" if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events:\n$out\n"; +-- +2.25.1 + diff --git a/0003-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch b/0003-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch new file mode 100644 index 0000000000000000000000000000000000000000..7600b58c6cf8e95ca2d48c56ae51da7bc6cdabb3 --- /dev/null +++ b/0003-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch @@ -0,0 +1,56 @@ +From eb93d77b417b58cba27799ae85747b8a193cf063 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 5 Mar 2022 16:18:55 +0000 +Subject: [PATCH 3/6] rasdaemon: ras-mc-ctl: Reformat error info of the + HiSilicon Kunpeng920 + +Reformat the code to display the error info of HiSilicon Kunpeng920. 
+ +Signed-off-by: Shiju Jose +--- + util/ras-mc-ctl.in | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 22ba1fd..eeaf885 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1669,8 +1669,9 @@ sub vendor_errors + $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); + $out .= "module_id=$module_id, " if ($module_id); + $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, \n" if ($err_severity); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n"; +@@ -1692,8 +1693,9 @@ sub vendor_errors + $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); + $out .= "module_id=$module_id, " if ($module_id); + $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, \n" if ($err_severity); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n"; +@@ -1717,8 +1719,9 @@ sub vendor_errors + $out .= "core_id=$core_id, " if ($core_id); + $out .= "port_id=$port_id, " if ($port_id); + $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "err_type=$err_type, \n" if ($err_type); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n"; +-- +2.25.1 + diff --git a/0004-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch 
b/0004-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch new file mode 100644 index 0000000000000000000000000000000000000000..15ab710dc8bb21c6b4c44f245319eb52af1301cb --- /dev/null +++ b/0004-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch @@ -0,0 +1,36 @@ +From 623e85c07ab21ccc89ffe2bb444eb000a2664a9d Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 5 Mar 2022 17:01:35 +0000 +Subject: [PATCH 4/6] rasdaemon: ras-mc-ctl: Add printing usage if necessary + parameters are not passed for the HiSilicon vendor-errors options + +Add printing usage if necessary parameters are not passed for the HiSilicon vendor-errors options of the ras-mc-ctl. + +Signed-off-by: Shiju Jose +--- + util/ras-mc-ctl.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index eeaf885..0e32cb1 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1542,6 +1542,7 @@ sub vendor_errors_summary + if ($num_args ne 0) { + $platform_id = $ARGV[0]; + } else { ++ usage(1); + return; + } + +@@ -1649,6 +1650,7 @@ sub vendor_errors + if ($num_args ne 0) { + $platform_id = $ARGV[0]; + } else { ++ usage(1); + return; + } + +-- +2.25.1 + diff --git a/0005-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch b/0005-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch new file mode 100644 index 0000000000000000000000000000000000000000..6153a85717c21b50da036c97cff54d5e3c963b27 --- /dev/null +++ b/0005-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch @@ -0,0 +1,198 @@ +From 4007c95f8a8d570542ffc11676b619ea5649d0e7 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 5 Mar 2022 18:19:38 +0000 +Subject: [PATCH 5/6] rasdaemon: ras-mc-ctl: Add support to display the + HiSilicon vendor errors for a specified module + +Add support to display the HiSilicon vendor errors for a specified module. 
+ +Signed-off-by: Shiju Jose +--- + util/ras-mc-ctl.in | 119 ++++++++++++++++++++++++--------------------- + 1 file changed, 63 insertions(+), 56 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 0e32cb1..d728300 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -96,7 +96,8 @@ Usage: $prog [OPTIONS...] + --errors Shows the errors stored at the error database. + --error-count Shows the corrected and uncorrected error counts using sysfs. + --vendor-errors-summary Presents a summary of the vendor-specific logged errors. +- --vendor-errors Shows the vendor-specific errors stored in the error database. ++ --vendor-errors Shows the vendor-specific errors stored in the error database. ++ --vendor-errors Shows the vendor-specific errors for a specific module stored in the error database. + --vendor-platforms Shows the supported platforms with platform-ids for the vendor-specific errors. + --help This help message. + EOF +@@ -1640,15 +1641,19 @@ sub vendor_errors_summary + sub vendor_errors + { + require DBI; +- my ($num_args, $platform_id); ++ my ($num_args, $platform_id, $module); + my ($query, $query_handle, $id, $timestamp, $out); + my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id); + my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs); + + $num_args = $#ARGV + 1; + $platform_id = 0; ++ $module = 0; + if ($num_args ne 0) { + $platform_id = $ARGV[0]; ++ if ($num_args gt 1) { ++ $module = $ARGV[1]; ++ } + } else { + usage(1); + return; +@@ -1664,21 +1669,21 @@ sub vendor_errors + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. 
$timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($module_id && ($module eq $module_id))) { ++ $out .= "$id. $timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type1 errors.\n"; + } + $query_handle->finish; + +@@ -1688,21 +1693,21 @@ sub vendor_errors + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($module_id && ($module eq $module_id))) { ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type2 errors.\n"; + } + $query_handle->finish; + +@@ -1712,23 +1717,23 @@ sub vendor_errors + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $sub_module_id, $core_id, $port_id, $err_severity, $err_type, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "Error Registers: $regs " if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($sub_module_id && ($module eq $sub_module_id))) { ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 PCIe controller errors.\n"; + } + $query_handle->finish; + } +@@ -1741,22 +1746,24 @@ sub vendor_errors + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "totem_id=$totem_id, " if ($totem_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "pcie_info=$pcie_info, " if ($pcie_info); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs" if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($module_id && ($module eq $module_id))) { ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "totem_id=$totem_id, " if ($totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "pcie_info=$pcie_info, " if ($pcie_info); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs" if ($regs); ++ $out .= "\n\n"; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events:\n$out\n"; +-- +2.25.1 + diff --git a/0006-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch b/0006-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch new file mode 100644 index 0000000000000000000000000000000000000000..073d33562f1dd3c1d619549f85823fe906b5e788 --- /dev/null +++ b/0006-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch @@ -0,0 +1,148 @@ +From 88bf3126312645843152c6c3215b54b120bcc1ec Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Mon, 7 Mar 2022 12:38:45 +0000 +Subject: [PATCH 6/6] rasdaemon: ras-mc-ctl: Relocate reading and display + Kunpeng920 errors to under Kunpeng9xx + +Relocate reading and display Kunpeng920 errors to under Kunpeng9xx. + +Signed-off-by: Shiju Jose +--- + util/ras-mc-ctl.in | 38 ++++++++++---------------------------- + 1 file changed, 10 insertions(+), 28 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index d728300..2ab9602 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1527,7 +1527,6 @@ sub errors + + # Definitions of the vendor platform IDs. 
+ use constant { +- HISILICON_KUNPENG_920 => "Kunpeng920", + HISILICON_KUNPENG_9XX => "Kunpeng9xx", + }; + +@@ -1549,8 +1548,8 @@ sub vendor_errors_summary + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +- # HiSilicon Kunpeng920 errors +- if ($platform_id eq HISILICON_KUNPENG_920) { ++ # HiSilicon Kunpeng9xx common errors ++ if ($platform_id eq HISILICON_KUNPENG_9XX) { + $query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1565,9 +1564,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type1 error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type1 errors.\n\n"; ++ print "HiSilicon Kunpeng9xx OEM type1 error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1585,9 +1582,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type2 error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type2 errors.\n\n"; ++ print "HiSilicon Kunpeng9xx OEM type2 error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1605,15 +1600,10 @@ sub vendor_errors_summary + $out .= "\t$sub_module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 PCIe controller error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 PCIe controller errors.\n\n"; ++ print "HiSilicon Kunpeng9xx PCIe controller error events summary:\n$out\n"; + } + $query_handle->finish; +- } + +- # HiSilicon Kunpeng9xx common errors +- if ($platform_id eq HISILICON_KUNPENG_9XX) { + $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1629,8 +1619,6 @@ sub 
vendor_errors_summary + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng9xx common errors.\n\n"; + } + $query_handle->finish; + } +@@ -1661,8 +1649,8 @@ sub vendor_errors + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +- # HiSilicon Kunpeng920 errors +- if ($platform_id eq HISILICON_KUNPENG_920) { ++ # HiSilicon Kunpeng9xx common errors ++ if ($platform_id eq HISILICON_KUNPENG_9XX) { + $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1683,7 +1671,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n"; ++ print "HiSilicon Kunpeng9xx OEM type1 error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1707,7 +1695,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n"; ++ print "HiSilicon Kunpeng9xx OEM type2 error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1733,13 +1721,10 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n"; ++ print "HiSilicon Kunpeng9xx PCIe controller error events:\n$out\n"; + } + $query_handle->finish; +- } + +- # HiSilicon Kunpeng9xx common errors +- if ($platform_id eq HISILICON_KUNPENG_9XX) { + $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1767,8 +1752,6 @@ sub vendor_errors + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error 
events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng9xx common errors.\n"; + } + $query_handle->finish; + } +@@ -1779,7 +1762,6 @@ sub vendor_errors + sub vendor_platforms + { + print "\nSupported platforms for the vendor-specific errors:\n"; +- print "\tHiSilicon Kunpeng920, platform-id=\"", HISILICON_KUNPENG_920, "\"\n"; + print "\tHiSilicon Kunpeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n"; + print "\n"; + } +-- +2.25.1 + diff --git a/rasdaemon.spec b/rasdaemon.spec index 6e31834a756e25f165599a48daa23a494d47f848..62576b1acbab1c17be47be2bafb57795f58e74c8 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 3 +Release: 4 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -29,6 +29,12 @@ Patch7: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch Patch8: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch Patch9: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch Patch10: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch +Patch11: 0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch +Patch12: 0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch +Patch13: 0003-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch +Patch14: 0004-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch +Patch15: 0005-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch +Patch16: 0006-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch %description The rasdaemon program is a daemon which monitors the platform @@ -74,6 +80,19 @@ rm INSTALL %{buildroot}/usr/include/*.h /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Mon Mar 07 2022 Shiju Jose - 0.6.7-4 +- Type:feature +- ID:NA +- SUG:NA +- DESC: + 1. 
Modify recording of HiSilicon common error data in the rasdaemon to use separate DB fields. + 2. In the ras-mc-ctl, + 2.1. Improve HiSilicon common error statistics. + 2.2. Add support to display the HiSilicon vendor-errors for a specified module. + 2.3. Print usage if the necessary parameters are not passed for the HiSilicon vendor-errors options. + 2.4. Reformat error info of the HiSilicon Kunpeng920. + 2.5. Relocate reading and displaying of Kunpeng920 errors under Kunpeng9xx. + * Wed Mar 2 2022 tanxiaofei - 0.6.7-3 - Type:bugfix - ID:NA