From 598c6ecfe2f18491bbf1b08020d5ad98ced918af Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Wed, 2 Mar 2022 15:59:07 +0800 Subject: [PATCH 1/2] Fix some issues: 1.Backport 4 patches from openEuler master branch. 2.Enable compilation of the feature memory fault prediction based on corrected error. 3.Fix changelog date error of this spec file. Signed-off-by: Xiaofei Tan --- ...e-issue-of-sprintf-data-type-mismatc.patch | 55 ++++++++++++++ ...e-issue-of-command-option-r-for-hip0.patch | 72 ++++++++++++++++++ ...me-print-format-issues-for-hisi-comm.patch | 76 +++++++++++++++++++ ...me-modules-supported-by-hisi-common-.patch | 35 +++++++++ rasdaemon.spec | 24 +++++- 5 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch create mode 100644 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch create mode 100644 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch create mode 100644 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch diff --git a/0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch b/0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch new file mode 100644 index 0000000..25a61ff --- /dev/null +++ b/0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch @@ -0,0 +1,55 @@ +From 44fa917e290255570772926a2a11fd5bee3af90c Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Mon, 11 Oct 2021 10:22:10 +0800 +Subject: [PATCH 1/4] rasdaemon: Fix the issue of sprintf data type mismatch in + uuid_le() + +The data type of sprintf called in the function uuid_le() is mismatch. +Arm64 compiler force it to unsigned char by default, and can work normally. +But if someone compile it with the option -fsigned-char, the function +can't work correctly. 
+ +Signed-off-by: Xiaofei Tan +--- + ras-extlog-handler.c | 2 +- + ras-non-standard-handler.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/ras-extlog-handler.c b/ras-extlog-handler.c +index 5fd3580..1834687 100644 +--- a/ras-extlog-handler.c ++++ b/ras-extlog-handler.c +@@ -152,7 +152,7 @@ static char *uuid_le(const char *uu) + static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; + + for (i = 0; i < 16; i++) { +- p += sprintf(p, "%.2x", uu[le[i]]); ++ p += sprintf(p, "%.2x", (unsigned char) uu[le[i]]); + switch (i) { + case 3: + case 5: +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 6ccf5bc..6d5a6f8 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -36,7 +36,7 @@ static char *uuid_le(const char *uu) + static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; + + for (i = 0; i < 16; i++) { +- p += sprintf(p, "%.2x", uu[le[i]]); ++ p += sprintf(p, "%.2x", (unsigned char) uu[le[i]]); + switch (i) { + case 3: + case 5: +@@ -61,7 +61,7 @@ static int uuid_le_cmp(const char *sec_type, const char *uuid2) + 3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; + + for (i = 0; i < 16; i++) +- p += sprintf(p, "%.2x", sec_type[le[i]]); ++ p += sprintf(p, "%.2x", (unsigned char) sec_type[le[i]]); + *p = 0; + return strncmp(uuid1, uuid2, 32); + } +-- +2.33.0 + diff --git a/0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch b/0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch new file mode 100644 index 0000000..248ef78 --- /dev/null +++ b/0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch @@ -0,0 +1,72 @@ +From 6af26231fd58be578ea7d2ceb009fddf14c386a7 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Mon, 1 Feb 2021 19:09:59 +0800 +Subject: [PATCH 2/4] rasdaemon: Fix the issue of command option -r for hip08 + +It will record event even the option -r is not provided for hip08. +It is not right, and fix it. 
+ +Signed-off-by: Xiaofei Tan +--- + non-standard-hisi_hip08.c | 6 +++--- + non-standard-hisilicon.c | 6 ++++++ + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c +index ebf03e1..9092183 100644 +--- a/non-standard-hisi_hip08.c ++++ b/non-standard-hisi_hip08.c +@@ -670,7 +670,7 @@ static int decode_hip08_oem_type1_error(struct ras_events *ras, + } + + #ifdef HAVE_SQLITE3 +- if (!ev_decoder->stmt_dec_record) { ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &hip08_oem_type1_event_tab) + != SQLITE_OK) { +@@ -842,7 +842,7 @@ static int decode_hip08_oem_type2_error(struct ras_events *ras, + } + + #ifdef HAVE_SQLITE3 +- if (!ev_decoder->stmt_dec_record) { ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &hip08_oem_type2_event_tab) != SQLITE_OK) { + trace_seq_printf(s, +@@ -992,7 +992,7 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, + } + + #ifdef HAVE_SQLITE3 +- if (!ev_decoder->stmt_dec_record) { ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &hip08_pcie_local_event_tab) != SQLITE_OK) { + trace_seq_printf(s, +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index a6f5e78..3fccff6 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -77,6 +77,9 @@ void record_vendor_data(struct ras_ns_ev_decoder *ev_decoder, + enum hisi_oem_data_type data_type, + int id, int64_t data, const char *text) + { ++ if (ev_decoder->stmt_dec_record == NULL) ++ return; ++ + switch (data_type) { + case HISI_OEM_DATA_TYPE_INT: + sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data); +@@ -94,6 +97,9 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) + { + int rc; + ++ if (ev_decoder->stmt_dec_record == 
NULL) ++ return 0; ++ + rc = sqlite3_step(ev_decoder->stmt_dec_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) + log(TERM, LOG_ERR, +-- +2.33.0 + diff --git a/0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch b/0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch new file mode 100644 index 0000000..4391e9d --- /dev/null +++ b/0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch @@ -0,0 +1,76 @@ +From 6ff3528ea696377fe1ec666839258abfcf8802b6 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Fri, 20 Aug 2021 10:02:04 +0800 +Subject: [PATCH 3/4] rasdaemon: Fix some print format issues for hisi common + error section + +It is not right to use '%d' to print uint8_t and uint16_t, although +there is no function issue. Change to use '%hhu' and '%hu' separately. + +Signed-off-by: Xiaofei Tan +--- + non-standard-hisilicon.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 3fccff6..f9c7bd4 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -198,7 +198,7 @@ static const char* get_soc_desc(uint8_t soc_id) + static void decode_module(struct hisi_event *event, uint8_t module_id) + { + if (module_id >= sizeof(module_name)/sizeof(char *)) +- HISI_SNPRINTF(event->error_msg, "module=unknown(id=%d) ", module_id); ++ HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id); + else + HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); + } +@@ -207,36 +207,36 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, + const struct hisi_common_error_section *err, + struct hisi_event *event) + { +- HISI_SNPRINTF(event->error_msg, "[ table_version=%d", err->version); ++ HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version); + if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) + HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id)); + + 
if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) +- HISI_SNPRINTF(event->error_msg, "socket_id=%d", err->socket_id); ++ HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) +- HISI_SNPRINTF(event->error_msg, "totem_id=%d", err->totem_id); ++ HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) +- HISI_SNPRINTF(event->error_msg, "nimbus_id=%d", err->nimbus_id); ++ HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) +- HISI_SNPRINTF(event->error_msg, "subsystem_id=%d", err->subsystem_id); ++ HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID)) + decode_module(event, err->module_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) +- HISI_SNPRINTF(event->error_msg, "submodule_id=%d", err->submodule_id); ++ HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) +- HISI_SNPRINTF(event->error_msg, "core_id=%d", err->core_id); ++ HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) +- HISI_SNPRINTF(event->error_msg, "port_id=%d", err->port_id); ++ HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) +- HISI_SNPRINTF(event->error_msg, "err_type=%d", err->err_type); ++ HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type); + + if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) + HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x", +-- +2.33.0 + diff --git a/0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch b/0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch new file mode 100644 index 0000000..1d7a09a --- 
/dev/null +++ b/0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch @@ -0,0 +1,35 @@ +From 31c1d5f335aa63fb02583eca8fe21a18fbcb865e Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Fri, 20 Aug 2021 10:39:12 +0800 +Subject: [PATCH 4/4] rasdaemon: Add some modules supported by hisi common + error section + +Add some modules supported by hisi common error section. Besides, +HHA is the module for some old platform, and it takes the same place +of MATA, so remove it. + +Signed-off-by: Xiaofei Tan +--- + non-standard-hisilicon.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index f9c7bd4..1432163 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -184,7 +184,11 @@ static const char* module_name[] = { + "SEC", + "RDE", + "MEE", +- "HHA", ++ "L4D", ++ "Tsensor", ++ "ROH", ++ "BTC", ++ "HILINK" + }; + + static const char* get_soc_desc(uint8_t soc_id) +-- +2.33.0 + diff --git a/rasdaemon.spec b/rasdaemon.spec index cba988a..6e31834 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 2 +Release: 3 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -25,6 +25,10 @@ Patch3: bugfix-fix-disk-error-log-storm.patch Patch4: backport-configure.ac-fix-SYSCONFDEFDIR-default-value.patch Patch5: 0001-Support-cpu-fault-isolation-for-corrected-errors.patch Patch6: 0002-Support-cpu-fault-isolation-for-recoverable-errors.patch +Patch7: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch +Patch8: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch +Patch9: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch +Patch10: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch %description The rasdaemon program is a daemon which monitors the platform 
@@ -43,7 +47,7 @@ autoheader libtoolize --automake --copy --debug --force automake --add-missing %ifarch %{arm} aarch64 -%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror --enable-non-standard --enable-hisi-ns-decode --enable-arm +%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-failure --enable-memory-ce-pfa %else %configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror %endif @@ -70,6 +74,20 @@ rm INSTALL %{buildroot}/usr/include/*.h /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Wed Mar 2 2022 tanxiaofei - 0.6.7-3 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC: + 1. Backport 4 patches from openEuler master branch. + 1) Fix the issue of sprintf data type mismatch in uuid_le() + 2) Fix the issue of command option -r for hip08 + 3) Fix some print format issues for hisi common error section + 4) Add some modules supported by hisi common error section + 2.Enable compilation of the feature memory fault prediction based on + corrected error. + 3.Fix changelog date error of this spec file. + * Wed Feb 23 2022 luoshengwei - 0.6.7-2 - Type:feature - ID:NA @@ -79,7 +97,7 @@ rm INSTALL %{buildroot}/usr/include/*.h * Wed Dec 8 2021 xujing - 0.6.7-1 - Update software to v0.6.7 -* Sat July 29 2021 tanxiaofei - 0.6.6-6 +* Thu Jul 29 2021 tanxiaofei - 0.6.6-6 - Type:feature - ID:NA - SUG:NA -- Gitee From 783747fbfa4cadedeb4a22770362a84eada8b34b Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 3 Mar 2022 14:27:51 +0000 Subject: [PATCH 2/2] Modify format of the Hisilicon common error records and improve Hisilicon common error statistics 1. Modify the recording format of the Hisilicon common errors and 2. 
Improve error statistics for the Hisilicon common errors in the ras-mc-ctl. Signed-off-by: Shiju Jose --- ...-recording-Hisilicon-common-error-da.patch | 237 ++++++++++++++++++ ...-ctl-Modify-error-statistics-for-HiS.patch | 97 +++++++ rasdaemon.spec | 12 +- 3 files changed, 345 insertions(+), 1 deletion(-) create mode 100644 0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch create mode 100644 0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch diff --git a/0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch b/0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch new file mode 100644 index 0000000..2975d7c --- /dev/null +++ b/0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch @@ -0,0 +1,237 @@ +From 32cadf429eac7d1e8e5c7f9c8d4c1e2add05c93e Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 2 Mar 2022 12:20:40 +0000 +Subject: [PATCH v3 1/2] rasdaemon: Modify recording Hisilicon common error + data + +The error statistics for the Hisilicon common +error need to do based on module, error severity etc. + +Modify recording Hisilicon common error data as separate fields +in the sql db table instead of the combined single field. 
+ +Signed-off-by: Shiju Jose +--- + non-standard-hisilicon.c | 127 +++++++++++++++++++++++++++++++++------ + 1 file changed, 107 insertions(+), 20 deletions(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index a6f5e78..fcdddab 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -17,6 +17,7 @@ + #include "non-standard-hisilicon.h" + + #define HISI_BUF_LEN 2048 ++#define HISI_PCIE_INFO_BUF_LEN 256 + + struct hisi_common_error_section { + uint32_t val_bits; +@@ -63,12 +64,25 @@ enum { + enum { + HISI_COMMON_FIELD_ID, + HISI_COMMON_FIELD_TIMESTAMP, +- HISI_COMMON_FIELD_ERR_INFO, ++ HISI_COMMON_FIELD_VERSION, ++ HISI_COMMON_FIELD_SOC_ID, ++ HISI_COMMON_FIELD_SOCKET_ID, ++ HISI_COMMON_FIELD_TOTEM_ID, ++ HISI_COMMON_FIELD_NIMBUS_ID, ++ HISI_COMMON_FIELD_SUB_SYSTEM_ID, ++ HISI_COMMON_FIELD_MODULE_ID, ++ HISI_COMMON_FIELD_SUB_MODULE_ID, ++ HISI_COMMON_FIELD_CORE_ID, ++ HISI_COMMON_FIELD_PORT_ID, ++ HISI_COMMON_FIELD_ERR_TYPE, ++ HISI_COMMON_FIELD_PCIE_INFO, ++ HISI_COMMON_FIELD_ERR_SEVERITY, + HISI_COMMON_FIELD_REGS_DUMP, + }; + + struct hisi_event { + char error_msg[HISI_BUF_LEN]; ++ char pcie_info[HISI_PCIE_INFO_BUF_LEN]; + char reg_msg[HISI_BUF_LEN]; + }; + +@@ -77,6 +91,11 @@ void record_vendor_data(struct ras_ns_ev_decoder *ev_decoder, + enum hisi_oem_data_type data_type, + int id, int64_t data, const char *text) + { ++ if (!ev_decoder->stmt_dec_record) { ++ log(TERM, LOG_WARNING, "sql hisi section tab does not exist\n"); ++ return; ++ } ++ + switch (data_type) { + case HISI_OEM_DATA_TYPE_INT: + sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data); +@@ -128,12 +147,24 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) + static const struct db_fields hisi_common_section_fields[] = { + { .name = "id", .type = "INTEGER PRIMARY KEY" }, + { .name = "timestamp", .type = "TEXT" }, +- { .name = "err_info", .type = "TEXT" }, ++ { .name = "version", .type = "INTEGER" }, ++ { .name = "soc_id", 
.type = "INTEGER" }, ++ { .name = "socket_id", .type = "INTEGER" }, ++ { .name = "totem_id", .type = "INTEGER" }, ++ { .name = "nimbus_id", .type = "INTEGER" }, ++ { .name = "sub_system_id", .type = "INTEGER" }, ++ { .name = "module_id", .type = "TEXT" }, ++ { .name = "sub_module_id", .type = "INTEGER" }, ++ { .name = "core_id", .type = "INTEGER" }, ++ { .name = "port_id", .type = "INTEGER" }, ++ { .name = "err_type", .type = "INTEGER" }, ++ { .name = "pcie_info", .type = "TEXT" }, ++ { .name = "err_severity", .type = "TEXT" }, + { .name = "regs_dump", .type = "TEXT" }, + }; + + static const struct db_table_descriptor hisi_common_section_tab = { +- .name = "hisi_common_section", ++ .name = "hisi_common_section_v2", + .fields = hisi_common_section_fields, + .num_fields = ARRAY_SIZE(hisi_common_section_fields), + }; +@@ -189,12 +220,20 @@ static const char* get_soc_desc(uint8_t soc_id) + return soc_desc[soc_id]; + } + +-static void decode_module(struct hisi_event *event, uint8_t module_id) ++static void decode_module(struct ras_ns_ev_decoder *ev_decoder, ++ struct hisi_event *event, uint8_t module_id) + { +- if (module_id >= sizeof(module_name)/sizeof(char *)) ++ if (module_id >= sizeof(module_name)/sizeof(char *)) { + HISI_SNPRINTF(event->error_msg, "module=unknown(id=%d) ", module_id); +- else ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_MODULE_ID, ++ 0, "unknown"); ++ } else { + HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_MODULE_ID, ++ 0, module_name[module_id]); ++ } + } + + static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, +@@ -202,43 +241,93 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, + struct hisi_event *event) + { + HISI_SNPRINTF(event->error_msg, "[ table_version=%d", err->version); +- if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) ++ 
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_VERSION, ++ err->version, NULL); ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) { + HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id)); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SOC_ID, ++ err->soc_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) { + HISI_SNPRINTF(event->error_msg, "socket_id=%d", err->socket_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SOCKET_ID, ++ err->socket_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) { + HISI_SNPRINTF(event->error_msg, "totem_id=%d", err->totem_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_TOTEM_ID, ++ err->totem_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) { + HISI_SNPRINTF(event->error_msg, "nimbus_id=%d", err->nimbus_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_NIMBUS_ID, ++ err->nimbus_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) { + HISI_SNPRINTF(event->error_msg, "subsystem_id=%d", err->subsystem_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SUB_SYSTEM_ID, ++ err->subsystem_id, NULL); ++ } + + if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID)) +- decode_module(event, err->module_id); ++ decode_module(ev_decoder, event, err->module_id); + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) { + HISI_SNPRINTF(event->error_msg, "submodule_id=%d", err->submodule_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ 
HISI_COMMON_FIELD_SUB_MODULE_ID, ++ err->submodule_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) { + HISI_SNPRINTF(event->error_msg, "core_id=%d", err->core_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_CORE_ID, ++ err->core_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) { + HISI_SNPRINTF(event->error_msg, "port_id=%d", err->port_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_PORT_ID, ++ err->port_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) { + HISI_SNPRINTF(event->error_msg, "err_type=%d", err->err_type); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_ERR_TYPE, ++ err->err_type, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) { + HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x", + err->pcie_info.segment, err->pcie_info.bus, + err->pcie_info.device, err->pcie_info.function); ++ HISI_SNPRINTF(event->pcie_info, "%04x:%02x:%02x.%x", ++ err->pcie_info.segment, err->pcie_info.bus, ++ err->pcie_info.device, err->pcie_info.function); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_PCIE_INFO, ++ 0, event->pcie_info); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) { + HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity)); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_ERR_SEVERITY, ++ 0, err_severity(err->err_severity)); ++ } + + HISI_SNPRINTF(event->error_msg, "]"); + } +@@ -283,8 +372,6 @@ static int decode_hisi_common_section(struct ras_events *ras, + 
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_TIMESTAMP, + 0, event->timestamp); +- record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, +- HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg); + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg); + step_vendor_data_tab(ev_decoder, "hisi_common_section_tab"); +-- +2.25.1 + diff --git a/0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch b/0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch new file mode 100644 index 0000000..0838605 --- /dev/null +++ b/0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch @@ -0,0 +1,97 @@ +From fb1c9bcdc2dd7fa0704c401e6e17d40f47b94d0a Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 24 Feb 2022 18:02:14 +0000 +Subject: [PATCH v3 2/2] rasdaemon: ras-mc-ctl: Modify error statistics for + HiSilicon Kunpeng9xx common errors + +Modify the error statistics for the HiSilicon Kunpeng9xx platforms common errors +to display the statistics and error info based on the module and the error severity. 
+ +Signed-off-by: Shiju Jose +--- + util/ras-mc-ctl.in | 40 +++++++++++++++++++++++++++++----------- + 1 file changed, 29 insertions(+), 11 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 1e3aeb7..22ba1fd 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1535,7 +1535,7 @@ sub vendor_errors_summary + require DBI; + my ($num_args, $platform_id); + my ($query, $query_handle, $count, $out); +- my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info); ++ my ($module_id, $sub_module_id, $err_severity, $err_sev); + + $num_args = $#ARGV + 1; + $platform_id = 0; +@@ -1612,13 +1612,18 @@ sub vendor_errors_summary + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $query = "select err_info, count(*) from hisi_common_section"; ++ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($err_info, $count)); ++ $query_handle->bind_columns(\($err_severity, $module_id, $count)); + $out = ""; ++ $err_sev = ""; + while($query_handle->fetch()) { +- $out .= "\terrors: $count\n"; ++ if ($err_severity ne $err_sev) { ++ $out .= "$err_severity errors:\n"; ++ $err_sev = $err_severity; ++ } ++ $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events summary:\n$out\n"; +@@ -1636,8 +1641,8 @@ sub vendor_errors + require DBI; + my ($num_args, $platform_id); + my ($query, $query_handle, $id, $timestamp, $out); +- my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id); +- my ($module_id, $sub_module_id, $err_severity, $err_type, $err_info, $regs); ++ my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id); ++ my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs); + + $num_args = $#ARGV + 1; + $platform_id = 0; +@@ -1725,15 
+1730,28 @@ sub vendor_errors + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $query = "select id, timestamp, err_info, regs_dump from hisi_common_section order by id"; ++ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $timestamp, $err_info, $regs)); ++ $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp "; +- $out .= "Error Info:$err_info \n" if ($err_info); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "totem_id=$totem_id, " if ($totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "pcie_info=$pcie_info, " if ($pcie_info); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs" if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events:\n$out\n"; +-- +2.25.1 + diff --git a/rasdaemon.spec b/rasdaemon.spec index 6e31834..e6fb48d 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 3 +Release: 4 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -29,6 +29,8 @@ Patch7: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch Patch8: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch Patch9: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch Patch10: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch +Patch11: 0001-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch +Patch12: 0002-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch %description The rasdaemon program is a daemon which monitors the platform @@ -74,6 +76,14 @@ rm INSTALL %{buildroot}/usr/include/*.h /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Thu Mar 3 2022 Shiju Jose - 0.6.7-4 +- Type:feature +- ID:NA +- SUG:NA +- DESC: + 1. 
Modify recording Hisilicon common error data in the rasdaemon and + 2. Improve Hisilicon common error statistics in the ras-mc-ctl. + * Wed Mar 2 2022 tanxiaofei - 0.6.7-3 - Type:bugfix - ID:NA -- Gitee