diff --git a/0006-add-cpu-online-fault-isolation.patch b/0006-add-cpu-online-fault-isolation.patch index b796a48845963d1c62298e5f372bc1045b4496a5..6228b3fe87cdcd272dfb27decea2506599ec9f97 100644 --- a/0006-add-cpu-online-fault-isolation.patch +++ b/0006-add-cpu-online-fault-isolation.patch @@ -1,14 +1,13 @@ -From 94f9581a6b398f178fcabf0fde2cce7eebb15ea7 Mon Sep 17 00:00:00 2001 +From 9e2d3f84c4f158dd58bce4a30eec568331749501 Mon Sep 17 00:00:00 2001 From: Lostwayzxc Date: Tue, 25 May 2021 20:05:49 +0800 -Subject: [PATCH 1/2] add cpu online fault isolation +Subject: [PATCH] add cpu online fault isolation Add cpu online fault isolation, when CE/UCE occurs, we choose to offline the error cpu according to threshold algorithm. Signed-off-by: Luo Shengwei --- - .travis.yml | 2 +- Makefile.am | 6 +- configure.ac | 11 + misc/rasdaemon.env | 17 ++ @@ -19,32 +18,19 @@ Signed-off-by: Luo Shengwei ras-cpu-isolation.h | 76 +++++++ ras-events.c | 8 + ras-record.h | 5 + - 11 files changed, 864 insertions(+), 2 deletions(-) + 10 files changed, 863 insertions(+), 1 deletion(-) create mode 100644 queue.c create mode 100644 queue.h create mode 100644 ras-cpu-isolation.c create mode 100644 ras-cpu-isolation.h -diff --git a/.travis.yml b/.travis.yml -index 79cf4ca..5ab3957 100644 ---- a/.travis.yml -+++ b/.travis.yml -@@ -20,7 +20,7 @@ before_install: - - sudo apt-get install -y sqlite3 - install: - - autoreconf -vfi --- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa -+- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa --enable-cpu-fault-isolation - - script: - - make && sudo make install diff --git a/Makefile.am b/Makefile.am -index f4822b9..6431dd3 100644 +index fabca78..242ceb7 100644 --- a/Makefile.am +++ b/Makefile.am -@@ -57,12 +57,16 @@ endif - if WITH_MEMORY_CE_PFA - rasdaemon_SOURCES += rbtree.c ras-page-isolation.c +@@ -63,13 +63,17 @@ endif + if WITH_AMP_NS_DECODE + rasdaemon_SOURCES += non-standard-ampere.c endif +if WITH_CPU_FAULT_ISOLATION + rasdaemon_SOURCES += ras-cpu-isolation.c queue.c @@ -54,19 +40,20 @@ index f4822b9..6431dd3 100644 include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ -- ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h -+ ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ + ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ +- non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h ++ non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ + ras-cpu-isolation.h queue.h # This rule can't be called with more than one Makefile job (like make -j8) # I can't figure out a way to fix that diff --git a/configure.ac b/configure.ac -index 2d6c59c..a682bb9 100644 +index 33b81fe..d098fcf 100644 --- a/configure.ac +++ b/configure.ac -@@ -141,6 +141,16 @@ AS_IF([test "x$enable_memory_ce_pfa" = "xyes" || test "x$enable_all" == "xyes"], - AM_CONDITIONAL([WITH_MEMORY_CE_PFA], [test x$enable_memory_ce_pfa = xyes || test x$enable_all == xyes]) - AM_COND_IF([WITH_MEMORY_CE_PFA], [USE_MEMORY_CE_PFA="yes"], [USE_MEMORY_CE_PFA="no"]) +@@ -161,6 +161,16 @@ AS_IF([test "x$enable_amp_ns_decode" = "xyes" || test "x$enable_all" == "xyes"], + AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all == xyes]) + AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"]) +AC_ARG_ENABLE([cpu_fault_isolation], + AS_HELP_STRING([--enable-cpu-fault-isolation], [enable cpu online fault isolation])) @@ -81,10 +68,10 @@ index 2d6c59c..a682bb9 100644 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" -@@ -173,4 +183,5 @@ compile time options summary - DEVLINK : $USE_DEVLINK - Disk I/O errors : $USE_DISKERROR +@@ -201,4 +211,5 @@ compile time options summary + Memory Failure : $USE_MEMORY_FAILURE Memory CE PFA : $USE_MEMORY_CE_PFA + AMP RAS errors : $USE_AMP_NS_DECODE + CPU fault isolation : $USE_CPU_FAULT_ISOLATION EOF diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env @@ -295,13 +282,13 @@ index 0000000..9684c58 +#endif \ No newline at end of file diff --git a/ras-arm-handler.c b/ras-arm-handler.c -index 2f170e2..10d0099 100644 +index 1149dc6..a64f20b 100644 --- a/ras-arm-handler.c +++ b/ras-arm-handler.c -@@ -20,6 +20,44 @@ - #include "ras-record.h" - #include "ras-logger.h" +@@ -22,6 +22,44 @@ #include "ras-report.h" + #include "ras-non-standard-handler.h" + #include "non-standard-ampere.h" +#include "ras-cpu-isolation.h" + +#ifdef HAVE_CPU_FAULT_ISOLATION @@ -341,11 +328,11 @@ index 2f170e2..10d0099 100644 +} +#endif - int ras_arm_event_handler(struct trace_seq *s, - struct pevent_record *record, -@@ -78,6 +116,41 @@ int ras_arm_event_handler(struct trace_seq *s, - ev.psci_state = val; - trace_seq_printf(s, "\n psci_state: %d", ev.psci_state); + void display_raw_data(struct trace_seq *s, + const uint8_t *buf, +@@ -139,6 +177,41 @@ int ras_arm_event_handler(struct trace_seq *s, + display_raw_data(s, ev.vsei_error, ev.oem_len); + #endif +#ifdef HAVE_CPU_FAULT_ISOLATION + /* record cpu error */ @@ -387,7 +374,7 @@ index 2f170e2..10d0099 100644 ras_store_arm_record(ras, &ev); diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c new file mode 100644 -index 0000000..a809f91 +index 0000000..6dcff70 --- /dev/null +++ b/ras-cpu-isolation.c @@ -0,0 +1,499 @@ @@ -974,10 +961,10 @@ index 0000000..a7d3fdb +#endif \ No newline at end of file diff --git a/ras-events.c b/ras-events.c -index 471d25d..31c4170 100644 +index ba769d1..00938e6 100644 --- a/ras-events.c +++ b/ras-events.c -@@ -40,6 +40,7 @@ +@@ -41,6 +41,7 @@ #include "ras-record.h" #include "ras-logger.h" #include "ras-page-isolation.h" @@ -985,7 +972,7 @@ index 471d25d..31c4170 100644 /* * Polling time, if read() doesn't block. Currently, trace_pipe_raw never -@@ -874,6 +875,10 @@ int handle_ras_events(int record_events) +@@ -879,6 +880,10 @@ int handle_ras_events(int record_events) cpus = get_num_cpus(ras); @@ -996,7 +983,7 @@ index 471d25d..31c4170 100644 #ifdef HAVE_MCE rc = register_mce_handler(ras, cpus); if (rc) -@@ -990,6 +995,9 @@ err: +@@ -1005,6 +1010,9 @@ err: } free(ras); } @@ -1007,13 +994,13 @@ index 471d25d..31c4170 100644 return rc; } diff --git a/ras-record.h b/ras-record.h -index cc217a9..b453f83 100644 +index d9f7733..efaffa5 100644 --- a/ras-record.h +++ b/ras-record.h -@@ -77,6 +77,11 @@ struct ras_arm_event { - int64_t midr; - int32_t running_state; - int32_t psci_state; +@@ -83,6 +83,11 @@ struct ras_arm_event { + uint32_t ctx_len; + const uint8_t *vsei_error; + uint32_t oem_len; +#ifdef HAVE_CPU_FAULT_ISOLATION + const char *severity; + const uint8_t *error_info; diff --git a/0007-add-trace-print-and-add-sqlite-store.patch b/0007-add-trace-print-and-add-sqlite-store.patch index 08361e6cdacc20a9b5bccc5fc251e7014763b7f0..ac031b3e9a6dbb78fe2bcffcf0466a8d850df078 100644 --- a/0007-add-trace-print-and-add-sqlite-store.patch +++ b/0007-add-trace-print-and-add-sqlite-store.patch @@ -52,9 +52,9 @@ index 549c494..33d4741 100644 --- a/ras-record.c +++ b/ras-record.c @@ -210,6 +210,10 @@ static const struct db_fields arm_event_fields[] = { - { .name="mpidr", .type="INTEGER" }, - { .name="running_state", .type="INTEGER" }, - { .name="psci_state", .type="INTEGER" }, + { .name="err_info", .type="BLOB" }, + { .name="context_info", .type="BLOB" }, + { .name="vendor_info", .type="BLOB" }, +#ifdef HAVE_CPU_FAULT_ISOLATION + { .name="severity", .type="TEXT" }, + { .name="error_info", .type="BLOB" }, @@ -63,9 +63,9 @@ index 549c494..33d4741 100644 static const struct db_table_descriptor arm_event_tab = { @@ -233,6 +237,10 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) - sqlite3_bind_int64 (priv->stmt_arm_record, 4, ev->mpidr); - sqlite3_bind_int (priv->stmt_arm_record, 5, ev->running_state); - sqlite3_bind_int (priv->stmt_arm_record, 6, ev->psci_state); + ev->ctx_error, ev->ctx_len, NULL); + sqlite3_bind_blob (priv->stmt_arm_record, 9, + ev->vsei_error, ev->oem_len, NULL); +#ifdef HAVE_CPU_FAULT_ISOLATION + sqlite3_bind_text (priv->stmt_arm_record, 7, ev->severity, -1, NULL); + sqlite3_bind_blob (priv->stmt_arm_record, 8, ev->error_info, ev->length, NULL); diff --git a/backport-0001-ras-page-isolation-do_page_offline-always-considers-.patch b/backport-0001-ras-page-isolation-do_page_offline-always-considers-.patch deleted file mode 100644 index 63ebacd35e74284a263f431eb5a7452d4af6bffb..0000000000000000000000000000000000000000 --- a/backport-0001-ras-page-isolation-do_page_offline-always-considers-.patch +++ /dev/null @@ -1,104 +0,0 @@ -From e4d27840e173491ab29c2d97017da9344e2c2526 Mon Sep 17 00:00:00 2001 -From: lvying -Date: Sat, 31 Oct 2020 17:57:14 +0800 -Subject: [PATCH 1/2] ras-page-isolation: do_page_offline always considers page - offline was successful - -do_page_offline always consider page offline was successful even if -kernel soft/hard offline page failed. - -Calling rasdaemon with: - - /etc/sysconfig/rasdaemon PAGE_CE_THRESHOLD="1" - -i.e when a page's address occurs Corrected Error, rasdaemon should -trigger this page soft offline. - -However, after adding a livepatch into kernel's -store_soft_offline_page to observe this function's return value, -when injecting a CE into address 0x3f7ec30000, the Kernel -lot reports: - - soft_offline: 0x3f7ec30: unknown non LRU page type ffffe0000000000 () - [store_soft_offline_page]return from soft_offline_page: -5 - -While rasdaemon log reports: - - rasdaemon[73711]: cpu 00:rasdaemon: Corrected Errors at 0x3f7ec30000 exceed threshold - rasdaemon[73711]: rasdaemon: Result of offlining page at 0x3f7ec30000: offlined - -using strace to record rasdaemon's system call, it reports: - - strace -p 73711 - openat(AT_FDCWD, "/sys/devices/system/memory/soft_offline_page", - O_WRONLY|O_CREAT|O_TRUNC, 0666) = 28 - fstat(28, {st_mode=S_IFREG|0200, st_size=4096, ...}) = 0 - write(28, "0x3f7ec30000", 12) = -1 EIO (Input/output error) - close(28) = 0 - -So, kernel actually soft offline pfn 0x3f7ec30 failed and -store_soft_offline_page returned -EIO. However, rasdaemon always -considers the page offline to be successful. - -According to strace display, ferror was unable of detecting the -failure of the write syscall. - -This patch changes fopen-fprintf-ferror-fclose process to use -the lower I/O level, by using instead open-write-close, which -can detect such syscall failure. - -Signed-off-by: lvying -Signed-off-by: Mauro Carvalho Chehab ---- - ras-page-isolation.c | 25 ++++++++++++++++--------- - 1 file changed, 16 insertions(+), 9 deletions(-) - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index 50e4406..dc07545 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -17,6 +17,9 @@ - #include - #include - #include -+#include -+#include -+#include - #include "ras-logger.h" - #include "ras-page-isolation.h" - -@@ -210,18 +213,22 @@ void ras_page_account_init(void) - - static int do_page_offline(unsigned long long addr, enum otype type) - { -- FILE *offline_file; -- int err; -+ int fd, rc; -+ char buf[20]; - -- offline_file = fopen(kernel_offline[type], "w"); -- if (!offline_file) -+ fd = open(kernel_offline[type], O_WRONLY); -+ if (fd == -1) { -+ log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, kernel_offline[type]); - return -1; -+ } - -- fprintf(offline_file, "%#llx", addr); -- err = ferror(offline_file) ? -1 : 0; -- fclose(offline_file); -- -- return err; -+ sprintf(buf, "%#llx", addr); -+ rc = write(fd, buf, strlen(buf)); -+ if (rc < 0) { -+ log(TERM, LOG_ERR, "page offline addr(%s) by %s failed, errno:%d\n", buf, kernel_offline[type], errno); -+ } -+ close(fd); -+ return rc; - } - - static void page_offline(struct page_record *pr) --- -2.18.4 - diff --git a/backport-0001-rasdaemon-Modify-non-standard-error-decoding-interfa.patch b/backport-0001-rasdaemon-Modify-non-standard-error-decoding-interfa.patch deleted file mode 100644 index 2d864086f9388c86eb15ced419dfc36115395c87..0000000000000000000000000000000000000000 --- a/backport-0001-rasdaemon-Modify-non-standard-error-decoding-interfa.patch +++ /dev/null @@ -1,785 +0,0 @@ -From 1c085f983f01ec09e5b0dd67dbb8b4afa89e7300 Mon Sep 17 00:00:00 2001 -From: Shiju Jose -Date: Mon, 10 Aug 2020 15:42:56 +0100 -Subject: [PATCH] rasdaemon: Modify non-standard error decoding interface using - linked list - -Replace the current non-standard error decoding interface with the -interface based on the linked list to avoid using realloc and -to improve the interface. - -Signed-off-by: Shiju Jose -Signed-off-by: Mauro Carvalho Chehab ---- - non-standard-hisi_hip08.c | 114 +++++++++++++++++----------------- - non-standard-hisilicon.c | 46 +++++++------- - non-standard-hisilicon.h | 4 +- - ras-non-standard-handler.c | 122 ++++++++++++++++++++----------------- - ras-non-standard-handler.h | 13 ++-- - 5 files changed, 155 insertions(+), 144 deletions(-) - -diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c -index 2197f81..ebf03e1 100644 ---- a/non-standard-hisi_hip08.c -+++ b/non-standard-hisi_hip08.c -@@ -528,7 +528,7 @@ static const struct db_table_descriptor hip08_pcie_local_event_tab = { - #endif - - #define IN_RANGE(p, start, end) ((p) >= (start) && (p) < (end)) --static void decode_oem_type1_err_hdr(struct ras_ns_dec_tab *dec_tab, -+static void decode_oem_type1_err_hdr(struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - const struct hisi_oem_type1_err_sec *err) - { -@@ -537,26 +537,26 @@ static void decode_oem_type1_err_hdr(struct ras_ns_dec_tab *dec_tab, - char *end = buf + HISI_BUF_LEN; - - p += snprintf(p, end - p, "[ table_version=%d ", err->version); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE1_FIELD_VERSION, err->version, NULL); - - if (err->val_bits & HISI_OEM_VALID_SOC_ID && IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "SOC_ID=%d ", err->soc_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE1_FIELD_SOC_ID, - err->soc_id, NULL); - } - - if (err->val_bits & HISI_OEM_VALID_SOCKET_ID && IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "socket_ID=%d ", err->socket_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE1_FIELD_SOCKET_ID, - err->socket_id, NULL); - } - - if (err->val_bits & HISI_OEM_VALID_NIMBUS_ID && IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "nimbus_ID=%d ", err->nimbus_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE1_FIELD_NIMBUS_ID, - err->nimbus_id, NULL); - } -@@ -566,7 +566,7 @@ static void decode_oem_type1_err_hdr(struct ras_ns_dec_tab *dec_tab, - err->module_id); - - p += snprintf(p, end - p, "module=%s ", str); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE1_FIELD_MODULE_ID, - 0, str); - } -@@ -578,7 +578,7 @@ static void decode_oem_type1_err_hdr(struct ras_ns_dec_tab *dec_tab, - err->sub_module_id); - - p += snprintf(p, end - p, "submodule=%s ", str); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE1_FIELD_SUB_MODULE_ID, - 0, str); - } -@@ -587,7 +587,7 @@ static void decode_oem_type1_err_hdr(struct ras_ns_dec_tab *dec_tab, - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "error_severity=%s ", - err_severity(err->err_severity)); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE1_FIELD_ERR_SEV, - 0, err_severity(err->err_severity)); - } -@@ -598,7 +598,7 @@ static void decode_oem_type1_err_hdr(struct ras_ns_dec_tab *dec_tab, - trace_seq_printf(s, "%s\n", buf); - } - --static void decode_oem_type1_err_regs(struct ras_ns_dec_tab *dec_tab, -+static void decode_oem_type1_err_regs(struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - const struct hisi_oem_type1_err_sec *err) - { -@@ -649,14 +649,14 @@ static void decode_oem_type1_err_regs(struct ras_ns_dec_tab *dec_tab, - *p = '\0'; - } - -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE1_FIELD_REGS_DUMP, 0, buf); -- step_vendor_data_tab(dec_tab, "hip08_oem_type1_event_tab"); -+ step_vendor_data_tab(ev_decoder, "hip08_oem_type1_event_tab"); - } - - /* error data decoding functions */ - static int decode_hip08_oem_type1_error(struct ras_events *ras, -- struct ras_ns_dec_tab *dec_tab, -+ struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - struct ras_non_standard_event *event) - { -@@ -670,8 +670,8 @@ static int decode_hip08_oem_type1_error(struct ras_events *ras, - } - - #ifdef HAVE_SQLITE3 -- if (!dec_tab->stmt_dec_record) { -- if (ras_mc_add_vendor_table(ras, &dec_tab->stmt_dec_record, -+ if (!ev_decoder->stmt_dec_record) { -+ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, - &hip08_oem_type1_event_tab) - != SQLITE_OK) { - trace_seq_printf(s, -@@ -680,18 +680,18 @@ static int decode_hip08_oem_type1_error(struct ras_events *ras, - } - } - #endif -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE1_FIELD_TIMESTAMP, - 0, event->timestamp); - - trace_seq_printf(s, "\nHISI HIP08: OEM Type-1 Error\n"); -- decode_oem_type1_err_hdr(dec_tab, s, err); -- decode_oem_type1_err_regs(dec_tab, s, err); -+ decode_oem_type1_err_hdr(ev_decoder, s, err); -+ decode_oem_type1_err_regs(ev_decoder, s, err); - - return 0; - } - --static void decode_oem_type2_err_hdr(struct ras_ns_dec_tab *dec_tab, -+static void decode_oem_type2_err_hdr(struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - const struct hisi_oem_type2_err_sec *err) - { -@@ -700,26 +700,26 @@ static void decode_oem_type2_err_hdr(struct ras_ns_dec_tab *dec_tab, - char *end = buf + HISI_BUF_LEN; - - p += snprintf(p, end - p, "[ table_version=%d ", err->version); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE2_FIELD_VERSION, err->version, NULL); - - if (err->val_bits & HISI_OEM_VALID_SOC_ID && IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "SOC_ID=%d ", err->soc_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE2_FIELD_SOC_ID, - err->soc_id, NULL); - } - - if (err->val_bits & HISI_OEM_VALID_SOCKET_ID && IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "socket_ID=%d ", err->socket_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE2_FIELD_SOCKET_ID, - err->socket_id, NULL); - } - - if (err->val_bits & HISI_OEM_VALID_NIMBUS_ID && IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "nimbus_ID=%d ", err->nimbus_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_OEM_TYPE2_FIELD_NIMBUS_ID, - err->nimbus_id, NULL); - } -@@ -729,7 +729,7 @@ static void decode_oem_type2_err_hdr(struct ras_ns_dec_tab *dec_tab, - err->module_id); - - p += snprintf(p, end - p, "module=%s ", str); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE2_FIELD_MODULE_ID, - 0, str); - } -@@ -741,7 +741,7 @@ static void decode_oem_type2_err_hdr(struct ras_ns_dec_tab *dec_tab, - err->sub_module_id); - - p += snprintf(p, end - p, "submodule=%s ", str); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE2_FIELD_SUB_MODULE_ID, - 0, str); - } -@@ -750,7 +750,7 @@ static void decode_oem_type2_err_hdr(struct ras_ns_dec_tab *dec_tab, - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "error_severity=%s ", - err_severity(err->err_severity)); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE2_FIELD_ERR_SEV, - 0, err_severity(err->err_severity)); - } -@@ -761,7 +761,7 @@ static void decode_oem_type2_err_hdr(struct ras_ns_dec_tab *dec_tab, - trace_seq_printf(s, "%s\n", buf); - } - --static void decode_oem_type2_err_regs(struct ras_ns_dec_tab *dec_tab, -+static void decode_oem_type2_err_regs(struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - const struct hisi_oem_type2_err_sec *err) - { -@@ -822,13 +822,13 @@ static void decode_oem_type2_err_regs(struct ras_ns_dec_tab *dec_tab, - *p = '\0'; - } - -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE2_FIELD_REGS_DUMP, 0, buf); -- step_vendor_data_tab(dec_tab, "hip08_oem_type2_event_tab"); -+ step_vendor_data_tab(ev_decoder, "hip08_oem_type2_event_tab"); - } - - static int decode_hip08_oem_type2_error(struct ras_events *ras, -- struct ras_ns_dec_tab *dec_tab, -+ struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - struct ras_non_standard_event *event) - { -@@ -842,8 +842,8 @@ static int decode_hip08_oem_type2_error(struct ras_events *ras, - } - - #ifdef HAVE_SQLITE3 -- if (!dec_tab->stmt_dec_record) { -- if (ras_mc_add_vendor_table(ras, &dec_tab->stmt_dec_record, -+ if (!ev_decoder->stmt_dec_record) { -+ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, - &hip08_oem_type2_event_tab) != SQLITE_OK) { - trace_seq_printf(s, - "create sql hip08_oem_type2_event_tab fail\n"); -@@ -851,18 +851,18 @@ static int decode_hip08_oem_type2_error(struct ras_events *ras, - } - } - #endif -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_OEM_TYPE2_FIELD_TIMESTAMP, - 0, event->timestamp); - - trace_seq_printf(s, "\nHISI HIP08: OEM Type-2 Error\n"); -- decode_oem_type2_err_hdr(dec_tab, s, err); -- decode_oem_type2_err_regs(dec_tab, s, err); -+ decode_oem_type2_err_hdr(ev_decoder, s, err); -+ decode_oem_type2_err_regs(ev_decoder, s, err); - - return 0; - } - --static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, -+static void decode_pcie_local_err_hdr(struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - const struct hisi_pcie_local_err_sec *err) - { -@@ -871,14 +871,14 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - char *end = buf + HISI_BUF_LEN; - - p += snprintf(p, end - p, "[ table_version=%d ", err->version); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_VERSION, - err->version, NULL); - - if (err->val_bits & HISI_PCIE_LOCAL_VALID_SOC_ID && - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "SOC_ID=%d ", err->soc_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_SOC_ID, - err->soc_id, NULL); - } -@@ -886,7 +886,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - if (err->val_bits & HISI_PCIE_LOCAL_VALID_SOCKET_ID && - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "socket_ID=%d ", err->socket_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_SOCKET_ID, - err->socket_id, NULL); - } -@@ -894,7 +894,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - if (err->val_bits & HISI_PCIE_LOCAL_VALID_NIMBUS_ID && - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "nimbus_ID=%d ", err->nimbus_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_NIMBUS_ID, - err->nimbus_id, NULL); - } -@@ -903,7 +903,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "submodule=%s ", - pcie_local_sub_module_name(err->sub_module_id)); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_PCIE_LOCAL_FIELD_SUB_MODULE_ID, - 0, pcie_local_sub_module_name(err->sub_module_id)); - } -@@ -911,7 +911,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - if (err->val_bits & HISI_PCIE_LOCAL_VALID_CORE_ID && - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "core_ID=core%d ", err->core_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_CORE_ID, - err->core_id, NULL); - } -@@ -919,7 +919,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - if (err->val_bits & HISI_PCIE_LOCAL_VALID_PORT_ID && - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "port_ID=port%d ", err->port_id); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_PORT_ID, - err->port_id, NULL); - } -@@ -928,7 +928,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "error_severity=%s ", - err_severity(err->err_severity)); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_PCIE_LOCAL_FIELD_ERR_SEV, - 0, err_severity(err->err_severity)); - } -@@ -936,7 +936,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - if (err->val_bits & HISI_PCIE_LOCAL_VALID_ERR_TYPE && - IN_RANGE(p, buf, end)) { - p += snprintf(p, end - p, "error_type=0x%x ", err->err_type); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_INT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, - HIP08_PCIE_LOCAL_FIELD_ERR_TYPE, - err->err_type, NULL); - } -@@ -947,7 +947,7 @@ static void decode_pcie_local_err_hdr(struct ras_ns_dec_tab *dec_tab, - trace_seq_printf(s, "%s\n", buf); - } - --static void decode_pcie_local_err_regs(struct ras_ns_dec_tab *dec_tab, -+static void decode_pcie_local_err_regs(struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - const struct hisi_pcie_local_err_sec *err) - { -@@ -972,13 +972,13 @@ static void decode_pcie_local_err_regs(struct ras_ns_dec_tab *dec_tab, - *p = '\0'; - } - -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_PCIE_LOCAL_FIELD_REGS_DUMP, 0, buf); -- step_vendor_data_tab(dec_tab, "hip08_pcie_local_event_tab"); -+ step_vendor_data_tab(ev_decoder, "hip08_pcie_local_event_tab"); - } - - static int decode_hip08_pcie_local_error(struct ras_events *ras, -- struct ras_ns_dec_tab *dec_tab, -+ struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - struct ras_non_standard_event *event) - { -@@ -992,8 +992,8 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, - } - - #ifdef HAVE_SQLITE3 -- if (!dec_tab->stmt_dec_record) { -- if (ras_mc_add_vendor_table(ras, &dec_tab->stmt_dec_record, -+ if (!ev_decoder->stmt_dec_record) { -+ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, - &hip08_pcie_local_event_tab) != SQLITE_OK) { - trace_seq_printf(s, - "create sql hip08_pcie_local_event_tab fail\n"); -@@ -1001,18 +1001,18 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, - } - } - #endif -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HIP08_PCIE_LOCAL_FIELD_TIMESTAMP, - 0, event->timestamp); - - trace_seq_printf(s, "\nHISI HIP08: PCIe local error\n"); -- decode_pcie_local_err_hdr(dec_tab, s, err); -- decode_pcie_local_err_regs(dec_tab, s, err); -+ decode_pcie_local_err_hdr(ev_decoder, s, err); -+ decode_pcie_local_err_regs(ev_decoder, s, err); - - return 0; - } - --struct ras_ns_dec_tab hip08_ns_oem_tab[] = { -+static struct ras_ns_ev_decoder hip08_ns_ev_decoder[] = { - { - .sec_type = "1f8161e155d641e6bd107afd1dc5f7c5", - .decode = decode_hip08_oem_type1_error, -@@ -1025,10 +1025,12 @@ struct ras_ns_dec_tab hip08_ns_oem_tab[] = { - .sec_type = "b2889fc9e7d74f9da867af42e98be772", - .decode = decode_hip08_pcie_local_error, - }, -- { /* sentinel */ } - }; - - static void __attribute__((constructor)) hip08_init(void) - { -- register_ns_dec_tab(hip08_ns_oem_tab); -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(hip08_ns_ev_decoder); i++) -+ register_ns_ev_decoder(&hip08_ns_ev_decoder[i]); - } -diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c -index c9e1fa9..a6f5e78 100644 ---- a/non-standard-hisilicon.c -+++ b/non-standard-hisilicon.c -@@ -73,38 +73,38 @@ struct hisi_event { - }; - - #ifdef HAVE_SQLITE3 --void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -+void record_vendor_data(struct ras_ns_ev_decoder *ev_decoder, - enum hisi_oem_data_type data_type, - int id, int64_t data, const char *text) - { - switch (data_type) { - case HISI_OEM_DATA_TYPE_INT: -- sqlite3_bind_int(dec_tab->stmt_dec_record, id, data); -+ sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data); - break; - case HISI_OEM_DATA_TYPE_INT64: -- sqlite3_bind_int64(dec_tab->stmt_dec_record, id, data); -+ sqlite3_bind_int64(ev_decoder->stmt_dec_record, id, data); - break; - case HISI_OEM_DATA_TYPE_TEXT: -- sqlite3_bind_text(dec_tab->stmt_dec_record, id, text, -1, NULL); -+ sqlite3_bind_text(ev_decoder->stmt_dec_record, id, text, -1, NULL); - break; - } - } - --int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name) -+int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) - { - int rc; - -- rc = sqlite3_step(dec_tab->stmt_dec_record); -+ rc = sqlite3_step(ev_decoder->stmt_dec_record); - if (rc != SQLITE_OK && rc != SQLITE_DONE) - log(TERM, LOG_ERR, - "Failed to do %s step on sqlite: error = %d\n", name, rc); - -- rc = sqlite3_reset(dec_tab->stmt_dec_record); -+ rc = sqlite3_reset(ev_decoder->stmt_dec_record); - if (rc != SQLITE_OK && rc != SQLITE_DONE) - log(TERM, LOG_ERR, - "Failed to reset %s on sqlite: error = %d\n", name, rc); - -- rc = sqlite3_clear_bindings(dec_tab->stmt_dec_record); -+ rc = sqlite3_clear_bindings(ev_decoder->stmt_dec_record); - if (rc != SQLITE_OK && rc != SQLITE_DONE) - log(TERM, LOG_ERR, - "Failed to clear bindings %s on sqlite: error = %d\n", -@@ -113,12 +113,12 @@ int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name) - return rc; - } - #else --void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -+void record_vendor_data(struct ras_ns_ev_decoder *ev_decoder, - enum hisi_oem_data_type data_type, - int id, int64_t data, const char *text) - { } - --int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name) -+int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) - { - return 0; - } -@@ -197,7 +197,7 @@ static void decode_module(struct hisi_event *event, uint8_t module_id) - HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); - } - --static void decode_hisi_common_section_hdr(struct ras_ns_dec_tab *dec_tab, -+static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, - const struct hisi_common_error_section *err, - struct hisi_event *event) - { -@@ -244,7 +244,7 @@ static void decode_hisi_common_section_hdr(struct ras_ns_dec_tab *dec_tab, - } - - static int decode_hisi_common_section(struct ras_events *ras, -- struct ras_ns_dec_tab *dec_tab, -+ struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, - struct ras_non_standard_event *event) - { -@@ -253,8 +253,8 @@ static int decode_hisi_common_section(struct ras_events *ras, - struct hisi_event hevent; - - #ifdef HAVE_SQLITE3 -- if (ras->record_events && !dec_tab->stmt_dec_record) { -- if (ras_mc_add_vendor_table(ras, &dec_tab->stmt_dec_record, -+ if (ras->record_events && !ev_decoder->stmt_dec_record) { -+ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, - &hisi_common_section_tab) != SQLITE_OK) { - trace_seq_printf(s, "create sql hisi_common_section_tab fail\n"); - return -1; -@@ -264,7 +264,7 @@ static int decode_hisi_common_section(struct ras_events *ras, - - memset(&hevent, 0, sizeof(struct hisi_event)); - trace_seq_printf(s, "\nHisilicon Common Error Section:\n"); -- decode_hisi_common_section_hdr(dec_tab, err, &hevent); -+ decode_hisi_common_section_hdr(ev_decoder, err, &hevent); - trace_seq_printf(s, "%s\n", hevent.error_msg); - - if (err->val_bits & BIT(HISI_COMMON_VALID_REG_ARRAY_SIZE) && err->reg_array_size > 0) { -@@ -280,28 +280,30 @@ static int decode_hisi_common_section(struct ras_events *ras, - } - - if (ras->record_events) { -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HISI_COMMON_FIELD_TIMESTAMP, - 0, event->timestamp); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg); -- record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, - HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg); -- step_vendor_data_tab(dec_tab, "hisi_common_section_tab"); -+ step_vendor_data_tab(ev_decoder, "hisi_common_section_tab"); - } - - return 0; - } - --struct ras_ns_dec_tab hisi_section_ns_tab[] = { -+static struct ras_ns_ev_decoder hisi_section_ns_ev_decoder[] = { - { - .sec_type = "c8b328a899174af69a132e08ab2e7586", - .decode = decode_hisi_common_section, - }, -- { /* sentinel */ } - }; - - static void __attribute__((constructor)) hisi_ns_init(void) - { -- register_ns_dec_tab(hisi_section_ns_tab); -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(hisi_section_ns_ev_decoder); i++) -+ register_ns_ev_decoder(&hisi_section_ns_ev_decoder[i]); - } -diff --git a/non-standard-hisilicon.h b/non-standard-hisilicon.h -index 1ce210a..75b911e 100644 ---- a/non-standard-hisilicon.h -+++ b/non-standard-hisilicon.h -@@ -41,9 +41,9 @@ static inline char *err_severity(uint8_t err_sev) - return "unknown"; - } - --void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -+void record_vendor_data(struct ras_ns_ev_decoder *ev_decoder, - enum hisi_oem_data_type data_type, - int id, int64_t data, const char *text); --int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name); -+int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name); - - #endif -diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c -index d92fd42..1862335 100644 ---- a/ras-non-standard-handler.c -+++ b/ras-non-standard-handler.c -@@ -22,46 +22,7 @@ - #include "ras-logger.h" - #include "ras-report.h" - --static p_ns_dec_tab * ns_dec_tab; --static size_t dec_tab_count; -- --int register_ns_dec_tab(const p_ns_dec_tab tab) --{ -- ns_dec_tab = (p_ns_dec_tab *)realloc(ns_dec_tab, -- (dec_tab_count + 1) * sizeof(tab)); -- if (ns_dec_tab == NULL) { -- printf("%s p_ns_dec_tab malloc failed", __func__); -- return -1; -- } -- ns_dec_tab[dec_tab_count] = tab; -- dec_tab_count++; -- return 0; --} -- --void unregister_ns_dec_tab(void) --{ -- if (ns_dec_tab) { --#ifdef HAVE_SQLITE3 -- p_ns_dec_tab dec_tab; -- int i, count; -- -- for (count = 0; count < dec_tab_count; count++) { -- dec_tab = ns_dec_tab[count]; -- for (i = 0; dec_tab[i].decode; i++) { -- if (dec_tab[i].stmt_dec_record) { -- ras_mc_finalize_vendor_table( -- dec_tab[i].stmt_dec_record); -- dec_tab[i].stmt_dec_record = NULL; -- } -- } -- } --#endif -- -- free(ns_dec_tab); -- ns_dec_tab = NULL; -- dec_tab_count = 0; -- } --} -+static struct ras_ns_ev_decoder *ras_ns_ev_dec_list; - - void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) { - trace_seq_printf(s, "%02x%02x%02x%02x", buf[index+3], buf[index+2], buf[index+1], buf[index]); -@@ -105,18 +66,75 @@ static int uuid_le_cmp(const char *sec_type, const char *uuid2) - return strncmp(uuid1, uuid2, 32); - } - -+int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) -+{ -+ struct ras_ns_ev_decoder *list; -+ -+ if (!ns_ev_decoder) -+ return -1; -+ -+ ns_ev_decoder->next = NULL; -+ ns_ev_decoder->stmt_dec_record = NULL; -+ if (!ras_ns_ev_dec_list) { -+ ras_ns_ev_dec_list = ns_ev_decoder; -+ } else { -+ list = ras_ns_ev_dec_list; -+ while (list->next) -+ list = list->next; -+ list->next = ns_ev_decoder; -+ } -+ -+ return 0; -+} -+ -+static int find_ns_ev_decoder(const char *sec_type, struct ras_ns_ev_decoder **p_ns_ev_dec) -+{ -+ struct ras_ns_ev_decoder *ns_ev_decoder; -+ int match = 0; -+ -+ ns_ev_decoder = ras_ns_ev_dec_list; -+ while (ns_ev_decoder) { -+ if (uuid_le_cmp(sec_type, ns_ev_decoder->sec_type) == 0) { -+ *p_ns_ev_dec = ns_ev_decoder; -+ match = 1; -+ break; -+ } -+ ns_ev_decoder = ns_ev_decoder->next; -+ } -+ -+ if (!match) -+ return -1; -+ -+ return 0; -+} -+ -+static void unregister_ns_ev_decoder(void) -+{ -+#ifdef HAVE_SQLITE3 -+ struct ras_ns_ev_decoder *ns_ev_decoder = ras_ns_ev_dec_list; -+ -+ while (ns_ev_decoder) { -+ if (ns_ev_decoder->stmt_dec_record) { -+ ras_mc_finalize_vendor_table(ns_ev_decoder->stmt_dec_record); -+ ns_ev_decoder->stmt_dec_record = NULL; -+ } -+ ns_ev_decoder = ns_ev_decoder->next; -+ } -+#endif -+ ras_ns_ev_dec_list = NULL; -+} -+ - int ras_non_standard_event_handler(struct trace_seq *s, - struct pevent_record *record, - struct event_format *event, void *context) - { -- int len, i, line_count, count; -+ int len, i, line_count; - unsigned long long val; - struct ras_events *ras = context; - time_t now; - struct tm *tm; - struct ras_non_standard_event ev; -- p_ns_dec_tab dec_tab; -- bool dec_done = false; -+ struct ras_ns_ev_decoder *ns_ev_decoder; - - /* - * Newer kernels (3.10-rc1 or upper) provide an uptime clock. -@@ -177,19 +195,9 @@ int ras_non_standard_event_handler(struct trace_seq *s, - if(!ev.error) - return -1; - -- for (count = 0; count < dec_tab_count && !dec_done; count++) { -- dec_tab = ns_dec_tab[count]; -- for (i = 0; dec_tab[i].decode; i++) { -- if (uuid_le_cmp(ev.sec_type, -- dec_tab[i].sec_type) == 0) { -- dec_tab[i].decode(ras, &dec_tab[i], s, &ev); -- dec_done = true; -- break; -- } -- } -- } -- -- if (!dec_done) { -+ if (!find_ns_ev_decoder(ev.sec_type, &ns_ev_decoder)) { -+ ns_ev_decoder->decode(ras, ns_ev_decoder, s, &ev); -+ } else { - len = ev.length; - i = 0; - line_count = 0; -@@ -222,5 +230,5 @@ int ras_non_standard_event_handler(struct trace_seq *s, - __attribute__((destructor)) - static void ns_exit(void) - { -- unregister_ns_dec_tab(); -+ unregister_ns_ev_decoder(); - } -diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h -index 2b9bf40..57d4cb5 100644 ---- a/ras-non-standard-handler.h -+++ b/ras-non-standard-handler.h -@@ -20,15 +20,16 @@ - #define BIT(nr) (1UL << (nr)) - #define BIT_ULL(nr) (1ULL << (nr)) - --typedef struct ras_ns_dec_tab { -+struct ras_ns_ev_decoder { -+ struct ras_ns_ev_decoder *next; - const char *sec_type; -- int (*decode)(struct ras_events *ras, struct ras_ns_dec_tab *dec_tab, -+ int (*decode)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder, - struct trace_seq *s, struct ras_non_standard_event *event); - #ifdef HAVE_SQLITE3 - #include - sqlite3_stmt *stmt_dec_record; - #endif --} *p_ns_dec_tab; -+}; - - int ras_non_standard_event_handler(struct trace_seq *s, - struct pevent_record *record, -@@ -37,11 +38,9 @@ int ras_non_standard_event_handler(struct trace_seq *s, - void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index); - - #ifdef HAVE_NON_STANDARD --int register_ns_dec_tab(const p_ns_dec_tab tab); --void unregister_ns_dec_tab(void); -+int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder); - #else --static inline int register_ns_dec_tab(const p_ns_dec_tab tab) { return 0; }; --static inline void unregister_ns_dec_tab(void) { return; }; -+static inline int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) { return 0; }; - #endif - - #endif --- -2.33.0 - diff --git a/backport-0001-rasdaemon-delete-the-duplicate-code-about-the-defini.patch b/backport-0001-rasdaemon-delete-the-duplicate-code-about-the-defini.patch deleted file mode 100644 index b6aba574f8dcf77474e199026cda960d1f8b9e13..0000000000000000000000000000000000000000 --- a/backport-0001-rasdaemon-delete-the-duplicate-code-about-the-defini.patch +++ /dev/null @@ -1,63 +0,0 @@ -From b98880e2cf5fd15e4261676760b719963b956a0e Mon Sep 17 00:00:00 2001 -From: Xiaofei Tan -Date: Mon, 27 Jul 2020 15:38:37 +0800 -Subject: [PATCH 1/3] rasdaemon: delete the duplicate code about the definition - of hip08 DB fields - -Delete the duplicate code about the definition of DB fields for hip08 OEM -event format1 and format2. Because the two OEM event format is the same. - -Signed-off-By: Xiaofei Tan -Signed-off-by: Mauro Carvalho Chehab ---- - non-standard-hisi_hip08.c | 23 +++++------------------ - 1 file changed, 5 insertions(+), 18 deletions(-) - -diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c -index 8bf10c1..7fc6939 100644 ---- a/non-standard-hisi_hip08.c -+++ b/non-standard-hisi_hip08.c -@@ -504,7 +504,7 @@ static char *pcie_local_sub_module_name(uint8_t id) - } - - #ifdef HAVE_SQLITE3 --static const struct db_fields hip08_oem_type1_event_fields[] = { -+static const struct db_fields hip08_oem_event_fields[] = { - { .name = "id", .type = "INTEGER PRIMARY KEY" }, - { .name = "timestamp", .type = "TEXT" }, - { .name = "version", .type = "INTEGER" }, -@@ -519,27 +519,14 @@ static const struct db_fields hip08_oem_type1_event_fields[] = { - - static const struct db_table_descriptor hip08_oem_type1_event_tab = { - .name = "hip08_oem_type1_event_v2", -- .fields = hip08_oem_type1_event_fields, -- .num_fields = ARRAY_SIZE(hip08_oem_type1_event_fields), --}; -- --static const struct db_fields hip08_oem_type2_event_fields[] = { -- { .name = "id", .type = "INTEGER PRIMARY KEY" }, -- { .name = "timestamp", .type = "TEXT" }, -- { .name = "version", .type = "INTEGER" }, -- { .name = "soc_id", .type = "INTEGER" }, -- { .name = "socket_id", .type = "INTEGER" }, -- { .name = "nimbus_id", .type = "INTEGER" }, -- { .name = "module_id", .type = "TEXT" }, -- { .name = "sub_module_id", .type = "TEXT" }, -- { .name = "err_severity", .type = "TEXT" }, -- { .name = "regs_dump", .type = "TEXT" }, -+ .fields = hip08_oem_event_fields, -+ .num_fields = ARRAY_SIZE(hip08_oem_event_fields), - }; - - static const struct db_table_descriptor hip08_oem_type2_event_tab = { - .name = "hip08_oem_type2_event_v2", -- .fields = hip08_oem_type2_event_fields, -- .num_fields = ARRAY_SIZE(hip08_oem_type2_event_fields), -+ .fields = hip08_oem_event_fields, -+ .num_fields = ARRAY_SIZE(hip08_oem_event_fields), - }; - - static const struct db_fields hip08_pcie_local_event_fields[] = { --- -2.7.4 - diff --git a/backport-0002-ras-page-isolation-page-which-is-PAGE_OFFLINE_FAILED.patch b/backport-0002-ras-page-isolation-page-which-is-PAGE_OFFLINE_FAILED.patch deleted file mode 100644 index 724dc9f070eae94f695aac3c351895dc0e35bc69..0000000000000000000000000000000000000000 --- a/backport-0002-ras-page-isolation-page-which-is-PAGE_OFFLINE_FAILED.patch +++ /dev/null @@ -1,44 +0,0 @@ -From c329012ce4b44af08217f2a8f2b3b9b1b4b1c0d3 Mon Sep 17 00:00:00 2001 -From: lvying6 -Date: Sat, 31 Oct 2020 17:57:15 +0800 -Subject: [PATCH 2/2] ras-page-isolation: page which is PAGE_OFFLINE_FAILED can - be offlined again - -OS may fail to offline page at the previous time. After some time, -this page's state changed, and the page can be offlined by OS. -At this time, Correctable errors on this page reached the threshold. -Rasdaemon should trigger to offline this page again. - -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab ---- - ras-page-isolation.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index dc07545..fd7bd70 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -237,12 +237,17 @@ static void page_offline(struct page_record *pr) - int ret; - - /* Offlining page is not required */ -- if (offline <= OFFLINE_ACCOUNT) -+ if (offline <= OFFLINE_ACCOUNT) { -+ log(TERM, LOG_INFO, "PAGE_CE_ACTION=%s, ignore to offline page at %#llx\n", -+ offline_choice[offline].name, addr); - return; -+ } - - /* Ignore offlined pages */ -- if (pr->offlined != PAGE_ONLINE) -+ if (pr->offlined == PAGE_OFFLINE) { -+ log(TERM, LOG_INFO, "page at %#llx is already offlined, ignore\n", addr); - return; -+ } - - /* Time to silence this noisy page */ - if (offline == OFFLINE_SOFT_THEN_HARD) { --- -2.18.4 - diff --git a/backport-0002-rasdaemon-delete-the-code-of-non-standard-error-deco.patch b/backport-0002-rasdaemon-delete-the-code-of-non-standard-error-deco.patch deleted file mode 100644 index 3a22ead14ee8f2177da3d4c5918ac18e338cb0bf..0000000000000000000000000000000000000000 --- a/backport-0002-rasdaemon-delete-the-code-of-non-standard-error-deco.patch +++ /dev/null @@ -1,190 +0,0 @@ -From 6ee76565274f31052868e970bce8768c314f6bb7 Mon Sep 17 00:00:00 2001 -From: Xiaofei Tan -Date: Mon, 27 Jul 2020 15:38:38 +0800 -Subject: [PATCH 2/3] rasdaemon: delete the code of non-standard error decoder - for hip07 - -Delete the code of non-standard error decoder for hip07 that was never -used. Because the corresponding code in Linux kernel wasn't accepted. - -Signed-off-by: Xiaofei Tan -Signed-off-by: Mauro Carvalho Chehab ---- - Makefile.am | 2 +- - non-standard-hisi_hip07.c | 151 ---------------------------------------------- - 2 files changed, 1 insertion(+), 152 deletions(-) - delete mode 100644 non-standard-hisi_hip07.c - -diff --git a/Makefile.am b/Makefile.am -index 51ef4de..23b4d60 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -52,7 +52,7 @@ if WITH_ABRT_REPORT - rasdaemon_SOURCES += ras-report.c - endif - if WITH_HISI_NS_DECODE -- rasdaemon_SOURCES += non-standard-hisi_hip07.c non-standard-hisi_hip08.c -+ rasdaemon_SOURCES += non-standard-hisi_hip08.c - endif - if WITH_MEMORY_CE_PFA - rasdaemon_SOURCES += rbtree.c ras-page-isolation.c -diff --git a/non-standard-hisi_hip07.c b/non-standard-hisi_hip07.c -deleted file mode 100644 -index 09ddcb2..0000000 ---- a/non-standard-hisi_hip07.c -+++ /dev/null -@@ -1,151 +0,0 @@ --/* -- * Copyright (c) 2017 Hisilicon Limited. -- * -- * This program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2 of the License, or -- * (at your option) any later version. -- * -- */ -- --#include --#include --#include --#include "ras-record.h" --#include "ras-logger.h" --#include "ras-report.h" --#include "ras-non-standard-handler.h" -- --/* common definitions */ -- --/* HISI SAS definitions */ --#define HISI_SAS_VALID_PA BIT(0) --#define HISI_SAS_VALID_MB_ERR BIT(1) --#define HISI_SAS_VALID_ERR_TYPE BIT(2) --#define HISI_SAS_VALID_AXI_ERR_INFO BIT(3) -- --struct hisi_sas_err_sec { -- uint64_t val_bits; -- uint64_t physical_addr; -- uint32_t mb; -- uint32_t type; -- uint32_t axi_err_info; --}; -- --/* Common Functions */ --static char *err_bit_type(int etype) --{ -- switch (etype) { -- case 0x0: return "single-bit ecc"; -- case 0x1: return "multi-bit ecc"; -- } -- return "unknown error"; --} -- --/* SAS Functions */ --static char *sas_err_type(int etype) --{ -- switch (etype) { -- case 0x0001: return "hgc_dqe ecc"; -- case 0x0002: return "hgc_iost ecc"; -- case 0x0004: return "hgc_itct ecc"; -- case 0x0008: return "hgc_iostl ecc"; -- case 0x0010: return "hgc_itctl ecc"; -- case 0x0020: return "hgc_cqe ecc"; -- case 0x0040: return "rxm_mem0 ecc"; -- case 0x0080: return "rxm_mem1 ecc"; -- case 0x0100: return "rxm_mem2 ecc"; -- case 0x0200: return "rxm_mem3 ecc"; -- case 0x0400: return "wp_depth"; -- case 0x0800: return "iptt_slot_no_match"; -- case 0x1000: return "rp_depth"; -- case 0x2000: return "axi err"; -- case 0x4000: return "fifo err"; -- case 0x8000: return "lm_add_fetch_list"; -- case 0x10000: return "hgc_abt_fetch_lm"; -- } -- return "unknown error"; --} -- --static char *sas_axi_err_type(int etype) --{ -- switch (etype) { -- case 0x0001: return "IOST_AXI_W_ERR"; -- case 0x0002: return "IOST_AXI_R_ERR"; -- case 0x0004: return "ITCT_AXI_W_ERR"; -- case 0x0008: return "ITCT_AXI_R_ERR"; -- case 0x0010: return "SATA_AXI_W_ERR"; -- case 0x0020: return "SATA_AXI_R_ERR"; -- case 0x0040: return "DQE_AXI_R_ERR"; -- case 0x0080: return "CQE_AXI_W_ERR"; -- case 0x0100: return "CQE_WINFO_FIFO"; -- case 0x0200: return "CQE_MSG_FIFIO"; -- case 0x0400: return "GETDQE_FIFO"; -- case 0x0800: return "CMDP_FIFO"; -- case 0x1000: return "AWTCTRL_FIFO"; -- } -- return "unknown error"; --} -- --static int decode_hip07_sas_error(struct ras_events *ras, -- struct ras_ns_dec_tab *dec_tab, -- struct trace_seq *s, -- struct ras_non_standard_event *event) --{ -- char buf[1024]; -- char *p = buf; -- const struct hisi_sas_err_sec *err = -- (struct hisi_sas_err_sec *)event->error; -- -- if (err->val_bits == 0) { -- trace_seq_printf(s, "%s: no valid error data\n", -- __func__); -- return -1; -- } -- p += sprintf(p, "["); -- if (err->val_bits & HISI_SAS_VALID_PA) -- p += sprintf(p, "phy addr = 0x%p: ", -- (void *)err->physical_addr); -- -- if (err->val_bits & HISI_SAS_VALID_MB_ERR) -- p += sprintf(p, "%s: ", err_bit_type(err->mb)); -- -- if (err->val_bits & HISI_SAS_VALID_ERR_TYPE) -- p += sprintf(p, "error type = %s: ", -- sas_err_type(err->type)); -- -- if (err->val_bits & HISI_SAS_VALID_AXI_ERR_INFO) -- p += sprintf(p, "axi error type = %s", -- sas_axi_err_type(err->axi_err_info)); -- -- p += sprintf(p, "]"); -- -- trace_seq_printf(s, "\nHISI HIP07: SAS error: %s\n", buf); -- return 0; --} -- --static int decode_hip07_hns_error(struct ras_events *ras, -- struct ras_ns_dec_tab *dec_tab, -- struct trace_seq *s, -- struct ras_non_standard_event *event) --{ -- return 0; --} -- --struct ras_ns_dec_tab hisi_ns_dec_tab[] = { -- { -- .sec_type = "daffd8146eba4d8c8a91bc9bbf4aa301", -- .decode = decode_hip07_sas_error, -- }, -- { -- .sec_type = "fbc2d923ea7a453dab132949f5af9e53", -- .decode = decode_hip07_hns_error, -- }, -- { /* sentinel */ } --}; -- --__attribute__((constructor)) --static void hip07_init(void) --{ -- register_ns_dec_tab(hisi_ns_dec_tab); --} --- -2.7.4 - diff --git a/backport-0003-rasdaemon-add-support-for-hisilicon-common-section-d.patch b/backport-0003-rasdaemon-add-support-for-hisilicon-common-section-d.patch deleted file mode 100644 index 7eaa3f38ddc5d7ba1a3ce25b12cf0b03bdaaef5e..0000000000000000000000000000000000000000 --- a/backport-0003-rasdaemon-add-support-for-hisilicon-common-section-d.patch +++ /dev/null @@ -1,527 +0,0 @@ -From 8c30a852493a6204ded59872bb3a0f0e43537713 Mon Sep 17 00:00:00 2001 -From: Xiaofei Tan -Date: Mon, 27 Jul 2020 15:38:39 +0800 -Subject: [PATCH 3/3] rasdaemon: add support for hisilicon common section - decoder - -Add a new non-standard error section, Hisilicon common section. -It is defined for the next generation SoC Kunpeng930. It also supports -Kunpeng920 and some modules of Kunpeng920 could be changed to use -this section. - -We put the code to an new source file, as it supports multiple Hardware -platform. Some code of hip08 could be shared. Move them to this new file. - -Signed-off-by: Xiaofei Tan -Signed-off-by: Mauro Carvalho Chehab ---- - Makefile.am | 2 +- - non-standard-hisi_hip08.c | 79 +----------- - non-standard-hisilicon.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++ - non-standard-hisilicon.h | 49 ++++++++ - 4 files changed, 358 insertions(+), 79 deletions(-) - create mode 100644 non-standard-hisilicon.c - create mode 100644 non-standard-hisilicon.h - -diff --git a/Makefile.am b/Makefile.am -index 23b4d60..18d1a92 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -52,7 +52,7 @@ if WITH_ABRT_REPORT - rasdaemon_SOURCES += ras-report.c - endif - if WITH_HISI_NS_DECODE -- rasdaemon_SOURCES += non-standard-hisi_hip08.c -+ rasdaemon_SOURCES += non-standard-hisi_hip08.c non-standard-hisilicon.c - endif - if WITH_MEMORY_CE_PFA - rasdaemon_SOURCES += rbtree.c ras-page-isolation.c -diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c -index 7fc6939..2197f81 100644 ---- a/non-standard-hisi_hip08.c -+++ b/non-standard-hisi_hip08.c -@@ -15,6 +15,7 @@ - #include "ras-logger.h" - #include "ras-report.h" - #include "ras-non-standard-handler.h" -+#include "non-standard-hisilicon.h" - - /* HISI OEM error definitions */ - /* HISI OEM format1 error definitions */ -@@ -83,11 +84,6 @@ - #define HISI_PCIE_LOCAL_ERR_MISC_MAX 33 - #define HISI_BUF_LEN 1024 - --#define HISI_ERR_SEVERITY_NFE 0 --#define HISI_ERR_SEVERITY_FE 1 --#define HISI_ERR_SEVERITY_CE 2 --#define HISI_ERR_SEVERITY_NONE 3 -- - struct hisi_oem_type1_err_sec { - uint32_t val_bits; - uint8_t version; -@@ -145,12 +141,6 @@ struct hisi_pcie_local_err_sec { - uint32_t err_misc[HISI_PCIE_LOCAL_ERR_MISC_MAX]; - }; - --enum hisi_oem_data_type { -- HISI_OEM_DATA_TYPE_INT, -- HISI_OEM_DATA_TYPE_INT64, -- HISI_OEM_DATA_TYPE_TEXT, --}; -- - enum { - HIP08_OEM_TYPE1_FIELD_ID, - HIP08_OEM_TYPE1_FIELD_TIMESTAMP, -@@ -199,20 +189,6 @@ struct hisi_module_info { - int sub_num; - }; - --/* helper functions */ --static char *err_severity(uint8_t err_sev) --{ -- switch (err_sev) { -- case HISI_ERR_SEVERITY_NFE: return "recoverable"; -- case HISI_ERR_SEVERITY_FE: return "fatal"; -- case HISI_ERR_SEVERITY_CE: return "corrected"; -- case HISI_ERR_SEVERITY_NONE: return "none"; -- default: -- break; -- } -- return "unknown"; --} -- - static const char *pll_submodule_name[] = { - "TB_PLL0", - "TB_PLL1", -@@ -549,59 +525,6 @@ static const struct db_table_descriptor hip08_pcie_local_event_tab = { - .fields = hip08_pcie_local_event_fields, - .num_fields = ARRAY_SIZE(hip08_pcie_local_event_fields), - }; -- --static void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -- enum hisi_oem_data_type data_type, -- int id, int64_t data, const char *text) --{ -- switch (data_type) { -- case HISI_OEM_DATA_TYPE_INT: -- sqlite3_bind_int(dec_tab->stmt_dec_record, id, data); -- break; -- case HISI_OEM_DATA_TYPE_INT64: -- sqlite3_bind_int64(dec_tab->stmt_dec_record, id, data); -- break; -- case HISI_OEM_DATA_TYPE_TEXT: -- sqlite3_bind_text(dec_tab->stmt_dec_record, id, text, -1, NULL); -- break; -- default: -- break; -- } --} -- --static int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, -- const char *name) --{ -- int rc; -- -- rc = sqlite3_step(dec_tab->stmt_dec_record); -- if (rc != SQLITE_OK && rc != SQLITE_DONE) -- log(TERM, LOG_ERR, -- "Failed to do %s step on sqlite: error = %d\n", name, rc); -- -- rc = sqlite3_reset(dec_tab->stmt_dec_record); -- if (rc != SQLITE_OK && rc != SQLITE_DONE) -- log(TERM, LOG_ERR, -- "Failed to reset %s on sqlite: error = %d\n", name, rc); -- -- rc = sqlite3_clear_bindings(dec_tab->stmt_dec_record); -- if (rc != SQLITE_OK && rc != SQLITE_DONE) -- log(TERM, LOG_ERR, -- "Failed to clear bindings %s on sqlite: error = %d\n", -- name, rc); -- -- return rc; --} --#else --static void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -- enum hisi_oem_data_type data_type, -- int id, int64_t data, const char *text) --{ } -- --static int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, char *name) --{ -- return 0; --} - #endif - - #define IN_RANGE(p, start, end) ((p) >= (start) && (p) < (end)) -diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c -new file mode 100644 -index 0000000..c9e1fa9 ---- /dev/null -+++ b/non-standard-hisilicon.c -@@ -0,0 +1,307 @@ -+/* -+ * Copyright (c) 2020 Hisilicon Limited. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ */ -+ -+#include -+#include -+#include -+#include "ras-record.h" -+#include "ras-logger.h" -+#include "ras-report.h" -+#include "non-standard-hisilicon.h" -+ -+#define HISI_BUF_LEN 2048 -+ -+struct hisi_common_error_section { -+ uint32_t val_bits; -+ uint8_t version; -+ uint8_t soc_id; -+ uint8_t socket_id; -+ uint8_t totem_id; -+ uint8_t nimbus_id; -+ uint8_t subsystem_id; -+ uint8_t module_id; -+ uint8_t submodule_id; -+ uint8_t core_id; -+ uint8_t port_id; -+ uint16_t err_type; -+ struct { -+ uint8_t function; -+ uint8_t device; -+ uint16_t segment; -+ uint8_t bus; -+ uint8_t reserved[3]; -+ } pcie_info; -+ uint8_t err_severity; -+ uint8_t reserved[3]; -+ uint32_t reg_array_size; -+ uint32_t reg_array[]; -+}; -+ -+enum { -+ HISI_COMMON_VALID_SOC_ID, -+ HISI_COMMON_VALID_SOCKET_ID, -+ HISI_COMMON_VALID_TOTEM_ID, -+ HISI_COMMON_VALID_NIMBUS_ID, -+ HISI_COMMON_VALID_SUBSYSTEM_ID, -+ HISI_COMMON_VALID_MODULE_ID, -+ HISI_COMMON_VALID_SUBMODULE_ID, -+ HISI_COMMON_VALID_CORE_ID, -+ HISI_COMMON_VALID_PORT_ID, -+ HISI_COMMON_VALID_ERR_TYPE, -+ HISI_COMMON_VALID_PCIE_INFO, -+ HISI_COMMON_VALID_ERR_SEVERITY, -+ HISI_COMMON_VALID_REG_ARRAY_SIZE, -+}; -+ -+enum { -+ HISI_COMMON_FIELD_ID, -+ HISI_COMMON_FIELD_TIMESTAMP, -+ HISI_COMMON_FIELD_ERR_INFO, -+ HISI_COMMON_FIELD_REGS_DUMP, -+}; -+ -+struct hisi_event { -+ char error_msg[HISI_BUF_LEN]; -+ char reg_msg[HISI_BUF_LEN]; -+}; -+ -+#ifdef HAVE_SQLITE3 -+void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -+ enum hisi_oem_data_type data_type, -+ int id, int64_t data, const char *text) -+{ -+ switch (data_type) { -+ case HISI_OEM_DATA_TYPE_INT: -+ sqlite3_bind_int(dec_tab->stmt_dec_record, id, data); -+ break; -+ case HISI_OEM_DATA_TYPE_INT64: -+ sqlite3_bind_int64(dec_tab->stmt_dec_record, id, data); -+ break; -+ case HISI_OEM_DATA_TYPE_TEXT: -+ sqlite3_bind_text(dec_tab->stmt_dec_record, id, text, -1, NULL); -+ break; -+ } -+} -+ -+int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name) -+{ -+ int rc; -+ -+ rc = sqlite3_step(dec_tab->stmt_dec_record); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed to do %s step on sqlite: error = %d\n", name, rc); -+ -+ rc = sqlite3_reset(dec_tab->stmt_dec_record); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed to reset %s on sqlite: error = %d\n", name, rc); -+ -+ rc = sqlite3_clear_bindings(dec_tab->stmt_dec_record); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed to clear bindings %s on sqlite: error = %d\n", -+ name, rc); -+ -+ return rc; -+} -+#else -+void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -+ enum hisi_oem_data_type data_type, -+ int id, int64_t data, const char *text) -+{ } -+ -+int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name) -+{ -+ return 0; -+} -+#endif -+ -+#ifdef HAVE_SQLITE3 -+static const struct db_fields hisi_common_section_fields[] = { -+ { .name = "id", .type = "INTEGER PRIMARY KEY" }, -+ { .name = "timestamp", .type = "TEXT" }, -+ { .name = "err_info", .type = "TEXT" }, -+ { .name = "regs_dump", .type = "TEXT" }, -+}; -+ -+static const struct db_table_descriptor hisi_common_section_tab = { -+ .name = "hisi_common_section", -+ .fields = hisi_common_section_fields, -+ .num_fields = ARRAY_SIZE(hisi_common_section_fields), -+}; -+#endif -+ -+static const char* soc_desc[] = { -+ "Kunpeng916", -+ "Kunpeng920", -+ "Kunpeng930", -+}; -+ -+static const char* module_name[] = { -+ "MN", -+ "PLL", -+ "SLLC", -+ "AA", -+ "SIOE", -+ "POE", -+ "CPA", -+ "DISP", -+ "GIC", -+ "ITS", -+ "AVSBUS", -+ "CS", -+ "PPU", -+ "SMMU", -+ "PA", -+ "HLLC", -+ "DDRC", -+ "L3TAG", -+ "L3DATA", -+ "PCS", -+ "MATA", -+ "PCIe Local", -+ "SAS", -+ "SATA", -+ "NIC", -+ "RoCE", -+ "USB", -+ "ZIP", -+ "HPRE", -+ "SEC", -+ "RDE", -+ "MEE", -+ "HHA", -+}; -+ -+static const char* get_soc_desc(uint8_t soc_id) -+{ -+ if (soc_id >= sizeof(soc_desc)/sizeof(char *)) -+ return "unknown"; -+ -+ return soc_desc[soc_id]; -+} -+ -+static void decode_module(struct hisi_event *event, uint8_t module_id) -+{ -+ if (module_id >= sizeof(module_name)/sizeof(char *)) -+ HISI_SNPRINTF(event->error_msg, "module=unknown(id=%d) ", module_id); -+ else -+ HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); -+} -+ -+static void decode_hisi_common_section_hdr(struct ras_ns_dec_tab *dec_tab, -+ const struct hisi_common_error_section *err, -+ struct hisi_event *event) -+{ -+ HISI_SNPRINTF(event->error_msg, "[ table_version=%d", err->version); -+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) -+ HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id)); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) -+ HISI_SNPRINTF(event->error_msg, "socket_id=%d", err->socket_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) -+ HISI_SNPRINTF(event->error_msg, "totem_id=%d", err->totem_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) -+ HISI_SNPRINTF(event->error_msg, "nimbus_id=%d", err->nimbus_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) -+ HISI_SNPRINTF(event->error_msg, "subsystem_id=%d", err->subsystem_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID)) -+ decode_module(event, err->module_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) -+ HISI_SNPRINTF(event->error_msg, "submodule_id=%d", err->submodule_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) -+ HISI_SNPRINTF(event->error_msg, "core_id=%d", err->core_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) -+ HISI_SNPRINTF(event->error_msg, "port_id=%d", err->port_id); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) -+ HISI_SNPRINTF(event->error_msg, "err_type=%d", err->err_type); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) -+ HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x", -+ err->pcie_info.segment, err->pcie_info.bus, -+ err->pcie_info.device, err->pcie_info.function); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) -+ HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity)); -+ -+ HISI_SNPRINTF(event->error_msg, "]"); -+} -+ -+static int decode_hisi_common_section(struct ras_events *ras, -+ struct ras_ns_dec_tab *dec_tab, -+ struct trace_seq *s, -+ struct ras_non_standard_event *event) -+{ -+ const struct hisi_common_error_section *err = -+ (struct hisi_common_error_section *)event->error; -+ struct hisi_event hevent; -+ -+#ifdef HAVE_SQLITE3 -+ if (ras->record_events && !dec_tab->stmt_dec_record) { -+ if (ras_mc_add_vendor_table(ras, &dec_tab->stmt_dec_record, -+ &hisi_common_section_tab) != SQLITE_OK) { -+ trace_seq_printf(s, "create sql hisi_common_section_tab fail\n"); -+ return -1; -+ } -+ } -+#endif -+ -+ memset(&hevent, 0, sizeof(struct hisi_event)); -+ trace_seq_printf(s, "\nHisilicon Common Error Section:\n"); -+ decode_hisi_common_section_hdr(dec_tab, err, &hevent); -+ trace_seq_printf(s, "%s\n", hevent.error_msg); -+ -+ if (err->val_bits & BIT(HISI_COMMON_VALID_REG_ARRAY_SIZE) && err->reg_array_size > 0) { -+ int i; -+ -+ trace_seq_printf(s, "Register Dump:\n"); -+ for (i = 0; i < err->reg_array_size / sizeof(uint32_t); i++) { -+ trace_seq_printf(s, "reg%02d=0x%08x\n", i, -+ err->reg_array[i]); -+ HISI_SNPRINTF(hevent.reg_msg, "reg%02d=0x%08x", -+ i, err->reg_array[i]); -+ } -+ } -+ -+ if (ras->record_events) { -+ record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ HISI_COMMON_FIELD_TIMESTAMP, -+ 0, event->timestamp); -+ record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg); -+ record_vendor_data(dec_tab, HISI_OEM_DATA_TYPE_TEXT, -+ HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg); -+ step_vendor_data_tab(dec_tab, "hisi_common_section_tab"); -+ } -+ -+ return 0; -+} -+ -+struct ras_ns_dec_tab hisi_section_ns_tab[] = { -+ { -+ .sec_type = "c8b328a899174af69a132e08ab2e7586", -+ .decode = decode_hisi_common_section, -+ }, -+ { /* sentinel */ } -+}; -+ -+static void __attribute__((constructor)) hisi_ns_init(void) -+{ -+ register_ns_dec_tab(hisi_section_ns_tab); -+} -diff --git a/non-standard-hisilicon.h b/non-standard-hisilicon.h -new file mode 100644 -index 0000000..1ce210a ---- /dev/null -+++ b/non-standard-hisilicon.h -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (c) 2020 Hisilicon Limited. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ */ -+ -+#ifndef __NON_STANDARD_HISILICON_H -+#define __NON_STANDARD_HISILICON_H -+ -+#include "ras-non-standard-handler.h" -+#include "ras-mc-handler.h" -+ -+#define HISI_SNPRINTF mce_snprintf -+ -+#define HISI_ERR_SEVERITY_NFE 0 -+#define HISI_ERR_SEVERITY_FE 1 -+#define HISI_ERR_SEVERITY_CE 2 -+#define HISI_ERR_SEVERITY_NONE 3 -+ -+enum hisi_oem_data_type { -+ HISI_OEM_DATA_TYPE_INT, -+ HISI_OEM_DATA_TYPE_INT64, -+ HISI_OEM_DATA_TYPE_TEXT, -+}; -+ -+/* helper functions */ -+static inline char *err_severity(uint8_t err_sev) -+{ -+ switch (err_sev) { -+ case HISI_ERR_SEVERITY_NFE: return "recoverable"; -+ case HISI_ERR_SEVERITY_FE: return "fatal"; -+ case HISI_ERR_SEVERITY_CE: return "corrected"; -+ case HISI_ERR_SEVERITY_NONE: return "none"; -+ default: -+ break; -+ } -+ return "unknown"; -+} -+ -+void record_vendor_data(struct ras_ns_dec_tab *dec_tab, -+ enum hisi_oem_data_type data_type, -+ int id, int64_t data, const char *text); -+int step_vendor_data_tab(struct ras_ns_dec_tab *dec_tab, const char *name); -+ -+#endif --- -2.7.4 - diff --git a/backport-configure.ac-fix-SYSCONFDEFDIR-default-value.patch b/backport-configure.ac-fix-SYSCONFDEFDIR-default-value.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4ba376f865f15e0bab4a52f2444f8a1367954ac --- /dev/null +++ b/backport-configure.ac-fix-SYSCONFDEFDIR-default-value.patch @@ -0,0 +1,37 @@ +From 1ff5f3d2a0fcd48add9462567c30fe0e14585fb4 Mon Sep 17 00:00:00 2001 +From: Matt Whitlock +Date: Wed, 9 Jun 2021 10:25:18 -0400 +Subject: [PATCH] configure.ac: fix SYSCONFDEFDIR default value + +configure.ac was using AC_ARG_WITH incorrectly, yielding a generated configure script like: + + # Check whether --with-sysconfdefdir was given. + if test "${with_sysconfdefdir+set}" = set; then : + withval=$with_sysconfdefdir; SYSCONFDEFDIR=$withval + else + "/etc/sysconfig" + fi + +This commit fixes the default case so that the SYSCONFDEFDIR variable is assigned the value "/etc/sysconfig" rather than trying to execute "/etc/sysconfig" as a command. + +Signed-off-by: Mauro Carvalho Chehab +--- + configure.ac | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/configure.ac b/configure.ac +index f7d1947..33b81fe 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -172,7 +172,7 @@ AC_SUBST([RASSTATEDIR]) + AC_ARG_WITH(sysconfdefdir, + AC_HELP_STRING([--with-sysconfdefdir=DIR], [rasdaemon environment file dir]), + [SYSCONFDEFDIR=$withval], +- ["/etc/sysconfig"]) ++ [SYSCONFDEFDIR=/etc/sysconfig]) + AC_SUBST([SYSCONFDEFDIR]) + + AC_DEFINE([RAS_DB_FNAME], ["ras-mc_event.db"], [ras events database]) +-- +2.27.0 + diff --git a/backport-rasdaemon-Fix-error-print.patch b/backport-rasdaemon-Fix-error-print.patch deleted file mode 100644 index 6e315ba2a9e4d154ac6842c1e76f200eef9bf9b3..0000000000000000000000000000000000000000 --- a/backport-rasdaemon-Fix-error-print.patch +++ /dev/null @@ -1,29 +0,0 @@ -From 00115dda854f4a50681ccc6c017daa991234411b Mon Sep 17 00:00:00 2001 -From: Liguang Zhang -Date: Mon, 10 Aug 2020 11:07:43 +0800 -Subject: [PATCH] rasdaemon: Fix error print - -Fix error print handle_ras_events. - -Signed-off-by: Liguang Zhang -Signed-off-by: Mauro Carvalho Chehab ---- - ras-events.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/ras-events.c b/ras-events.c -index a99fd29..c797b20 100644 ---- a/ras-events.c -+++ b/ras-events.c -@@ -874,7 +874,7 @@ int handle_ras_events(int record_events) - num_events++; - } else - log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", -- "ras", "aer_event"); -+ "ras", "extlog_mem_event"); - #endif - - #ifdef HAVE_DEVLINK --- -2.18.4 - diff --git a/backport-rasdaemon-add-support-for-memory_failure-events.patch b/backport-rasdaemon-add-support-for-memory_failure-events.patch deleted file mode 100644 index c67cd43afc673ed5c901551f9c87f6cb5a78e683..0000000000000000000000000000000000000000 --- a/backport-rasdaemon-add-support-for-memory_failure-events.patch +++ /dev/null @@ -1,634 +0,0 @@ -From efb2a994b1e24c1e6645ec0dee27d8b3a7deae92 Mon Sep 17 00:00:00 2001 -From: Xiaofei Tan -Date: Tue, 30 Nov 2021 19:50:06 +0800 -Subject: [PATCH] rasdaemon: add support for memory_failure events - -Add support to log the memory_failure kernel trace -events. - -Example rasdaemon log and SQLite DB output for the -memory_failure event, -================================================= -rasdaemon: memory_failure_event store: 0x126ce8f8 -rasdaemon: register inserted at db -<...>-785 [000] 0.000024: memory_failure_event: 2020-10-02 -13:27:13 -0400 pfn=0x204000000 page_type=free buddy page -action_result=Delayed - -CREATE TABLE memory_failure_event (id INTEGER PRIMARY KEY, timestamp -TEXT, pfn TEXT, page_type TEXT, action_result TEXT); -INSERT INTO memory_failure_event VALUES(1,'2020-10-02 13:27:13 --0400','0x204000000','free buddy page','Delayed'); -================================================== - -Signed-off-by: Shiju Jose -Signed-off-by: Mauro Carvalho Chehab ---- - .travis.yml | 2 +- - Makefile.am | 5 +- - configure.ac | 11 +++ - ras-events.c | 15 +++ - ras-events.h | 1 + - ras-memory-failure-handler.c | 179 +++++++++++++++++++++++++++++++++++ - ras-memory-failure-handler.h | 25 +++++ - ras-record.c | 70 ++++++++++++++ - ras-record.h | 13 +++ - ras-report.c | 68 +++++++++++++ - ras-report.h | 2 + - 11 files changed, 389 insertions(+), 2 deletions(-) - create mode 100644 ras-memory-failure-handler.c - create mode 100644 ras-memory-failure-handler.h - -diff --git a/.travis.yml b/.travis.yml -index 5ab3957..b865e1d 100644 ---- a/.travis.yml -+++ b/.travis.yml -@@ -20,7 +20,7 @@ before_install: - - sudo apt-get install -y sqlite3 - install: - - autoreconf -vfi --- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa --enable-cpu-fault-isolation -+- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa --enable-cpu-fault-isolation --enable-memory-failure - - script: - - make && sudo make install -diff --git a/Makefile.am b/Makefile.am -index 61dc2cc..a032352 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -48,6 +48,9 @@ endif - if WITH_DISKERROR - rasdaemon_SOURCES += ras-diskerror-handler.c - endif -+if WITH_MEMORY_FAILURE -+ rasdaemon_SOURCES += ras-memory-failure-handler.c -+endif - if WITH_ABRT_REPORT - rasdaemon_SOURCES += ras-report.c - endif -@@ -66,7 +69,7 @@ include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ - ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ - ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ -- ras-cpu-isolation.h queue.h -+ ras-cpu-isolation.h queue.h non-standard-hisilicon.h ras-memory-failure-handler.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that -diff --git a/configure.ac b/configure.ac -index a682bb9..fd67be8 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -111,6 +111,16 @@ AS_IF([test "x$enable_diskerror" = "xyes" || test "x$enable_all" == "xyes"], [ - AM_CONDITIONAL([WITH_DISKERROR], [test x$enable_diskerror = xyes || test x$enable_all == xyes]) - AM_COND_IF([WITH_DISKERROR], [USE_DISKERROR="yes"], [USE_DISKERROR="no"]) - -+AC_ARG_ENABLE([memory_failure], -+ AS_HELP_STRING([--enable-memory-failure], [enable memory failure events (currently experimental)])) -+ -+AS_IF([test "x$enable_memory_failure" = "xyes" || test "x$enable_all" == "xyes"], [ -+ AC_DEFINE(HAVE_MEMORY_FAILURE,1,"have memory failure events collect") -+ AC_SUBST([WITH_MEMORY_FAILURE]) -+]) -+AM_CONDITIONAL([WITH_MEMORY_FAILURE], [test x$enable_memory_failure = xyes || test x$enable_all == xyes]) -+AM_COND_IF([WITH_MEMORY_FAILURE], [USE_MEMORY_FAILURE="yes"], [USE_MEMORY_FAILURE="no"]) -+ - AC_ARG_ENABLE([abrt_report], - AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)])) - -@@ -182,6 +192,7 @@ compile time options summary - ARM events : $USE_ARM - DEVLINK : $USE_DEVLINK - Disk I/O errors : $USE_DISKERROR -+ Memory Failure : $USE_MEMORY_FAILURE - Memory CE PFA : $USE_MEMORY_CE_PFA - CPU fault isolation : $USE_CPU_FAULT_ISOLATION - EOF -diff --git a/ras-events.c b/ras-events.c -index 31c4170..92ae2c8 100644 ---- a/ras-events.c -+++ b/ras-events.c -@@ -37,6 +37,7 @@ - #include "ras-extlog-handler.h" - #include "ras-devlink-handler.h" - #include "ras-diskerror-handler.h" -+#include "ras-memory-failure-handler.h" - #include "ras-record.h" - #include "ras-logger.h" - #include "ras-page-isolation.h" -@@ -256,6 +257,10 @@ int toggle_ras_mc_event(int enable) - rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable); - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); -+#endif -+ - free_ras: - free(ras); - return rc; -@@ -938,6 +943,16 @@ int handle_ras_events(int record_events) - } - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", -+ ras_memory_failure_event_handler, NULL, MF_EVENT); -+ if (!rc) -+ num_events++; -+ else -+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", -+ "ras", "memory_failure_event"); -+#endif -+ - if (!num_events) { - log(ALL, LOG_INFO, - "Failed to trace all supported RAS events. Aborting.\n"); -diff --git a/ras-events.h b/ras-events.h -index f028741..dfd690c 100644 ---- a/ras-events.h -+++ b/ras-events.h -@@ -38,6 +38,7 @@ enum { - EXTLOG_EVENT, - DEVLINK_EVENT, - DISKERROR_EVENT, -+ MF_EVENT, - NR_EVENTS - }; - -diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c -new file mode 100644 -index 0000000..9941e68 ---- /dev/null -+++ b/ras-memory-failure-handler.c -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include -+#include -+#include -+#include "libtrace/kbuffer.h" -+#include "ras-memory-failure-handler.h" -+#include "ras-record.h" -+#include "ras-logger.h" -+#include "ras-report.h" -+ -+/* Memory failure - various types of pages */ -+enum mf_action_page_type { -+ MF_MSG_KERNEL, -+ MF_MSG_KERNEL_HIGH_ORDER, -+ MF_MSG_SLAB, -+ MF_MSG_DIFFERENT_COMPOUND, -+ MF_MSG_POISONED_HUGE, -+ MF_MSG_HUGE, -+ MF_MSG_FREE_HUGE, -+ MF_MSG_NON_PMD_HUGE, -+ MF_MSG_UNMAP_FAILED, -+ MF_MSG_DIRTY_SWAPCACHE, -+ MF_MSG_CLEAN_SWAPCACHE, -+ MF_MSG_DIRTY_MLOCKED_LRU, -+ MF_MSG_CLEAN_MLOCKED_LRU, -+ MF_MSG_DIRTY_UNEVICTABLE_LRU, -+ MF_MSG_CLEAN_UNEVICTABLE_LRU, -+ MF_MSG_DIRTY_LRU, -+ MF_MSG_CLEAN_LRU, -+ MF_MSG_TRUNCATED_LRU, -+ MF_MSG_BUDDY, -+ MF_MSG_BUDDY_2ND, -+ MF_MSG_DAX, -+ MF_MSG_UNSPLIT_THP, -+ MF_MSG_UNKNOWN, -+}; -+ -+/* Action results for various types of pages */ -+enum mf_action_result { -+ MF_IGNORED, /* Error: cannot be handled */ -+ MF_FAILED, /* Error: handling failed */ -+ MF_DELAYED, /* Will be handled later */ -+ MF_RECOVERED, /* Successfully recovered */ -+}; -+ -+/* memory failure page types */ -+static const struct { -+ int type; -+ const char *page_type; -+} mf_page_type[] = { -+ { MF_MSG_KERNEL, "reserved kernel page" }, -+ { MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"}, -+ { MF_MSG_SLAB, "kernel slab page"}, -+ { MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"}, -+ { MF_MSG_POISONED_HUGE, "huge page already hardware poisoned"}, -+ { MF_MSG_HUGE, "huge page"}, -+ { MF_MSG_FREE_HUGE, "free huge page"}, -+ { MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"}, -+ { MF_MSG_UNMAP_FAILED, "unmapping failed page"}, -+ { MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"}, -+ { MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"}, -+ { MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page"}, -+ { MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page"}, -+ { MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page"}, -+ { MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page"}, -+ { MF_MSG_DIRTY_LRU, "dirty LRU page"}, -+ { MF_MSG_CLEAN_LRU, "clean LRU page"}, -+ { MF_MSG_TRUNCATED_LRU, "already truncated LRU page"}, -+ { MF_MSG_BUDDY, "free buddy page"}, -+ { MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"}, -+ { MF_MSG_DAX, "dax page"}, -+ { MF_MSG_UNSPLIT_THP, "unsplit thp"}, -+ { MF_MSG_UNKNOWN, "unknown page"}, -+}; -+ -+/* memory failure action results */ -+static const struct { -+ int result; -+ const char *action_result; -+} mf_action_result[] = { -+ { MF_IGNORED, "Ignored" }, -+ { MF_FAILED, "Failed" }, -+ { MF_DELAYED, "Delayed" }, -+ { MF_RECOVERED, "Recovered" }, -+}; -+ -+static const char *get_page_type(int page_type) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mf_page_type); i++) -+ if (mf_page_type[i].type == page_type) -+ return mf_page_type[i].page_type; -+ -+ return "unknown page"; -+} -+ -+static const char *get_action_result(int result) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mf_action_result); i++) -+ if (mf_action_result[i].result == result) -+ return mf_action_result[i].action_result; -+ -+ return "unknown"; -+} -+ -+ -+int ras_memory_failure_event_handler(struct trace_seq *s, -+ struct pevent_record *record, -+ struct event_format *event, void *context) -+{ -+ unsigned long long val; -+ struct ras_events *ras = context; -+ time_t now; -+ struct tm *tm; -+ struct ras_mf_event ev; -+ -+ /* -+ * Newer kernels (3.10-rc1 or upper) provide an uptime clock. -+ * On previous kernels, the way to properly generate an event would -+ * be to inject a fake one, measure its timestamp and diff it against -+ * gettimeofday. We won't do it here. Instead, let's use uptime, -+ * falling-back to the event report's time, if "uptime" clock is -+ * not available (legacy kernels). -+ */ -+ -+ if (ras->use_uptime) -+ now = record->ts/user_hz + ras->uptime_diff; -+ else -+ now = time(NULL); -+ -+ tm = localtime(&now); -+ if (tm) -+ strftime(ev.timestamp, sizeof(ev.timestamp), -+ "%Y-%m-%d %H:%M:%S %z", tm); -+ trace_seq_printf(s, "%s ", ev.timestamp); -+ -+ if (pevent_get_field_val(s, event, "pfn", record, &val, 1) < 0) -+ return -1; -+ sprintf(ev.pfn, "0x%llx", val); -+ trace_seq_printf(s, "pfn=0x%llx ", val); -+ -+ if (pevent_get_field_val(s, event, "type", record, &val, 1) < 0) -+ return -1; -+ ev.page_type = get_page_type(val); -+ trace_seq_printf(s, "page_type=%s ", ev.page_type); -+ -+ if (pevent_get_field_val(s, event, "result", record, &val, 1) < 0) -+ return -1; -+ ev.action_result = get_action_result(val); -+ trace_seq_printf(s, "action_result=%s ", ev.action_result); -+ -+ /* Store data into the SQLite DB */ -+#ifdef HAVE_SQLITE3 -+ ras_store_mf_event(ras, &ev); -+#endif -+ -+#ifdef HAVE_ABRT_REPORT -+ /* Report event to ABRT */ -+ ras_report_mf_event(ras, &ev); -+#endif -+ -+ return 0; -+} -diff --git a/ras-memory-failure-handler.h b/ras-memory-failure-handler.h -new file mode 100644 -index 0000000..b9e9971 ---- /dev/null -+++ b/ras-memory-failure-handler.h -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+*/ -+ -+#ifndef __RAS_MEMORY_FAILURE_HANDLER_H -+#define __RAS_MEMORY_FAILURE_HANDLER_H -+ -+#include "ras-events.h" -+#include "libtrace/event-parse.h" -+ -+int ras_memory_failure_event_handler(struct trace_seq *s, -+ struct pevent_record *record, -+ struct event_format *event, void *context); -+ -+#endif -diff --git a/ras-record.c b/ras-record.c -index 33d4741..27863c7 100644 ---- a/ras-record.c -+++ b/ras-record.c -@@ -506,6 +506,56 @@ int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev - } - #endif - -+/* -+ * Table and functions to handle ras:memory_failure -+ */ -+ -+#ifdef HAVE_MEMORY_FAILURE -+static const struct db_fields mf_event_fields[] = { -+ { .name="id", .type="INTEGER PRIMARY KEY" }, -+ { .name="timestamp", .type="TEXT" }, -+ { .name="pfn", .type="TEXT" }, -+ { .name="page_type", .type="TEXT" }, -+ { .name="action_result", .type="TEXT" }, -+}; -+ -+static const struct db_table_descriptor mf_event_tab = { -+ .name = "memory_failure_event", -+ .fields = mf_event_fields, -+ .num_fields = ARRAY_SIZE(mf_event_fields), -+}; -+ -+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) -+{ -+ int rc; -+ struct sqlite3_priv *priv = ras->db_priv; -+ -+ if (!priv || !priv->stmt_mf_event) -+ return 0; -+ log(TERM, LOG_INFO, "memory_failure_event store: %p\n", priv->stmt_mf_event); -+ -+ sqlite3_bind_text(priv->stmt_mf_event, 1, ev->timestamp, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 2, ev->pfn, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 3, ev->page_type, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 4, ev->action_result, -1, NULL); -+ -+ rc = sqlite3_step(priv->stmt_mf_event); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed to do memory_failure_event step on sqlite: error = %d\n", rc); -+ -+ rc = sqlite3_reset(priv->stmt_mf_event); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed reset memory_failure_event on sqlite: error = %d\n", -+ rc); -+ -+ log(TERM, LOG_INFO, "register inserted at db\n"); -+ -+ return rc; -+} -+#endif -+ - /* - * Generic code - */ -@@ -818,6 +868,16 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) - } - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc = ras_mc_create_table(priv, &mf_event_tab); -+ if (rc == SQLITE_OK) { -+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mf_event, -+ &mf_event_tab); -+ if (rc != SQLITE_OK) -+ goto error; -+ } -+#endif -+ - ras->db_priv = priv; - return 0; - -@@ -920,6 +980,16 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) - } - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ if (priv->stmt_mf_event) { -+ rc = sqlite3_finalize(priv->stmt_mf_event); -+ if (rc != SQLITE_OK) -+ log(TERM, LOG_ERR, -+ "cpu %u: Failed to finalize mf_event sqlite: error = %d\n", -+ cpu, rc); -+ } -+#endif -+ - rc = sqlite3_close_v2(db); - if (rc != SQLITE_OK) - log(TERM, LOG_ERR, -diff --git a/ras-record.h b/ras-record.h -index b453f83..830202f 100644 ---- a/ras-record.h -+++ b/ras-record.h -@@ -103,6 +103,13 @@ struct diskerror_event { - const char *cmd; - }; - -+struct ras_mf_event { -+ char timestamp[64]; -+ char pfn[30]; -+ const char *page_type; -+ const char *action_result; -+}; -+ - struct ras_mc_event; - struct ras_aer_event; - struct ras_extlog_event; -@@ -111,6 +118,7 @@ struct ras_arm_event; - struct mce_event; - struct devlink_event; - struct diskerror_event; -+struct ras_mf_event; - - #ifdef HAVE_SQLITE3 - -@@ -140,6 +148,9 @@ struct sqlite3_priv { - #ifdef HAVE_DISKERROR - sqlite3_stmt *stmt_diskerror_event; - #endif -+#ifdef HAVE_MEMORY_FAILURE -+ sqlite3_stmt *stmt_mf_event; -+#endif - }; - - struct db_fields { -@@ -166,6 +177,7 @@ int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standar - int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); - int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev); - int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); -+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev); - - #else - static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; -@@ -178,6 +190,7 @@ static inline int ras_store_non_standard_record(struct ras_events *ras, struct r - static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; - static inline int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; - static inline int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; -+static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; - - #endif - -diff --git a/ras-report.c b/ras-report.c -index 2710eac..ea3a9b6 100644 ---- a/ras-report.c -+++ b/ras-report.c -@@ -309,6 +309,28 @@ static int set_diskerror_event_backtrace(char *buf, struct diskerror_event *ev) - return 0; - } - -+static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev) -+{ -+ char bt_buf[MAX_BACKTRACE_SIZE]; -+ -+ if (!buf || !ev) -+ return -1; -+ -+ sprintf(bt_buf, "BACKTRACE=" \ -+ "timestamp=%s\n" \ -+ "pfn=%s\n" \ -+ "page_type=%s\n" \ -+ "action_result=%s\n", \ -+ ev->timestamp, \ -+ ev->pfn, \ -+ ev->page_type, \ -+ ev->action_result); -+ -+ strcat(buf, bt_buf); -+ -+ return 0; -+} -+ - static int commit_report_backtrace(int sockfd, int type, void *ev){ - char buf[MAX_BACKTRACE_SIZE]; - char *pbuf = buf; -@@ -343,6 +365,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ - case DISKERROR_EVENT: - rc = set_diskerror_event_backtrace(buf, (struct diskerror_event *)ev); - break; -+ case MF_EVENT: -+ rc = set_mf_event_backtrace(buf, (struct ras_mf_event *)ev); -+ break; - default: - return -1; - } -@@ -708,3 +733,46 @@ diskerror_fail: - return -1; - } - } -+ -+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) -+{ -+ char buf[MAX_MESSAGE_SIZE]; -+ int sockfd = 0; -+ int done = 0; -+ int rc = -1; -+ -+ memset(buf, 0, sizeof(buf)); -+ -+ sockfd = setup_report_socket(); -+ if (sockfd < 0) -+ return -1; -+ -+ rc = commit_report_basic(sockfd); -+ if (rc < 0) -+ goto mf_fail; -+ -+ rc = commit_report_backtrace(sockfd, MF_EVENT, ev); -+ if (rc < 0) -+ goto mf_fail; -+ -+ sprintf(buf, "ANALYZER=%s", "rasdaemon-memory_failure"); -+ rc = write(sockfd, buf, strlen(buf) + 1); -+ if (rc < strlen(buf) + 1) -+ goto mf_fail; -+ -+ sprintf(buf, "REASON=%s", "memory failure problem"); -+ rc = write(sockfd, buf, strlen(buf) + 1); -+ if (rc < strlen(buf) + 1) -+ goto mf_fail; -+ -+ done = 1; -+ -+mf_fail: -+ if (sockfd > 0) -+ close(sockfd); -+ -+ if (done) -+ return 0; -+ else -+ return -1; -+} -diff --git a/ras-report.h b/ras-report.h -index 1d911de..e605eb1 100644 ---- a/ras-report.h -+++ b/ras-report.h -@@ -38,6 +38,7 @@ int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standar - int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); - int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev); - int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); -+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev); - - #else - -@@ -48,6 +49,7 @@ static inline int ras_report_non_standard_event(struct ras_events *ras, struct r - static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; - static inline int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; - static inline int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; -+static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; - - #endif - --- -2.33.0 - diff --git a/bugfix-fix-disk-error-log-storm.patch b/bugfix-fix-disk-error-log-storm.patch index 8241cfcb7e5199ab323c9c478b4e671e3283da13..5df02999047c4418daf3afa89c34dcba46679996 100644 --- a/bugfix-fix-disk-error-log-storm.patch +++ b/bugfix-fix-disk-error-log-storm.patch @@ -15,7 +15,7 @@ index e73a08a..04a0489 100644 @@ -4,7 +4,7 @@ After=syslog.target [Service] - EnvironmentFile=/etc/sysconfig/rasdaemon + EnvironmentFile=@SYSCONFDEFDIR@/rasdaemon -ExecStart=@sbindir@/rasdaemon -f -r +ExecStart=@sbindir@/rasdaemon -f ExecStartPost=@sbindir@/rasdaemon --enable diff --git a/bugfix-ras-events-memory-leak.patch b/bugfix-ras-events-memory-leak.patch deleted file mode 100644 index 977459a356d82fbfd29f4486038a5f9e685b2c43..0000000000000000000000000000000000000000 --- a/bugfix-ras-events-memory-leak.patch +++ /dev/null @@ -1,18 +0,0 @@ -From d59e4d224b3271cf7a7fe53cd7c5d539b58eac32 Mon Sep 17 00:00:00 2001 -From: lvying -Date: Sat, 26 Jan 2019 15:54:17 +0800 -Subject: [PATCH] rasdaemon:fix ras events memory leak - -reason:fix ras events memory leak - -diff -uprN a/ras-events.c b/ras-events.c ---- a/ras-events.c 2018-06-22 14:20:42.880878700 +0800 -+++ b/ras-events.c 2018-06-22 14:38:24.420726900 +0800 -@@ -314,6 +314,7 @@ static void parse_ras_data(struct pthrea - trace_seq_init(&s); - pevent_print_event(pdata->ras->pevent, &s, &record); - trace_seq_do_printf(&s); -+ trace_seq_destroy(&s); - printf("\n"); - fflush(stdout); - } diff --git a/rasdaemon-0.6.6.tar.gz b/rasdaemon-0.6.6.tar.gz deleted file mode 100644 index ea4552e542487c2c4d2e870b222aca8097d8df7c..0000000000000000000000000000000000000000 Binary files a/rasdaemon-0.6.6.tar.gz and /dev/null differ diff --git a/rasdaemon-0.6.7.tar.gz b/rasdaemon-0.6.7.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..553577b805824808efe0b1649b8a328960ad7dbd Binary files /dev/null and b/rasdaemon-0.6.7.tar.gz differ diff --git a/rasdaemon.spec b/rasdaemon.spec index 66bf9e35c0f21b250c395843dce3ab9def4d836f..0a21c3f81bda134a7c28a61a808d509b6fcc496a 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon -Version: 0.6.6 -Release: 10 +Version: 0.6.7 +Release: 1 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -19,25 +19,17 @@ Requires(post): systemd Requires(preun): systemd Requires(postun): systemd -Patch1: bugfix-ras-events-memory-leak.patch -Patch2: bugfix-rasdaemon-wait-for-file-access.patch -Patch3: bugfix-fix-fd-check.patch -Patch4: backport-0001-ras-page-isolation-do_page_offline-always-considers-.patch -Patch5: backport-0002-ras-page-isolation-page-which-is-PAGE_OFFLINE_FAILED.patch -Patch6: backport-rasdaemon-Fix-error-print.patch -Patch7: bugfix-fix-disk-error-log-storm.patch -Patch8: backport-0001-rasdaemon-delete-the-duplicate-code-about-the-defini.patch -Patch9: backport-0002-rasdaemon-delete-the-code-of-non-standard-error-deco.patch -Patch10: backport-0003-rasdaemon-add-support-for-hisilicon-common-section-d.patch -Patch11: backport-0001-rasdaemon-Modify-non-standard-error-decoding-interfa.patch -Patch12: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch -Patch13: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch -Patch14: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch -Patch15: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch -Patch16: 0006-add-cpu-online-fault-isolation.patch -Patch17: 0007-add-trace-print-and-add-sqlite-store.patch -Patch18: 0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch -Patch19: backport-rasdaemon-add-support-for-memory_failure-events.patch +Patch1: bugfix-rasdaemon-wait-for-file-access.patch +Patch2: bugfix-fix-fd-check.patch +Patch3: bugfix-fix-disk-error-log-storm.patch +Patch4: backport-configure.ac-fix-SYSCONFDEFDIR-default-value.patch +Patch5: 0001-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch +Patch6: 0002-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch +Patch7: 0003-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch +Patch8: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch +Patch9: 0006-add-cpu-online-fault-isolation.patch +Patch10: 0007-add-trace-print-and-add-sqlite-store.patch +Patch11: 0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch %description The rasdaemon program is a daemon which monitors the platform @@ -76,7 +68,6 @@ rm INSTALL %{buildroot}/usr/include/*.h %{_sbindir}/ras-mc-ctl %{_mandir}/*/* %{_unitdir}/*.service -%{_sharedstatedir}/rasdaemon %{_sysconfdir}/ras/dimm_labels.d %config(noreplace) %{_sysconfdir}/sysconfig/%{name} @@ -84,14 +75,17 @@ rm INSTALL %{buildroot}/usr/include/*.h /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || : %changelog -* Thurs Dec 9 2021 tanxiaofei - 0.6.6-10 +* Mon Jan 17 2022 xujing - 0.6.7-1 +- DESC: Update software to v0.6.7 + +* Thu Dec 9 2021 tanxiaofei - 0.6.6-10 - Type:feature - ID:NA - SUG:NA - DESC: Enable compilation of the feature memory fault prediction based on corrected error. -* Thurs Dec 2 2021 tanxiaofei - 0.6.6-9 +* Thu Dec 2 2021 tanxiaofei - 0.6.6-9 - Type:feature - ID:NA - SUG:NA