From 4cdf0a2c6bc2969e9129d360f101b7801bc7bdec Mon Sep 17 00:00:00 2001 From: Bing Xia Date: Fri, 29 Mar 2024 18:18:25 +0800 Subject: [PATCH] rasdaemon: Fix for vendor errors are not recorded in the SQLite database if some cpus are offline Fix for vendor errors are not recorded in the SQLite database if some cpus are offline at the system start. Signed-off-by: Bing Xia --- ...r-vendor-errors-are-not-recorded-in-.patch | 103 ++++++++++++++++++ rasdaemon.spec | 10 +- 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch diff --git a/0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch b/0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch new file mode 100644 index 0000000..68ae3a9 --- /dev/null +++ b/0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch @@ -0,0 +1,103 @@ +From 370ac83b39f09eda0fb8a5cfa40ecfc71846eb0d Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 20 Mar 2024 12:16:05 +0000 +Subject: [PATCH] rasdaemon: Fix for vendor errors are not recorded in the + SQLite database if some cpus are offline + +Fix for vendor errors are not recorded in the SQLite database if some cpus +are offline at the system start. + +Issue: + +This issue is reproducible by offlining some cpus, running +./rasdaemon -f --record & and +injecting a vendor specific error supported in the rasdaemon. + +Reason: + +When the system starts with some of the cpus offline and +the rasdaemon is then run, read_ras_event_all_cpus() exits with an error and switches to +the multi-threaded way. However, read() in read_ras_event() returns an error in +the threads for each of the offline CPUs and does the clean up, including calling +ras_ns_finalize_vendor_tables(), which invokes sqlite3_finalize() on the vendor +tables created. Thus the vendor error data is not stored in the SQLite +database when such an error is reported next time. 
+ +Solution: + +In ras_ns_add_vendor_tables() and ras_ns_finalize_vendor_tables() use a +reference count and close the vendor tables which were created in +ras_ns_add_vendor_tables() based on the reference count. + +Reported-by: Junhao He +Signed-off-by: Shiju Jose +Signed-off-by: Junhao He +Signed-off-by: Bing Xia +--- + ras-non-standard-handler.c | 16 ++++++++++++++++ + ras-non-standard-handler.h | 1 + + 2 files changed, 17 insertions(+) + +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 20d514b..13e2acf 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -65,6 +65,7 @@ int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) + #endif + if (!ras_ns_ev_dec_list) { + ras_ns_ev_dec_list = ns_ev_decoder; ++ ras_ns_ev_dec_list->ref_count = 0; + } else { + list = ras_ns_ev_dec_list; + while (list->next) +@@ -85,6 +86,8 @@ int ras_ns_add_vendor_tables(struct ras_events *ras) + return -1; + + ns_ev_decoder = ras_ns_ev_dec_list; ++ if (ras_ns_ev_dec_list) ++ ras_ns_ev_dec_list->ref_count++; + while (ns_ev_decoder) { + if (ns_ev_decoder->add_table && !ns_ev_decoder->stmt_dec_record) { + error = ns_ev_decoder->add_table(ras, ns_ev_decoder); +@@ -127,6 +130,16 @@ void ras_ns_finalize_vendor_tables(void) + #ifdef HAVE_SQLITE3 + struct ras_ns_ev_decoder *ns_ev_decoder = ras_ns_ev_dec_list; + ++ if (!ras_ns_ev_dec_list) ++ return; ++ ++ if (ras_ns_ev_dec_list->ref_count > 0) ++ ras_ns_ev_dec_list->ref_count--; ++ else ++ return; ++ if (ras_ns_ev_dec_list->ref_count > 0) ++ return; ++ + while (ns_ev_decoder) { + if (ns_ev_decoder->stmt_dec_record) { + ras_mc_finalize_vendor_table(ns_ev_decoder->stmt_dec_record); +@@ -140,6 +153,9 @@ void ras_ns_finalize_vendor_tables(void) + static void unregister_ns_ev_decoder(void) + { + #ifdef HAVE_SQLITE3 ++ if (!ras_ns_ev_dec_list) ++ return; ++ ras_ns_ev_dec_list->ref_count = 1; + ras_ns_finalize_vendor_tables(); + #endif + ras_ns_ev_dec_list = NULL; +diff --git 
a/ras-non-standard-handler.h b/ras-non-standard-handler.h +index 341206a..2777584 100644 +--- a/ras-non-standard-handler.h ++++ b/ras-non-standard-handler.h +@@ -22,6 +22,7 @@ + + struct ras_ns_ev_decoder { + struct ras_ns_ev_decoder *next; ++ uint16_t ref_count; + const char *sec_type; + int (*add_table)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder); + int (*decode)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder, +-- +2.30.0 + diff --git a/rasdaemon.spec b/rasdaemon.spec index d109fe2..04e1376 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 19 +Release: 20 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -58,6 +58,7 @@ Patch9007: fix-ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch Patch9008: 0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch Patch9009: add-dynamic-switch-of-ras-events-support-and-disable-block-rq-complete.patch Patch9010: fix-rasdaemon-print-loading-config-logs-multiple-times.patch +Patch9011: 0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch %description The rasdaemon program is a daemon which monitors the platform @@ -111,6 +112,13 @@ if [ $1 -eq 0 ] ; then fi %changelog +* Tue Apr 23 2024 Bing Xia - 0.6.7-20 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:Fix for vendor errors are not recorded in the SQLite database if + some cpus are offline at the system start + * Mon Apr 8 2024 caixiaomeng - 0.6.7-19 - Type:bugfix - ID:NA -- Gitee