diff --git a/0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch b/0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch new file mode 100644 index 0000000000000000000000000000000000000000..68ae3a922339985b35cca6f0c5068b1e717a208d --- /dev/null +++ b/0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch @@ -0,0 +1,103 @@ +From 370ac83b39f09eda0fb8a5cfa40ecfc71846eb0d Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 20 Mar 2024 12:16:05 +0000 +Subject: [PATCH] rasdaemon: Fix for vendor errors are not recorded in the + SQLite database if some cpus are offline + +Fix for vendor errors not being recorded in the SQLite database if some cpus +are offline at the system start. + +Issue: + +This issue is reproducible by offlining some cpus, running +./rasdaemon -f --record & and +injecting a vendor specific error supported in the rasdaemon. + +Reason: + +When the system starts with some of the cpus offline and the rasdaemon +is then run, read_ras_event_all_cpus() exits with an error and switches to +the multi thread way. However read() in read_ras_event() returns an error in +threads for each of the offline CPUs and does clean up including calling +ras_ns_finalize_vendor_tables(), which invokes sqlite3_finalize() on vendor +tables created. Thus the vendor error data is not stored in the SQLite +database when such error is reported next time. + +Solution: + +In ras_ns_add_vendor_tables() and ras_ns_finalize_vendor_tables() use +a reference count and close the vendor tables which were created in +ras_ns_add_vendor_tables() based on the reference count. 
+ +Reported-by: Junhao He +Signed-off-by: Shiju Jose +Signed-off-by: Junhao He +Signed-off-by: Bing Xia +--- + ras-non-standard-handler.c | 16 ++++++++++++++++ + ras-non-standard-handler.h | 1 + + 2 files changed, 17 insertions(+) + +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 20d514b..13e2acf 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -65,6 +65,7 @@ int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) + #endif + if (!ras_ns_ev_dec_list) { + ras_ns_ev_dec_list = ns_ev_decoder; ++ ras_ns_ev_dec_list->ref_count = 0; + } else { + list = ras_ns_ev_dec_list; + while (list->next) +@@ -85,6 +86,8 @@ int ras_ns_add_vendor_tables(struct ras_events *ras) + return -1; + + ns_ev_decoder = ras_ns_ev_dec_list; ++ if (ras_ns_ev_dec_list) ++ ras_ns_ev_dec_list->ref_count++; + while (ns_ev_decoder) { + if (ns_ev_decoder->add_table && !ns_ev_decoder->stmt_dec_record) { + error = ns_ev_decoder->add_table(ras, ns_ev_decoder); +@@ -127,6 +130,16 @@ void ras_ns_finalize_vendor_tables(void) + #ifdef HAVE_SQLITE3 + struct ras_ns_ev_decoder *ns_ev_decoder = ras_ns_ev_dec_list; + ++ if (!ras_ns_ev_dec_list) ++ return; ++ ++ if (ras_ns_ev_dec_list->ref_count > 0) ++ ras_ns_ev_dec_list->ref_count--; ++ else ++ return; ++ if (ras_ns_ev_dec_list->ref_count > 0) ++ return; ++ + while (ns_ev_decoder) { + if (ns_ev_decoder->stmt_dec_record) { + ras_mc_finalize_vendor_table(ns_ev_decoder->stmt_dec_record); +@@ -140,6 +153,9 @@ void ras_ns_finalize_vendor_tables(void) + static void unregister_ns_ev_decoder(void) + { + #ifdef HAVE_SQLITE3 ++ if (!ras_ns_ev_dec_list) ++ return; ++ ras_ns_ev_dec_list->ref_count = 1; + ras_ns_finalize_vendor_tables(); + #endif + ras_ns_ev_dec_list = NULL; +diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h +index 341206a..2777584 100644 +--- a/ras-non-standard-handler.h ++++ b/ras-non-standard-handler.h +@@ -22,6 +22,7 @@ + + struct ras_ns_ev_decoder { + struct 
ras_ns_ev_decoder *next; ++ uint16_t ref_count; + const char *sec_type; + int (*add_table)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder); + int (*decode)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder, +-- +2.30.0 + diff --git a/rasdaemon.spec b/rasdaemon.spec index c760347e8bcf7f10fe14c006ff8e6c8a714e44c0..b27a08d8e34565d3deeaeffafdb915b84e4673c5 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.8.0 -Release: 3 +Release: 4 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -32,6 +32,7 @@ Patch9005: 0002-rasdaemon-fix-issue-of-signed-and-unsigned-integer-c.patch Patch9006: 0003-rasdaemon-Add-support-for-creating-the-vendor-error-.patch Patch9007: backport-Check-CPUs-online-not-configured.patch Patch9008: backport-rasdaemon-diskerror-fix-incomplete-diskerror-log.patch +Patch9009: 0001-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch %description The rasdaemon program is a daemon which monitors the platform @@ -83,6 +84,13 @@ fi /usr/bin/systemctl disable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Fri Mar 29 2024 Bing Xia - 0.8.0-4 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:Fix for vendor errors are not recorded in the SQLite database if some cpus + are offline at the system start. + * Wed Mar 27 2024 zhuofeng - 0.8.0-3 - Type:bugfix - ID:NA