From 11e2aae6af5f593246f896e880592e7dd624de8f Mon Sep 17 00:00:00 2001 From: zhuofeng <1107893276@qq.com> Date: Thu, 26 Jun 2025 07:58:49 +0000 Subject: [PATCH] backport upstream patches Signed-off-by: zhuofeng <1107893276@qq.com> --- ...Add-dynamic-switch-of-ras-events-sup.patch | 471 ++++++++++++++++++ ...evel-from-err-to-warning-in-get-trac.patch | 97 ++++ ...emon-print-loading-config-logs-multi.patch | 56 +++ ...loop-in-read_ras_event-when-kbuf-dat.patch | 40 ++ rasdaemon.spec | 46 +- 5 files changed, 692 insertions(+), 18 deletions(-) create mode 100644 backport-Add-dynamic-switch-of-ras-events-sup.patch create mode 100644 change-the-log-level-from-err-to-warning-in-get-trac.patch create mode 100644 fix-rasdaemon-print-loading-config-logs-multi.patch create mode 100644 ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch diff --git a/backport-Add-dynamic-switch-of-ras-events-sup.patch b/backport-Add-dynamic-switch-of-ras-events-sup.patch new file mode 100644 index 0000000..9aea07b --- /dev/null +++ b/backport-Add-dynamic-switch-of-ras-events-sup.patch @@ -0,0 +1,471 @@ +From 75df4e8bee2f7c2b7e38561670f601947f349c87 Mon Sep 17 00:00:00 2001 +From: caixiaomeng 00662745 +Date: Wed, 21 Feb 2024 15:25:11 +0800 +Subject: [PATCH] BACKPORT-Add-Dynamic-Switch + +--- + misc/rasdaemon.env | 5 +- + ras-disabled-events.h | 10 ++ + ras-events.c | 260 +++++++++++++++++++++++++++--------------- + rasdaemon.c | 36 ++++++ + 4 files changed, 216 insertions(+), 95 deletions(-) + create mode 100644 ras-disabled-events.h + +diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env +index d706883..f2ed519 100644 +--- a/misc/rasdaemon.env ++++ b/misc/rasdaemon.env +@@ -77,4 +77,7 @@ CPU_CE_THRESHOLD="18" + CPU_ISOLATION_CYCLE="24h" + + # Prevent excessive isolation from causing an avalanche effect +-CPU_ISOLATION_LIMIT="10" +\ No newline at end of file ++CPU_ISOLATION_LIMIT="10" ++ ++# Disable specified events by config ++DISABLE="block:block_rq_complete" +\ No newline at end of file +diff --git a/ras-disabled-events.h b/ras-disabled-events.h +new file mode 100644 +index 0000000..298a5f3 +--- /dev/null ++++ b/ras-disabled-events.h +@@ -0,0 +1,10 @@ ++#ifndef __RAS_DISABLED_EVENTS_H ++#define __RAS_DISABLED_EVENTS_H ++#define DISABLE "DISABLE" ++#define MAX_DISABLED_TRACEPOINTS_NUM 50 ++#define MAX_DISABLED_TRACEPOINTS_STR_LENGTH 255 ++#define MAX_TRACEPOINTS_STR_LENGTH 50 ++ ++extern char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; ++extern int disabled_tracepoints_num; ++#endif +\ No newline at end of file +diff --git a/ras-events.c b/ras-events.c +index fba22c8..899fb74 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -45,6 +45,7 @@ + #include "ras-logger.h" + #include "ras-page-isolation.h" + #include "ras-cpu-isolation.h" ++#include "ras-disabled-events.h" + + /* + * Polling time, if read() doesn't block. Currently, trace_pipe_raw never +@@ -174,6 +175,23 @@ static int get_tracing_dir(struct ras_events *ras) + return 0; + } + ++static bool is_disabled_event(char *group, char *event) { ++ char ras_event_name[MAX_PATH + 1]; ++ ++ snprintf(ras_event_name, sizeof(ras_event_name), "%s:%s", ++ group, event); ++ ++ if (disabled_tracepoints_num == 0) { ++ return false; ++ } ++ for (int i = 0; i < disabled_tracepoints_num; ++i) { ++ if (strcmp(choices_disable[i], ras_event_name) == 0) { ++ return true; ++ } ++ } ++ return false; ++} ++ + /* + * Tracing enable/disable code + */ +@@ -230,42 +248,42 @@ int toggle_ras_mc_event(int enable) + goto free_ras; + } + +- rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable); ++ rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable > 0 ? (is_disabled_event("ras", "mc_event") ? 0 : 1) : enable); + + #ifdef HAVE_AER +- rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable > 0 ? (is_disabled_event("ras", "aer_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_MCE +- rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable); ++ rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable > 0 ? (is_disabled_event("mce", "mce_record") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_EXTLOG +- rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable > 0 ? (is_disabled_event("ras", "extlog_mem_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_NON_STANDARD +- rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable > 0 ? (is_disabled_event("ras", "non_standard_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_ARM +- rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable > 0 ? (is_disabled_event("ras", "arm_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_DEVLINK +- rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable); ++ rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable > 0 ? (is_disabled_event("devlink", "devlink_health_report") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_DISKERROR + #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,18,0) +- rc |= __toggle_ras_mc_event(ras, "block", "block_rq_error", enable); ++ rc |= __toggle_ras_mc_event(ras, "block", "block_rq_error", enable > 0 ? (is_disabled_event("block", "block_rq_error") ? 0 : 1) : enable); + #else +- rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable); ++ rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable > 0 ? (is_disabled_event("block", "block_rq_complete") ? 0 : 1) : enable); + #endif + #endif + + #ifdef HAVE_MEMORY_FAILURE +- rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable > 0 ? (is_disabled_event("ras", "memory_failure_event") ? 0 : 1) : enable); + #endif + + free_ras: +@@ -935,43 +953,62 @@ int handle_ras_events(int record_events) + /* FIXME: enable memory isolation unconditionally */ + ras_page_account_init(); + #endif +- +- rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", +- ras_mc_event_handler, NULL, MC_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "ras", "mc_event"); ++ if (is_disabled_event("ras", "mc_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "mc_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", ++ ras_mc_event_handler, NULL, MC_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "ras", "mc_event"); ++ } + + #ifdef HAVE_AER +- rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", +- ras_aer_event_handler, NULL, AER_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "ras", "aer_event"); ++ if (is_disabled_event("ras", "aer_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "aer_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", ++ ras_aer_event_handler, NULL, AER_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "ras", "aer_event"); ++ } + #endif + + #ifdef HAVE_NON_STANDARD +- rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", +- ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "ras", "non_standard_event"); ++ if (is_disabled_event("ras", "non_standard_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "non_standard_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", ++ ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "ras", "non_standard_event"); ++ } + #endif + + #ifdef HAVE_ARM +- rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", +- ras_arm_event_handler, NULL, ARM_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "ras", "arm_event"); ++ if (is_disabled_event("ras", "arm_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "arm_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", ++ ras_arm_event_handler, NULL, ARM_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "ras", "arm_event"); ++ } + #endif + + cpus = get_num_cpus(ras); +@@ -981,83 +1018,118 @@ int handle_ras_events(int record_events) + #endif + + #ifdef HAVE_MCE +- rc = register_mce_handler(ras, cpus); +- if (rc) +- log(ALL, LOG_INFO, "Can't register mce handler\n"); +- if (ras->mce_priv) { +- rc = add_event_handler(ras, pevent, page_size, +- "mce", "mce_record", +- ras_mce_event_handler, NULL, MCE_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "mce", "mce_record"); ++ if (is_disabled_event("mce", "mce_record")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "mce", "mce_record"); ++ } else { ++ rc = register_mce_handler(ras, cpus); ++ if (rc) ++ log(ALL, LOG_INFO, "Can't register mce handler\n"); ++ if (ras->mce_priv) { ++ rc = add_event_handler(ras, pevent, page_size, ++ "mce", "mce_record", ++ ras_mce_event_handler, NULL, MCE_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "mce", "mce_record"); ++ } + } + #endif + + #ifdef HAVE_EXTLOG +- rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", +- ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT); +- if (!rc) { +- /* tell kernel we are listening, so don't printk to console */ +- (void)open("/sys/kernel/debug/ras/daemon_active", 0); +- num_events++; +- } else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "ras", "extlog_mem_event"); ++ if (is_disabled_event("ras", "extlog_mem_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "extlog_mem_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", ++ ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT); ++ if (!rc) { ++ /* tell kernel we are listening, so don't printk to console */ ++ (void)open("/sys/kernel/debug/ras/daemon_active", 0); ++ num_events++; ++ } else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "ras", "extlog_mem_event"); ++ } + #endif + + #ifdef HAVE_DEVLINK +- rc = add_event_handler(ras, pevent, page_size, "net", +- "net_dev_xmit_timeout", +- ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT); +- if (!rc) +- filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'"; +- +- rc = add_event_handler(ras, pevent, page_size, "devlink", +- "devlink_health_report", +- ras_devlink_event_handler, filter_str, DEVLINK_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "devlink", "devlink_health_report"); ++ if (is_disabled_event("net", "net_dev_xmit_timeout")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "net", "net_dev_xmit_timeout"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "net", ++ "net_dev_xmit_timeout", ++ ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT); ++ if (!rc) ++ filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'"; ++ ++ if (is_disabled_event("devlink", "devlink_health_report")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "devlink", "devlink_health_report"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "devlink", ++ "devlink_health_report", ++ ras_devlink_event_handler, filter_str, DEVLINK_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "devlink", "devlink_health_report"); ++ } ++ } + #endif + + #ifdef HAVE_DISKERROR + #if LINUX_VERSION_CODE >= KERNEL_VERSION(5,18,0) +- rc = add_event_handler(ras, pevent, page_size, "block", +- "block_rq_error", ras_diskerror_event_handler, +- NULL, DISKERROR_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "block", "block_rq_error"); +-#else +- rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0"); +- if (!rc) { ++ if (is_disabled_event("block", "block_rq_error")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "block", "block_rq_error"); ++ } else { + rc = add_event_handler(ras, pevent, page_size, "block", +- "block_rq_complete", ras_diskerror_event_handler, ++ "block_rq_error", ras_diskerror_event_handler, + NULL, DISKERROR_EVENT); + if (!rc) + num_events++; + else + log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "block", "block_rq_complete"); ++ "block", "block_rq_error"); ++ } ++#else ++ if (is_disabled_event("block", "block_rq_complete")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "block", "block_rq_complete"); ++ } else { ++ rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0"); ++ if (!rc) { ++ rc = add_event_handler(ras, pevent, page_size, "block", ++ "block_rq_complete", ras_diskerror_event_handler, ++ NULL, DISKERROR_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "block", "block_rq_complete"); ++ } + } + #endif + #endif + + #ifdef HAVE_MEMORY_FAILURE +- rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", +- ras_memory_failure_event_handler, NULL, MF_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", +- "ras", "memory_failure_event"); ++ if (is_disabled_event("ras", "memory_failure_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "memory_failure_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", ++ ras_memory_failure_event_handler, NULL, MF_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", ++ "ras", "memory_failure_event"); ++ } + #endif + + if (!num_events) { +diff --git a/rasdaemon.c b/rasdaemon.c +index 66f4dea..0437662 100644 +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -25,6 +25,7 @@ + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-events.h" ++#include "ras-disabled-events.h" + + /* + * Arguments(argp) handling logic and main +@@ -34,6 +35,9 @@ + #define TOOL_DESCRIPTION "RAS daemon to log the RAS events." + #define ARGS_DOC "" + ++char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; ++int disabled_tracepoints_num; ++ + const char *argp_program_version = TOOL_NAME " " VERSION; + const char *argp_program_bug_address = "Mauro Carvalho Chehab "; + +@@ -43,6 +47,36 @@ struct arguments { + int foreground; + }; + ++static void parse_disabled_choices() { ++ char disabled_tracepoints_str[MAX_DISABLED_TRACEPOINTS_STR_LENGTH]; ++ const char* sep = ";"; ++ char* tracepoint_str; ++ char* config_disabled_tracepoints = getenv(DISABLE); ++ if (config_disabled_tracepoints == NULL) { ++ return; ++ } ++ ++ if (strlen(config_disabled_tracepoints) >= MAX_DISABLED_TRACEPOINTS_STR_LENGTH) { ++ log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); ++ return; ++ } ++ strcpy(disabled_tracepoints_str, config_disabled_tracepoints); ++ ++ tracepoint_str = strtok(disabled_tracepoints_str, sep); ++ int index = 0; ++ ++ while(tracepoint_str != NULL && index < MAX_DISABLED_TRACEPOINTS_NUM) { ++ if (strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { ++ log(ALL, LOG_WARNING, "Failed to read disabled events config item %s string, length exceeds %d characters, skipped.\n", tracepoint_str, MAX_TRACEPOINTS_STR_LENGTH); ++ } ++ else { ++ strcpy(choices_disable[index++], tracepoint_str); ++ } ++ tracepoint_str = strtok(NULL, sep); ++ } ++ disabled_tracepoints_num = index; ++} ++ + static error_t parse_opt(int k, char *arg, struct argp_state *state) + { + struct arguments *args = state->input; +@@ -102,6 +136,8 @@ int main(int argc, char *argv[]) + return -1; + } + ++ parse_disabled_choices(); ++ + if (args.enable_ras) { + int enable; + +-- +2.39.1 + diff --git a/change-the-log-level-from-err-to-warning-in-get-trac.patch b/change-the-log-level-from-err-to-warning-in-get-trac.patch new file mode 100644 index 0000000..67cd8c3 --- /dev/null +++ b/change-the-log-level-from-err-to-warning-in-get-trac.patch @@ -0,0 +1,97 @@ +From 6062e5e726f520de36a97a21fce1ad6cd815e30a Mon Sep 17 00:00:00 2001 +From: zhuofeng +Date: Thu, 14 Dec 2023 14:40:00 +0800 +Subject: [PATCH 6/8] change the log level from err to warning in get trace + +--- + ras-events.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index ba36f8b..fba22c8 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -941,7 +941,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "ras", "mc_event"); + + #ifdef HAVE_AER +@@ -950,7 +950,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "ras", "aer_event"); + #endif + +@@ -960,7 +960,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "ras", "non_standard_event"); + #endif + +@@ -970,7 +970,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "ras", "arm_event"); + #endif + +@@ -991,7 +991,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "mce", "mce_record"); + } + #endif +@@ -1004,7 +1004,7 @@ int handle_ras_events(int record_events) + (void)open("/sys/kernel/debug/ras/daemon_active", 0); + num_events++; + } else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "ras", "extlog_mem_event"); + #endif + +@@ -1021,7 +1021,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "devlink", "devlink_health_report"); + #endif + +@@ -1044,7 +1044,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "block", "block_rq_complete"); + } + #endif +@@ -1056,7 +1056,7 @@ int handle_ras_events(int record_events) + if (!rc) + num_events++; + else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ log(ALL, LOG_WARNING, "Can't get traces from %s:%s\n", + "ras", "memory_failure_event"); + #endif + +-- +2.39.1 + diff --git a/fix-rasdaemon-print-loading-config-logs-multi.patch b/fix-rasdaemon-print-loading-config-logs-multi.patch new file mode 100644 index 0000000..10c5c1f --- /dev/null +++ b/fix-rasdaemon-print-loading-config-logs-multi.patch @@ -0,0 +1,56 @@ +From 39854de48dee528bffc914594411841c7462c626 Mon Sep 17 00:00:00 2001 +From: caixiaomeng 00662745 +Date: Wed, 6 Mar 2024 14:21:41 +0800 +Subject: [PATCH 8/8] huawei-fix-rasdaemon-print-loading-config-logs-multi + +--- + rasdaemon.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/rasdaemon.c b/rasdaemon.c +index 0437662..7ece6c1 100644 +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -47,7 +47,7 @@ struct arguments { + int foreground; + }; + +-static void parse_disabled_choices() { ++static void parse_disabled_choices(int enable_ras) { + char disabled_tracepoints_str[MAX_DISABLED_TRACEPOINTS_STR_LENGTH]; + const char* sep = ";"; + char* tracepoint_str; +@@ -57,16 +57,18 @@ static void parse_disabled_choices() { + } + + if (strlen(config_disabled_tracepoints) >= MAX_DISABLED_TRACEPOINTS_STR_LENGTH) { +- log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); ++ if (enable_ras) { ++ log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); ++ } + return; + } + strcpy(disabled_tracepoints_str, config_disabled_tracepoints); +- ++ + tracepoint_str = strtok(disabled_tracepoints_str, sep); + int index = 0; + + while(tracepoint_str != NULL && index < MAX_DISABLED_TRACEPOINTS_NUM) { +- if (strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { ++ if (enable_ras && strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { + log(ALL, LOG_WARNING, "Failed to read disabled events config item %s string, length exceeds %d characters, skipped.\n", tracepoint_str, MAX_TRACEPOINTS_STR_LENGTH); + } + else { +@@ -136,7 +138,7 @@ int main(int argc, char *argv[]) + return -1; + } + +- parse_disabled_choices(); ++ parse_disabled_choices(args.enable_ras); + + if (args.enable_ras) { + int enable; +-- +2.39.1 + diff --git a/ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch b/ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch new file mode 100644 index 0000000..305efa1 --- /dev/null +++ b/ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch @@ -0,0 +1,40 @@ +From 2301bfbe7e589719eb9e44e3a83a7870b759eb71 Mon Sep 17 00:00:00 2001 +From: hubin +Date: Thu, 18 May 2023 16:14:41 +0800 +Subject: [PATCH 4/8] ras-events: quit loop in read_ras_event when kbuf data is + broken + +when kbuf data is broken, kbuffer_next_event() may move kbuf->index back to +the current kbuf->index position, causing dead loop. + +In this situation, rasdaemon will repeatedly parse an invalid event, and +print warning like "ug! negative record size -8!", pushing cpu utilization +rate to 100%. + +when kbuf data is broken, discard current page and continue reading next page +kbuf. + +Signed-off-by: hubin +--- + ras-events.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/ras-events.c b/ras-events.c +index adaa6e9..cbe0675 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -549,6 +549,11 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + kbuffer_load_subbuffer(kbuf, page); + + while ((data = kbuffer_read_event(kbuf, &time_stamp))) { ++ if (kbuffer_curr_size(kbuf) < 0) { ++ log(TERM, LOG_ERR, "invalid kbuf data, discard\n"); ++ break; ++ } ++ + parse_ras_data(&pdata[i], + kbuf, data, time_stamp); + +-- +2.39.1 + diff --git a/rasdaemon.spec b/rasdaemon.spec index f5053ad..91b846e 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.8.0 -Release: 8 +Release: 9 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -23,23 +23,27 @@ Requires(postun): systemd Patch0: backport-Fix-potential-overflow-with-some-arrays-at-page-isol.patch Patch1: fix-ras-mc-ctl.service-startup-failed-when-selinux-is-no.patch -Patch9000: bugfix-rasdaemon-wait-for-file-access.patch -Patch9001: bugfix-fix-fd-check.patch -Patch9002: bugfix-fix-disk-error-log-storm.patch -Patch9003: 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch -Patch9004: 0001-rasdaemon-fix-return-value-type-issue-of-read-write-.patch -Patch9005: 0002-rasdaemon-fix-issue-of-signed-and-unsigned-integer-c.patch -Patch9006: 0003-rasdaemon-Add-support-for-creating-the-vendor-error-.patch -Patch9007: backport-Check-CPUs-online-not-configured.patch -Patch9008: backport-rasdaemon-diskerror-fix-incomplete-diskerror-log.patch -Patch9009: bugfix-fix-cpu-isolate-errors-when-some-cpus-are-.patch -Patch9010: rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch -Patch9011: bugfix-set-to-default-when-param-is-overflow.patch -Patch9012: fix-ras-mc-ctl-summary-failed.patch -Patch9013: 0001-rasdaemon-Fix-some-compilation-alarms-in-ras-record..patch -Patch9014: 0002-rasdaemon-Fix-few-compilation-warnings-in-non-standa.patch -Patch9015: 0003-rasdaemon-Fix-some-static-check-warning.patch -Patch9016: 0004-rasdaemon-Add-new-modules-supported-by-HiSilicon-com.patch +Patch2: bugfix-rasdaemon-wait-for-file-access.patch +Patch3: bugfix-fix-fd-check.patch +Patch4: bugfix-fix-disk-error-log-storm.patch +Patch5: 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch +Patch6: 0001-rasdaemon-fix-return-value-type-issue-of-read-write-.patch +Patch7: 0002-rasdaemon-fix-issue-of-signed-and-unsigned-integer-c.patch +Patch8: 0003-rasdaemon-Add-support-for-creating-the-vendor-error-.patch +Patch9: backport-Check-CPUs-online-not-configured.patch +Patch10: backport-rasdaemon-diskerror-fix-incomplete-diskerror-log.patch +Patch11: bugfix-fix-cpu-isolate-errors-when-some-cpus-are-.patch +Patch12: rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch +Patch13: bugfix-set-to-default-when-param-is-overflow.patch +Patch14: fix-ras-mc-ctl-summary-failed.patch +Patch15: 0001-rasdaemon-Fix-some-compilation-alarms-in-ras-record..patch +Patch16: 0002-rasdaemon-Fix-few-compilation-warnings-in-non-standa.patch +Patch17: 0003-rasdaemon-Fix-some-static-check-warning.patch +Patch18: 0004-rasdaemon-Add-new-modules-supported-by-HiSilicon-com.patch +Patch19: ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch +Patch20: change-the-log-level-from-err-to-warning-in-get-trac.patch +Patch21: backport-Add-dynamic-switch-of-ras-events-sup.patch +Patch22: fix-rasdaemon-print-loading-config-logs-multi.patch %description The rasdaemon program is a daemon which monitors the platform @@ -91,6 +95,12 @@ fi /usr/bin/systemctl disable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Thu Jun 26 2025 zhuo <1107893276@qq.com> - 0.8.0-9 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:backport upstream patches + * Wed Apr 30 2025 wangjiang - 0.8.0-8 - Type:bugfix - ID:NA -- Gitee