/*
 * Shared state for the "disabled events" configuration.
 *
 * The environment variable named by DISABLE holds a ';'-separated list of
 * "group:event" tracepoint names that must not be traced.  rasdaemon.c parses
 * the variable and defines the storage declared below; ras-events.c reads it
 * to decide which tracepoints to leave alone.
 */
#ifndef __RAS_DISABLED_EVENTS_H
#define __RAS_DISABLED_EVENTS_H

/* Name of the environment variable that lists the disabled tracepoints. */
#define DISABLE "DISABLE"

/* Maximum number of tracepoint names kept in choices_disable[]. */
#define MAX_DISABLED_TRACEPOINTS_NUM 50

/*
 * Size, including the terminating NUL, of the buffer that receives the whole
 * configuration string.  A value of this length or longer is rejected.
 */
#define MAX_DISABLED_TRACEPOINTS_STR_LENGTH 255

/*
 * Size, including the terminating NUL, of one choices_disable[] slot.  A
 * single "group:event" item of this length or longer is skipped.
 */
#define MAX_TRACEPOINTS_STR_LENGTH 50

/* NUL-terminated "group:event" names; only the first disabled_tracepoints_num are valid. */
extern char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH];

/* Number of valid entries in choices_disable[]. */
extern int disabled_tracepoints_num;

#endif /* __RAS_DISABLED_EVENTS_H */
#include "ras-logger.h" + #include "ras-page-isolation.h" + #include "ras-cpu-isolation.h" ++#include "ras-disabled-events.h" + + /* + * Polling time, if read() doesn't block. Currently, trace_pipe_raw never +@@ -172,6 +173,23 @@ static int get_tracing_dir(struct ras_events *ras) + return 0; + } + ++static bool is_disabled_event(char *group, char *event) { ++ char ras_event_name[MAX_PATH + 1]; ++ ++ snprintf(ras_event_name, sizeof(ras_event_name), "%s:%s", ++ group, event); ++ ++ if (disabled_tracepoints_num == 0) { ++ return false; ++ } ++ for (int i = 0; i < disabled_tracepoints_num; ++i) { ++ if (strcmp(choices_disable[i], ras_event_name) == 0) { ++ return true; ++ } ++ } ++ return false; ++} ++ + /* + * Tracing enable/disable code + */ +@@ -228,40 +246,41 @@ int toggle_ras_mc_event(int enable) + goto free_ras; + } + +- rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable); ++ rc = __toggle_ras_mc_event(ras, "ras", "mc_event", enable > 0 ? (is_disabled_event("ras", "mc_event") ? 0 : 1) : enable); + + #ifdef HAVE_AER +- rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "aer_event", enable > 0 ? (is_disabled_event("ras", "aer_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_MCE +- rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable); ++ rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable > 0 ? (is_disabled_event("mce", "mce_record") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_EXTLOG +- rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable > 0 ? (is_disabled_event("ras", "extlog_mem_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_NON_STANDARD +- rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "non_standard_event", enable > 0 ? (is_disabled_event("ras", "non_standard_event") ? 
0 : 1) : enable); + #endif + + #ifdef HAVE_ARM +- rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable > 0 ? (is_disabled_event("ras", "arm_event") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_DEVLINK +- rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable); ++ rc |= __toggle_ras_mc_event(ras, "devlink", "devlink_health_report", enable > 0 ? (is_disabled_event("devlink", "devlink_health_report") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_DISKERROR +- rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable); ++ rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable > 0 ? (is_disabled_event("block", "block_rq_complete") ? 0 : 1) : enable); + #endif + + #ifdef HAVE_MEMORY_FAILURE +- rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); ++ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable > 0 ? (is_disabled_event("ras", "memory_failure_event") ? 
0 : 1) : enable); + #endif + ++ + free_ras: + free(ras); + return rc; +@@ -870,42 +889,62 @@ int handle_ras_events(int record_events) + ras_page_account_init(); + #endif + +- rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", +- ras_mc_event_handler, NULL, MC_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "ras", "mc_event"); ++ if (is_disabled_event("ras", "mc_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "mc_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", ++ ras_mc_event_handler, NULL, MC_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "mc_event"); ++ } + + #ifdef HAVE_AER +- rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", +- ras_aer_event_handler, NULL, AER_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "ras", "aer_event"); ++ if (is_disabled_event("ras", "aer_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "aer_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "aer_event", ++ ras_aer_event_handler, NULL, AER_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "aer_event"); ++ } + #endif + + #ifdef HAVE_NON_STANDARD +- rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", +- ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "ras", "non_standard_event"); ++ if (is_disabled_event("ras", "non_standard_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "non_standard_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "non_standard_event", ++ ras_non_standard_event_handler, NULL, 
NON_STANDARD_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "non_standard_event"); ++ } + #endif + + #ifdef HAVE_ARM +- rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", +- ras_arm_event_handler, NULL, ARM_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "ras", "arm_event"); ++ if (is_disabled_event("ras", "arm_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "arm_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "arm_event", ++ ras_arm_event_handler, NULL, ARM_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "arm_event"); ++ } + #endif + + cpus = get_num_cpus(ras); +@@ -915,72 +954,102 @@ int handle_ras_events(int record_events) + #endif + + #ifdef HAVE_MCE +- rc = register_mce_handler(ras, cpus); +- if (rc) +- log(ALL, LOG_INFO, "Can't register mce handler\n"); +- if (ras->mce_priv) { +- rc = add_event_handler(ras, pevent, page_size, +- "mce", "mce_record", +- ras_mce_event_handler, NULL, MCE_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "mce", "mce_record"); ++ if (is_disabled_event("mce", "mce_record")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "mce", "mce_record"); ++ } else { ++ rc = register_mce_handler(ras, cpus); ++ if (rc) ++ log(ALL, LOG_INFO, "Can't register mce handler\n"); ++ if (ras->mce_priv) { ++ rc = add_event_handler(ras, pevent, page_size, ++ "mce", "mce_record", ++ ras_mce_event_handler, NULL, MCE_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "mce", "mce_record"); ++ } + } + #endif + + #ifdef HAVE_EXTLOG +- rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", +- ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT); +- if (!rc) { +- /* tell 
kernel we are listening, so don't printk to console */ +- (void)open("/sys/kernel/debug/ras/daemon_active", 0); +- num_events++; +- } else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "ras", "extlog_mem_event"); ++ if (is_disabled_event("ras", "extlog_mem_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "extlog_mem_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event", ++ ras_extlog_mem_event_handler, NULL, EXTLOG_EVENT); ++ if (!rc) { ++ /* tell kernel we are listening, so don't printk to console */ ++ (void)open("/sys/kernel/debug/ras/daemon_active", 0); ++ num_events++; ++ } else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "extlog_mem_event"); ++ } + #endif + + #ifdef HAVE_DEVLINK +- rc = add_event_handler(ras, pevent, page_size, "net", +- "net_dev_xmit_timeout", +- ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT); +- if (!rc) +- filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'"; +- +- rc = add_event_handler(ras, pevent, page_size, "devlink", +- "devlink_health_report", +- ras_devlink_event_handler, filter_str, DEVLINK_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "devlink", "devlink_health_report"); ++ if (is_disabled_event("net", "net_dev_xmit_timeout")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "net", "net_dev_xmit_timeout"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "net", ++ "net_dev_xmit_timeout", ++ ras_net_xmit_timeout_handler, NULL, DEVLINK_EVENT); ++ if (!rc) ++ filter_str = "devlink/devlink_health_report:msg=~\'TX timeout*\'"; ++ ++ if (is_disabled_event("devlink", "devlink_health_report")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "devlink", "devlink_health_report"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "devlink", ++ "devlink_health_report", ++ ras_devlink_event_handler, 
filter_str, DEVLINK_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "devlink", "devlink_health_report"); ++ } ++ } + #endif + + #ifdef HAVE_DISKERROR +- rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0"); +- if (!rc) { +- rc = add_event_handler(ras, pevent, page_size, "block", +- "block_rq_complete", ras_diskerror_event_handler, +- NULL, DISKERROR_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "block", "block_rq_complete"); ++ if (is_disabled_event("block", "block_rq_complete")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "block", "block_rq_complete"); ++ } else { ++ rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0"); ++ if (!rc) { ++ rc = add_event_handler(ras, pevent, page_size, "block", ++ "block_rq_complete", ras_diskerror_event_handler, ++ NULL, DISKERROR_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "block", "block_rq_complete"); ++ } + } + #endif + + #ifdef HAVE_MEMORY_FAILURE +- rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", +- ras_memory_failure_event_handler, NULL, MF_EVENT); +- if (!rc) +- num_events++; +- else +- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", +- "ras", "memory_failure_event"); ++ if (is_disabled_event("ras", "memory_failure_event")) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ "ras", "memory_failure_event"); ++ } else { ++ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", ++ ras_memory_failure_event_handler, NULL, MF_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "memory_failure_event"); ++ } + #endif + + if (!num_events) { +diff --git a/rasdaemon.c b/rasdaemon.c +index 66f4dea..0437662 100644 +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -25,6 +25,7 @@ + #include 
"ras-record.h" + #include "ras-logger.h" + #include "ras-events.h" ++#include "ras-disabled-events.h" + + /* + * Arguments(argp) handling logic and main +@@ -34,6 +35,9 @@ + #define TOOL_DESCRIPTION "RAS daemon to log the RAS events." + #define ARGS_DOC "" + ++char choices_disable[MAX_DISABLED_TRACEPOINTS_NUM][MAX_TRACEPOINTS_STR_LENGTH]; ++int disabled_tracepoints_num; ++ + const char *argp_program_version = TOOL_NAME " " VERSION; + const char *argp_program_bug_address = "Mauro Carvalho Chehab "; + +@@ -43,6 +47,36 @@ struct arguments { + int foreground; + }; + ++static void parse_disabled_choices() { ++ char disabled_tracepoints_str[MAX_DISABLED_TRACEPOINTS_STR_LENGTH]; ++ const char* sep = ";"; ++ char* tracepoint_str; ++ char* config_disabled_tracepoints = getenv(DISABLE); ++ if (config_disabled_tracepoints == NULL) { ++ return; ++ } ++ ++ if (strlen(config_disabled_tracepoints) >= MAX_DISABLED_TRACEPOINTS_STR_LENGTH) { ++ log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); ++ return; ++ } ++ strcpy(disabled_tracepoints_str, config_disabled_tracepoints); ++ ++ tracepoint_str = strtok(disabled_tracepoints_str, sep); ++ int index = 0; ++ ++ while(tracepoint_str != NULL && index < MAX_DISABLED_TRACEPOINTS_NUM) { ++ if (strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { ++ log(ALL, LOG_WARNING, "Failed to read disabled events config item %s string, length exceeds %d characters, skipped.\n", tracepoint_str, MAX_TRACEPOINTS_STR_LENGTH); ++ } ++ else { ++ strcpy(choices_disable[index++], tracepoint_str); ++ } ++ tracepoint_str = strtok(NULL, sep); ++ } ++ disabled_tracepoints_num = index; ++} ++ + static error_t parse_opt(int k, char *arg, struct argp_state *state) + { + struct arguments *args = state->input; +@@ -102,6 +136,8 @@ int main(int argc, char *argv[]) + return -1; + } + ++ parse_disabled_choices(); ++ + if (args.enable_ras) { + int enable; + +-- +2.33.0 + + diff 
--git a/fix-rasdaemon-print-loading-config-logs-multiple-times.patch b/fix-rasdaemon-print-loading-config-logs-multiple-times.patch new file mode 100644 index 0000000000000000000000000000000000000000..04cf7b7281b39206e1952db5c0cfe45c585714a0 --- /dev/null +++ b/fix-rasdaemon-print-loading-config-logs-multiple-times.patch @@ -0,0 +1,57 @@ +From 83f7052a8d8c9641809611d9485256d8ed843c31 Mon Sep 17 00:00:00 2001 +From: caixiaomeng 00662745 +Date: Wed, 6 Mar 2024 14:21:41 +0800 +Subject: [PATCH] huawei-fix-rasdaemon-print-loading-config-logs-multi + +--- + rasdaemon.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/rasdaemon.c b/rasdaemon.c +index 0437662..7ece6c1 100644 +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -47,7 +47,7 @@ struct arguments { + int foreground; + }; + +-static void parse_disabled_choices() { ++static void parse_disabled_choices(int enable_ras) { + char disabled_tracepoints_str[MAX_DISABLED_TRACEPOINTS_STR_LENGTH]; + const char* sep = ";"; + char* tracepoint_str; +@@ -57,16 +57,18 @@ static void parse_disabled_choices() { + } + + if (strlen(config_disabled_tracepoints) >= MAX_DISABLED_TRACEPOINTS_STR_LENGTH) { +- log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); ++ if (enable_ras) { ++ log(ALL, LOG_WARNING, "Failed to read disabled events config string, length exceeds %d characters.\n", MAX_DISABLED_TRACEPOINTS_STR_LENGTH); ++ } + return; + } + strcpy(disabled_tracepoints_str, config_disabled_tracepoints); +- ++ + tracepoint_str = strtok(disabled_tracepoints_str, sep); + int index = 0; + + while(tracepoint_str != NULL && index < MAX_DISABLED_TRACEPOINTS_NUM) { +- if (strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { ++ if (enable_ras && strlen(tracepoint_str) >= MAX_TRACEPOINTS_STR_LENGTH) { + log(ALL, LOG_WARNING, "Failed to read disabled events config item %s string, length exceeds %d characters, skipped.\n", 
tracepoint_str, MAX_TRACEPOINTS_STR_LENGTH); + } + else { +@@ -136,7 +138,7 @@ int main(int argc, char *argv[]) + return -1; + } + +- parse_disabled_choices(); ++ parse_disabled_choices(args.enable_ras); + + if (args.enable_ras) { + int enable; +-- +2.33.0 + + diff --git a/rasdaemon.spec b/rasdaemon.spec index 551c5e7126f66115cac89722f0314e1b41a12ebe..f6b471829e82f8f01c102de566515bb33f2de707 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 16 +Release: 17 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -53,6 +53,8 @@ Patch6004: backport-rasdaemon-service_in-comment-out-syslog_target.patch Patch9000: fix-ras-mc-ctl.service-startup-failed-when-selinux-is-no.patch Patch9001: fix-ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch +Patch9002: add-dynamic-switch-of-ras-events-support-and-disable-block-rq-complete.patch +Patch9003: fix-rasdaemon-print-loading-config-logs-multiple-times.patch %description The rasdaemon program is a daemon which monitors the platform @@ -106,6 +108,12 @@ if [ $1 -eq 0 ] ; then fi %changelog +* Mon Apr 8 2024 caixiaomeng - 0.6.7-17 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:add-dynamic-switch-of-ras-events-support-and-disable-block-rq-complete + * Mon Mar 25 2024 zhangruifang - 0.6.7-16 - Type:bugfix - ID:NA