diff --git a/1001-rasdaemon-Add-notification-support-when-page-goes-of.patch b/1001-rasdaemon-Add-notification-support-when-page-goes-of.patch deleted file mode 100644 index 736fea37a87e52cd11ff11865817dd599b391e33..0000000000000000000000000000000000000000 --- a/1001-rasdaemon-Add-notification-support-when-page-goes-of.patch +++ /dev/null @@ -1,222 +0,0 @@ -diff -Nur rasdaemon-0.6.7/Makefile.am rasdaemon-0.6.7_new/Makefile.am ---- rasdaemon-0.6.7/Makefile.am 2023-06-02 15:14:06.995338446 +0800 -+++ rasdaemon-0.6.7_new/Makefile.am 2023-06-02 15:14:33.789545754 +0800 -@@ -2,7 +2,7 @@ - SUBDIRS = libtrace util man - SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) --EXTRA_DIST = $(SYSTEMD_SERVICES_IN) misc/rasdaemon.env -+EXTRA_DIST = $(SYSTEMD_SERVICES_IN) misc/rasdaemon.env misc/notices - - # This rule is needed because \@sbindir\@ is expanded to \${exec_prefix\}/sbin - # during ./configure phase, therefore it is not possible to add .service.in -diff -Nur rasdaemon-0.6.7/misc/notices/page-ce-offline-post-notice rasdaemon-0.6.7_new/misc/notices/page-ce-offline-post-notice ---- rasdaemon-0.6.7/misc/notices/page-ce-offline-post-notice 1970-01-01 08:00:00.000000000 +0800 -+++ rasdaemon-0.6.7_new/misc/notices/page-ce-offline-post-notice 2023-06-02 15:16:14.456324620 +0800 -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon after a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-post-notice.local ] && . ./page-ce-offline-post-notice.local $1 -+ -+if [ -d page-ce-offline-post-notice.extern ] -+then -+ ls page-ce-offline-post-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-post-notice.extern/$item ] && . 
./page-ce-offline-post-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff -Nur rasdaemon-0.6.7/misc/notices/page-ce-offline-pre-notice rasdaemon-0.6.7_new/misc/notices/page-ce-offline-pre-notice ---- rasdaemon-0.6.7/misc/notices/page-ce-offline-pre-notice 1970-01-01 08:00:00.000000000 +0800 -+++ rasdaemon-0.6.7_new/misc/notices/page-ce-offline-pre-notice 2023-06-02 15:16:39.440517924 +0800 -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon before a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-pre-notice.local ] && . ./page-ce-offline-pre-notice.local $1 -+ -+if [ -d page-ce-offline-pre-notice.extern ] -+then -+ ls page-ce-offline-pre-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-pre-notice.extern/$item ] && . ./page-ce-offline-pre-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff -Nur rasdaemon-0.6.7/misc/rasdaemon.env rasdaemon-0.6.7_new/misc/rasdaemon.env ---- rasdaemon-0.6.7/misc/rasdaemon.env 2023-06-02 15:14:06.994338438 +0800 -+++ rasdaemon-0.6.7_new/misc/rasdaemon.env 2023-06-02 15:17:54.307097173 +0800 -@@ -27,3 +27,7 @@ - # soft-then-hard First try to soft offline, then try hard offlining. - # Note: default offline choice is "soft". 
- PAGE_CE_ACTION="soft" -+ -+# Notices script when doing memory offline -+PAGE_CE_OFFLINE_PRE_NOTICE="page-ce-offline-pre-notice" -+PAGE_CE_OFFLINE_POST_NOTICE="page-ce-offline-post-notice" -diff -Nur rasdaemon-0.6.7/misc/rasdaemon.spec.in rasdaemon-0.6.7_new/misc/rasdaemon.spec.in ---- rasdaemon-0.6.7/misc/rasdaemon.spec.in 2023-06-02 15:14:06.994338438 +0800 -+++ rasdaemon-0.6.7_new/misc/rasdaemon.spec.in 2023-06-02 15:19:03.105629470 +0800 -@@ -46,6 +46,8 @@ - make install DESTDIR=%{buildroot} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service -+install -d %{buildroot}%{_sysconfdir}/rasdaemon_notices/ -+install -D -p -m 0755 misc/notices/* %{buildroot}%{_sysconfdir}/rasdaemon_notices/ - rm INSTALL %{buildroot}/usr/include/*.h - - %files -@@ -57,6 +59,7 @@ - %{_sysconfdir}/ras/dimm_labels.d - @SYSCONFDEFDIR@/%{name} - %config(noreplace) @SYSCONFDEFDIR@/%{name} -+%config(noreplace) %{_sysconfdir}/rasdaemon_notices/* - - %changelog - -diff -Nur rasdaemon-0.6.7/ras-page-isolation.c rasdaemon-0.6.7_new/ras-page-isolation.c ---- rasdaemon-0.6.7/ras-page-isolation.c 2023-06-02 15:14:06.995338446 +0800 -+++ rasdaemon-0.6.7_new/ras-page-isolation.c 2023-06-02 16:06:28.020663355 +0800 -@@ -17,12 +17,16 @@ - #include - #include - #include -+#include -+#include -+#include - #include - #include - #include - #include "ras-logger.h" - #include "ras-page-isolation.h" - -+#define MAX_PATH_LEN 64 - #define PARSED_ENV_LEN 50 - static const struct config threshold_units[] = { - { "m", 1000 }, -@@ -76,6 +80,8 @@ - - static enum otype offline = OFFLINE_SOFT; - static struct rb_root page_records; -+static char pre_notice[MAX_PATH_LEN]; -+static char post_notice[MAX_PATH_LEN]; - - static void page_offline_init(void) - { -@@ -205,16 +211,94 @@ - threshold_string, cycle_string); - } - -+static void page_notice_init(void) -+{ -+ char *notice_root = 
"/etc/rasdaemon_notices"; -+ char *pre_re = getenv("PAGE_CE_OFFLINE_PRE_NOTICE"); -+ char *post_re = getenv("PAGE_CE_OFFLINE_POST_NOTICE"); -+ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ snprintf(pre_notice, sizeof(pre_notice), "%s/%s", notice_root, pre_re); -+ if (access(pre_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", pre_notice); -+ -+ snprintf(post_notice, sizeof(post_notice), "%s/%s", notice_root, post_re); -+ if (access(post_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", post_notice); -+} -+ - void ras_page_account_init(void) - { - page_offline_init(); - page_isolation_init(); -+ page_notice_init(); -+} -+ -+static void finish_child(pid_t child, int status) -+{ -+ if (WIFEXITED(status) && WEXITSTATUS(status)) { -+ log(TERM, LOG_INFO, "notice exited with status %d\n", WEXITSTATUS(status)); -+ } else if (WIFSIGNALED(status)) { -+ log(TERM, LOG_INFO,"notice died with signal %s\n", strsignal(WTERMSIG(status))); -+ } -+ -+ return; - } - -+static void __run_notice(char *argv[], char **env) -+{ -+ pid_t child; -+ int status; -+ -+ child = fork(); -+ if (child < 0) { -+ log(TERM, LOG_ERR, "Cannot create process for offline notice"); -+ return; -+ } -+ if (child == 0) { -+ execve(argv[0], argv, env); -+ _exit(127); -+ } -+ else { -+ waitpid(child, &status, 0); -+ finish_child(child, status); -+ } -+} -+ -+static void run_notice(char *argv[]) -+{ -+ int MAX_ENV = 20; -+ char *env[MAX_ENV]; -+ int ei = 0; -+ int i; -+ -+ asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin"); -+ env[ei] = NULL; -+ assert(ei < MAX_ENV); -+ -+ __run_notice(argv, env); -+ -+ for (i = 0; i < ei; i++) -+ free(env[i]); -+ } -+ - static int do_page_offline(unsigned long long addr, enum otype type) - { - int fd, rc; - char buf[20]; -+ char *args; -+ char *argv[] = { -+ NULL, -+ NULL, -+ NULL, -+ }; -+ -+ asprintf(&args, "%llu", addr); -+ argv[0] = (char*)&pre_notice; -+ argv[1] = args; -+ 
run_notice(argv); - - fd = open(kernel_offline[type], O_WRONLY); - if (fd == -1) { -@@ -228,6 +312,12 @@ - log(TERM, LOG_ERR, "page offline addr(%s) by %s failed, errno:%d\n", buf, kernel_offline[type], errno); - } - close(fd); -+ -+ argv[0] = (char*)&post_notice; -+ run_notice(argv); -+ -+ free(args); -+ - return rc; - } - diff --git a/1001-rasdaemon-mce-amd-smca-properly-limit-bank-types.patch b/1001-rasdaemon-mce-amd-smca-properly-limit-bank-types.patch new file mode 100644 index 0000000000000000000000000000000000000000..9255ac9858729353871f4044d75560725b15e553 --- /dev/null +++ b/1001-rasdaemon-mce-amd-smca-properly-limit-bank-types.patch @@ -0,0 +1,29 @@ +From 1eb161a1c0ed47d1e260956f9bd9fb4beff81d3c Mon Sep 17 00:00:00 2001 +From: Aristeu Rozanski +Date: Thu, 19 Jan 2023 08:45:57 -0500 +Subject: [PATCH 01/85] rasdaemon: mce-amd-smca: properly limit bank types + +Found with covscan. + +Signed-off-by: Aristeu Rozanski +Signed-off-by: Mauro Carvalho Chehab +--- + mce-amd-smca.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 7cc596e..233fa0a 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -931,7 +931,7 @@ void decode_smca_error(struct mce_event *e, struct mce_priv *m) + return; + } + +- if (bank_type >= MAX_NR_BANKS) { ++ if (bank_type >= N_SMCA_BANK_TYPES) { + strcpy(e->mcastatus_msg, "Don't know how to decode this bank"); + return; + } +-- +2.33.1 + diff --git a/1002-rasdaemon-ras-memory-failure-handler-handle-localtim.patch b/1002-rasdaemon-ras-memory-failure-handler-handle-localtim.patch new file mode 100644 index 0000000000000000000000000000000000000000..60dc329568587942766d7fd8f4fbc2bab7d4e00c --- /dev/null +++ b/1002-rasdaemon-ras-memory-failure-handler-handle-localtim.patch @@ -0,0 +1,34 @@ +From 76846ec3b8740794b5c75934e8a24c07e6cf70bd Mon Sep 17 00:00:00 2001 +From: Aristeu Rozanski +Date: Thu, 19 Jan 2023 08:45:57 -0500 +Subject: [PATCH 02/85] rasdaemon: ras-memory-failure-handler: 
handle + localtime() failure correctly + +We could just have an empty string but keeping the format could prevent +issues if someone is actually parsing this. +Found with covscan. + +v2: fixed the timestamp as pointed by Robert Elliott + +Signed-off-by: Aristeu Rozanski +Signed-off-by: Mauro Carvalho Chehab +--- + ras-memory-failure-handler.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index 9941e68..1951456 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -148,6 +148,8 @@ int ras_memory_failure_event_handler(struct trace_seq *s, + if (tm) + strftime(ev.timestamp, sizeof(ev.timestamp), + "%Y-%m-%d %H:%M:%S %z", tm); ++ else ++ strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp)); + trace_seq_printf(s, "%s ", ev.timestamp); + + if (pevent_get_field_val(s, event, "pfn", record, &val, 1) < 0) +-- +2.33.1 + diff --git a/1003-rasdaemon-Support-cpu-fault-isolation-for-corrected-.patch b/1003-rasdaemon-Support-cpu-fault-isolation-for-corrected-.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd982e9975c5fb8004fd5a5d5aee04f9dfe9d26c --- /dev/null +++ b/1003-rasdaemon-Support-cpu-fault-isolation-for-corrected-.patch @@ -0,0 +1,939 @@ +From 97a88dfbb3d1db9320618c2116e5dca06677c2ea Mon Sep 17 00:00:00 2001 +From: Shengwei Luo +Date: Wed, 23 Feb 2022 17:21:58 +0800 +Subject: [PATCH 03/85] rasdaemon: Support cpu fault isolation for corrected + errors + +When the corrected errors exceed the set limit in cycle, try to +offline the related cpu core. 
+ +Signed-off-by: Shengwei Luo +Signed-off-by: Junchong Pan +Signed-off-by: Lei Feng +Signed-off-by: Xiaofei Tan +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + Makefile.am | 6 +- + configure.ac | 11 ++ + misc/rasdaemon.env | 17 ++ + queue.c | 119 ++++++++++++++ + queue.h | 39 +++++ + ras-arm-handler.c | 97 +++++++++++ + ras-arm-handler.h | 18 ++ + ras-cpu-isolation.c | 388 ++++++++++++++++++++++++++++++++++++++++++++ + ras-cpu-isolation.h | 67 ++++++++ + ras-events.c | 9 +- + 10 files changed, 769 insertions(+), 2 deletions(-) + create mode 100644 queue.c + create mode 100644 queue.h + create mode 100644 ras-cpu-isolation.c + create mode 100644 ras-cpu-isolation.h + +diff --git a/Makefile.am b/Makefile.am +index fabca78..242ceb7 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -63,13 +63,17 @@ endif + if WITH_AMP_NS_DECODE + rasdaemon_SOURCES += non-standard-ampere.c + endif ++if WITH_CPU_FAULT_ISOLATION ++ rasdaemon_SOURCES += ras-cpu-isolation.c queue.c ++endif + rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a + + include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ + ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ + ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ + ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ +- non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h ++ non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ ++ ras-cpu-isolation.h queue.h + + # This rule can't be called with more than one Makefile job (like make -j8) + # I can't figure out a way to fix that +diff --git a/configure.ac b/configure.ac +index 33b81fe..d098fcf 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -161,6 +161,16 @@ AS_IF([test "x$enable_amp_ns_decode" = "xyes" || test "x$enable_all" == "xyes"], + AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all == 
xyes]) + AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"]) + ++AC_ARG_ENABLE([cpu_fault_isolation], ++ AS_HELP_STRING([--enable-cpu-fault-isolation], [enable cpu online fault isolation])) ++ ++AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" == "xyes"], [ ++ AC_DEFINE(HAVE_CPU_FAULT_ISOLATION,1,"have cpu online fault isolation") ++ AC_SUBST([WITH_CPU_FAULT_ISOLATION]) ++]) ++AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all == xyes]) ++AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"]) ++ + test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc + + CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" +@@ -201,4 +211,5 @@ compile time options summary + Memory Failure : $USE_MEMORY_FAILURE + Memory CE PFA : $USE_MEMORY_CE_PFA + AMP RAS errors : $USE_AMP_NS_DECODE ++ CPU fault isolation : $USE_CPU_FAULT_ISOLATION + EOF +diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env +index 12fd766..7cb18e8 100644 +--- a/misc/rasdaemon.env ++++ b/misc/rasdaemon.env +@@ -27,3 +27,20 @@ PAGE_CE_THRESHOLD="50" + # soft-then-hard First try to soft offline, then try hard offlining. + # Note: default offline choice is "soft". + PAGE_CE_ACTION="soft" ++ ++# CPU Online Fault Isolation ++# Whether to enable cpu online fault isolation (yes|no). ++CPU_ISOLATION_ENABLE="no" ++# Specify the threshold of CE numbers. 
++# ++# Format: ++# [0-9]+[unit] ++# ++# Supported units: ++# CPU_CE_THRESHOLD: no unit ++# CPU_ISOLATION_CYCLE: D|d (day), H|h (hour), M|m (minute), S|s (second), default is in second ++CPU_CE_THRESHOLD="18" ++CPU_ISOLATION_CYCLE="24h" ++ ++# Prevent excessive isolation from causing an avalanche effect ++CPU_ISOLATION_LIMIT="10" +\ No newline at end of file +diff --git a/queue.c b/queue.c +new file mode 100644 +index 0000000..65b6fb8 +--- /dev/null ++++ b/queue.c +@@ -0,0 +1,119 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++#include ++#include ++#include "queue.h" ++#include "ras-logger.h" ++ ++int is_empty(struct link_queue *queue) ++{ ++ if (queue) ++ return queue->size == 0; ++ ++ return 1; ++} ++ ++struct link_queue *init_queue(void) ++{ ++ struct link_queue *queue = NULL; ++ ++ queue = (struct link_queue *)malloc(sizeof(struct link_queue)); ++ if (queue == NULL) { ++ log(TERM, LOG_ERR, "Failed to allocate memory for queue.\n"); ++ return NULL; ++ } ++ ++ queue->size = 0; ++ queue->head = NULL; ++ queue->tail = NULL; ++ ++ return queue; ++} ++ ++void clear_queue(struct link_queue *queue) ++{ ++ if (queue == NULL) ++ return; ++ ++ struct queue_node *node = queue->head; ++ struct queue_node *tmp = NULL; ++ ++ while (node != NULL) { ++ tmp = node; ++ node = node->next; ++ free(tmp); ++ } ++ ++ queue->head = NULL; ++ queue->tail = NULL; ++ queue->size = 0; ++} ++ ++void free_queue(struct link_queue *queue) ++{ ++ clear_queue(queue); ++ ++ if (queue) ++ free(queue); ++} ++ ++/* It should be guranteed that the param is not NULL */ ++void push(struct link_queue *queue, struct queue_node *node) ++{ ++ /* there is no element in the queue */ ++ if (queue->head == NULL) ++ queue->head = node; ++ else ++ queue->tail->next = node; ++ ++ queue->tail = node; ++ (queue->size)++; ++} ++ ++int pop(struct link_queue *queue) ++{ ++ struct queue_node *tmp = NULL; ++ ++ if (queue == NULL || is_empty(queue)) ++ return -1; ++ ++ tmp = queue->head; ++ queue->head = queue->head->next; ++ free(tmp); ++ (queue->size)--; ++ ++ return 0; ++} ++ ++struct queue_node *front(struct link_queue *queue) ++{ ++ if (queue == NULL) ++ return NULL; ++ ++ return queue->head; ++} ++ ++struct queue_node *node_create(time_t time, unsigned int value) ++{ ++ struct queue_node *node = NULL; ++ ++ node = (struct queue_node *)malloc(sizeof(struct queue_node)); ++ if (node != NULL) { ++ node->time = time; ++ node->value = value; ++ node->next = NULL; ++ } ++ ++ return node; ++} +diff --git a/queue.h b/queue.h +new file 
mode 100644 +index 0000000..5459f40 +--- /dev/null ++++ b/queue.h +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#ifndef __RAS_QUEUE_H ++#define __RAS_QUEUE_H ++ ++struct queue_node { ++ time_t time; ++ unsigned int value; ++ struct queue_node *next; ++}; ++ ++struct link_queue { ++ struct queue_node *head; ++ struct queue_node *tail; ++ int size; ++}; ++ ++int is_empty(struct link_queue *queue); ++struct link_queue *init_queue(void); ++void clear_queue(struct link_queue *queue); ++void free_queue(struct link_queue *queue); ++void push(struct link_queue *queue, struct queue_node *node); ++int pop(struct link_queue *queue); ++struct queue_node *front(struct link_queue *queue); ++struct queue_node *node_create(time_t time, unsigned int value); ++ ++#endif +diff --git a/ras-arm-handler.c b/ras-arm-handler.c +index 1149dc6..9c7a3c3 100644 +--- a/ras-arm-handler.c ++++ b/ras-arm-handler.c +@@ -22,6 +22,10 @@ + #include "ras-report.h" + #include "ras-non-standard-handler.h" + #include "non-standard-ampere.h" ++#include "ras-cpu-isolation.h" ++ ++#define ARM_ERR_VALID_ERROR_COUNT BIT(0) ++#define ARM_ERR_VALID_FLAGS BIT(1) + + void display_raw_data(struct trace_seq *s, + const uint8_t *buf, +@@ -42,6 +46,93 @@ void display_raw_data(struct trace_seq *s, + } + } + ++#ifdef HAVE_CPU_FAULT_ISOLATION ++static int count_errors(struct ras_arm_event *ev) ++{ ++ struct ras_arm_err_info *err_info; ++ 
int num_pei; ++ int err_info_size = sizeof(struct ras_arm_err_info); ++ int num = 0; ++ int i; ++ int error_count; ++ ++ if (ev->pei_len % err_info_size != 0) { ++ log(TERM, LOG_ERR, ++ "The event data does not match to the ARM Processor Error Information Structure\n"); ++ return num; ++ } ++ num_pei = ev->pei_len / err_info_size; ++ err_info = (struct ras_arm_err_info *)(ev->pei_error); ++ ++ for (i = 0; i < num_pei; ++i) { ++ error_count = 1; ++ if (err_info->validation_bits & ARM_ERR_VALID_ERROR_COUNT) { ++ /* ++ * The value of this field is defined as follows: ++ * 0: Single Error ++ * 1: Multiple Errors ++ * 2-65535: Error Count ++ */ ++ error_count = err_info->multiple_error + 1; ++ } ++ ++ num += error_count; ++ err_info += 1; ++ } ++ log(TERM, LOG_INFO, "%d error in cpu core catched\n", num); ++ return num; ++} ++ ++static int ras_handle_cpu_error(struct trace_seq *s, ++ struct pevent_record *record, ++ struct event_format *event, ++ struct ras_arm_event *ev, time_t now) ++{ ++ unsigned long long val; ++ int cpu; ++ char *severity; ++ struct error_info err_info; ++ ++ if (pevent_get_field_val(s, event, "cpu", record, &val, 1) < 0) ++ return -1; ++ cpu = val; ++ trace_seq_printf(s, "\n cpu: %d", cpu); ++ ++ /* record cpu error */ ++ if (pevent_get_field_val(s, event, "sev", record, &val, 1) < 0) ++ return -1; ++ /* refer to UEFI_2_9 specification chapter N2.2 Table N-5 */ ++ switch (val) { ++ case GHES_SEV_NO: ++ severity = "Informational"; ++ break; ++ case GHES_SEV_CORRECTED: ++ severity = "Corrected"; ++ break; ++ case GHES_SEV_RECOVERABLE: ++ severity = "Recoverable"; ++ break; ++ default: ++ case GHES_SEV_PANIC: ++ severity = "Fatal"; ++ } ++ trace_seq_printf(s, "\n severity: %s", severity); ++ ++ if (val == GHES_SEV_CORRECTED) { ++ int nums = count_errors(ev); ++ ++ if (nums > 0) { ++ err_info.nums = nums; ++ err_info.time = now; ++ err_info.err_type = val; ++ ras_record_cpu_error(&err_info, cpu); ++ } ++ } ++ ++ return 0; ++} ++#endif ++ + int 
ras_arm_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +@@ -52,6 +143,7 @@ int ras_arm_event_handler(struct trace_seq *s, + struct tm *tm; + struct ras_arm_event ev; + int len = 0; ++ + memset(&ev, 0, sizeof(ev)); + + /* +@@ -139,6 +231,11 @@ int ras_arm_event_handler(struct trace_seq *s, + display_raw_data(s, ev.vsei_error, ev.oem_len); + #endif + ++#ifdef HAVE_CPU_FAULT_ISOLATION ++ if (ras_handle_cpu_error(s, record, event, &ev, now) < 0) ++ return -1; ++#endif ++ + /* Insert data into the SGBD */ + #ifdef HAVE_SQLITE3 + ras_store_arm_record(ras, &ev); +diff --git a/ras-arm-handler.h b/ras-arm-handler.h +index 563a2d3..52813e7 100644 +--- a/ras-arm-handler.h ++++ b/ras-arm-handler.h +@@ -17,6 +17,24 @@ + #include "ras-events.h" + #include "libtrace/event-parse.h" + ++/* ++ * ARM Processor Error Information Structure, According to ++ * UEFI_2_9 specification chapter N2.4.4. ++ */ ++#pragma pack(1) ++struct ras_arm_err_info { ++ uint8_t version; ++ uint8_t length; ++ uint16_t validation_bits; ++ uint8_t type; ++ uint16_t multiple_error; ++ uint8_t flags; ++ uint64_t error_info; ++ uint64_t virt_fault_addr; ++ uint64_t physical_fault_addr; ++}; ++#pragma pack() ++ + int ras_arm_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context); +diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c +new file mode 100644 +index 0000000..1694a08 +--- /dev/null ++++ b/ras-cpu-isolation.c +@@ -0,0 +1,388 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "ras-logger.h" ++#include "ras-cpu-isolation.h" ++ ++#define SECOND_OF_MON (30 * 24 * 60 * 60) ++#define SECOND_OF_DAY (24 * 60 * 60) ++#define SECOND_OF_HOU (60 * 60) ++#define SECOND_OF_MIN (60) ++ ++#define LIMIT_OF_CPU_THRESHOLD 10000 ++#define INIT_OF_CPU_THRESHOLD 18 ++#define DEC_CHECK 10 ++#define LAST_BIT_OF_UL 5 ++ ++static struct cpu_info *cpu_infos; ++static unsigned int ncores; ++static unsigned int enabled = 1; ++static const char *cpu_path_format = "/sys/devices/system/cpu/cpu%d/online"; ++ ++static const struct param normal_units[] = { ++ {"", 1}, ++ {} ++}; ++ ++static const struct param cycle_units[] = { ++ {"d", SECOND_OF_DAY}, ++ {"h", SECOND_OF_HOU}, ++ {"m", SECOND_OF_MIN}, ++ {"s", 1}, ++ {} ++}; ++ ++static struct isolation_param threshold = { ++ .name = "CPU_CE_THRESHOLD", ++ .units = normal_units, ++ .value = INIT_OF_CPU_THRESHOLD, ++ .limit = LIMIT_OF_CPU_THRESHOLD ++}; ++ ++static struct isolation_param cpu_limit = { ++ .name = "CPU_ISOLATION_LIMIT", ++ .units = normal_units ++}; ++ ++static struct isolation_param cycle = { ++ .name = "CPU_ISOLATION_CYCLE", ++ .units = cycle_units, ++ .value = SECOND_OF_DAY, ++ .limit = SECOND_OF_MON ++}; ++ ++static const char * const cpu_state[] = { ++ [CPU_OFFLINE] = "offline", ++ [CPU_ONLINE] = "online", ++ [CPU_OFFLINE_FAILED] = "offline-failed", ++ [CPU_UNKNOWN] = "unknown" ++}; ++ ++static int open_sys_file(unsigned int cpu, int __oflag, const char *format) ++{ ++ int fd; ++ char path[PATH_MAX] = ""; ++ char real_path[PATH_MAX] = ""; ++ ++ snprintf(path, sizeof(path), format, cpu); ++ if (strlen(path) > PATH_MAX || realpath(path, 
real_path) == NULL) { ++ log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, path); ++ return -1; ++ } ++ fd = open(real_path, __oflag); ++ if (fd == -1) { ++ log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, real_path); ++ return -1; ++ } ++ ++ return fd; ++} ++ ++static int get_cpu_status(unsigned int cpu) ++{ ++ int fd, num; ++ char buf[2] = ""; ++ ++ fd = open_sys_file(cpu, O_RDONLY, cpu_path_format); ++ if (fd == -1) ++ return CPU_UNKNOWN; ++ ++ if (read(fd, buf, 1) <= 0 || sscanf(buf, "%d", &num) != 1) ++ num = CPU_UNKNOWN; ++ ++ close(fd); ++ ++ return (num < 0 || num > CPU_UNKNOWN) ? CPU_UNKNOWN : num; ++} ++ ++static int init_cpu_info(unsigned int cpus) ++{ ++ ncores = cpus; ++ cpu_infos = (struct cpu_info *)malloc(sizeof(*cpu_infos) * cpus); ++ if (!cpu_infos) { ++ log(TERM, LOG_ERR, ++ "Failed to allocate memory for cpu infos in %s.\n", __func__); ++ return -1; ++ } ++ ++ for (unsigned int i = 0; i < cpus; ++i) { ++ cpu_infos[i].ce_nums = 0; ++ cpu_infos[i].state = get_cpu_status(i); ++ cpu_infos[i].ce_queue = init_queue(); ++ ++ if (cpu_infos[i].ce_queue == NULL) { ++ log(TERM, LOG_ERR, ++ "Failed to allocate memory for cpu ce queue in %s.\n", __func__); ++ return -1; ++ } ++ } ++ /* set limit of offlined cpu limit according to number of cpu */ ++ cpu_limit.limit = cpus - 1; ++ cpu_limit.value = 0; ++ ++ return 0; ++} ++ ++static void check_config(struct isolation_param *config) ++{ ++ if (config->value > config->limit) { ++ log(TERM, LOG_WARNING, "Value: %lu exceed limit: %lu, set to limit\n", ++ config->value, config->limit); ++ config->value = config->limit; ++ } ++} ++ ++static int parse_ul_config(struct isolation_param *config, char *env, unsigned long *value) ++{ ++ char *unit = NULL; ++ int env_size, has_unit = 0; ++ ++ if (!env || strlen(env) == 0) ++ return -1; ++ ++ env_size = strlen(env); ++ unit = env + env_size - 1; ++ ++ if (isalpha(*unit)) { ++ has_unit = 1; ++ env_size--; ++ if (env_size <= 0) ++ return -1; ++ } ++ ++ 
for (int i = 0; i < env_size; ++i) { ++ if (isdigit(env[i])) { ++ if (*value > ULONG_MAX / DEC_CHECK || ++ (*value == ULONG_MAX / DEC_CHECK && env[i] - '0' > LAST_BIT_OF_UL)) { ++ log(TERM, LOG_ERR, "%s is out of range: %lu\n", env, ULONG_MAX); ++ return -1; ++ } ++ *value = DEC_CHECK * (*value) + (env[i] - '0'); ++ } else ++ return -1; ++ } ++ ++ if (!has_unit) ++ return 0; ++ ++ for (const struct param *units = config->units; units->name; units++) { ++ /* value character and unit character are both valid */ ++ if (!strcasecmp(unit, units->name)) { ++ if (*value > (ULONG_MAX / units->value)) { ++ log(TERM, LOG_ERR, ++ "%s is out of range: %lu\n", env, ULONG_MAX); ++ return -1; ++ } ++ *value = (*value) * units->value; ++ return 0; ++ } ++ } ++ log(TERM, LOG_ERR, "Invalid unit %s\n", unit); ++ return -1; ++} ++ ++static void init_config(struct isolation_param *config) ++{ ++ char *env = getenv(config->name); ++ unsigned long value = 0; ++ ++ if (parse_ul_config(config, env, &value) < 0) { ++ log(TERM, LOG_ERR, "Invalid %s: %s! 
Use default value %lu.\n", ++ config->name, env, config->value); ++ return; ++ } ++ ++ config->value = value; ++ check_config(config); ++} ++ ++static int check_config_status(void) ++{ ++ char *env = getenv("CPU_ISOLATION_ENABLE"); ++ ++ if (env == NULL || strcasecmp(env, "yes")) ++ return -1; ++ ++ return 0; ++} ++ ++void ras_cpu_isolation_init(unsigned int cpus) ++{ ++ if (init_cpu_info(cpus) < 0 || check_config_status() < 0) { ++ enabled = 0; ++ log(TERM, LOG_WARNING, "Cpu fault isolation is disabled\n"); ++ return; ++ } ++ ++ log(TERM, LOG_INFO, "Cpu fault isolation is enabled\n"); ++ init_config(&threshold); ++ init_config(&cpu_limit); ++ init_config(&cycle); ++} ++ ++void cpu_infos_free(void) ++{ ++ if (cpu_infos) { ++ for (int i = 0; i < ncores; ++i) ++ free_queue(cpu_infos[i].ce_queue); ++ ++ free(cpu_infos); ++ } ++} ++ ++static int do_cpu_offline(unsigned int cpu) ++{ ++ int fd, rc; ++ char buf[2] = ""; ++ ++ cpu_infos[cpu].state = CPU_OFFLINE_FAILED; ++ fd = open_sys_file(cpu, O_RDWR, cpu_path_format); ++ if (fd == -1) ++ return HANDLE_FAILED; ++ ++ strcpy(buf, "0"); ++ rc = write(fd, buf, strlen(buf)); ++ if (rc < 0) { ++ log(TERM, LOG_ERR, "cpu%u offline failed, errno:%d\n", cpu, errno); ++ close(fd); ++ return HANDLE_FAILED; ++ } ++ ++ close(fd); ++ /* check wthether the cpu is isolated successfully */ ++ cpu_infos[cpu].state = get_cpu_status(cpu); ++ ++ if (cpu_infos[cpu].state == CPU_OFFLINE) ++ return HANDLE_SUCCEED; ++ ++ return HANDLE_FAILED; ++} ++ ++static int do_ce_handler(unsigned int cpu) ++{ ++ struct link_queue *queue = cpu_infos[cpu].ce_queue; ++ unsigned int tmp; ++ /* ++ * Since we just count all error numbers in setted cycle, we store the time ++ * and error numbers from current event to the queue, then everytime we ++ * calculate the period from beginning time to ending time, if the period ++ * exceeds setted cycle, we pop the beginning time and error until the period ++ * from new beginning time to ending time is less than cycle. 
++ */ ++ while (queue->head && queue->tail && queue->tail->time - queue->head->time > cycle.value) { ++ tmp = queue->head->value; ++ if (pop(queue) == 0) ++ cpu_infos[cpu].ce_nums -= tmp; ++ } ++ log(TERM, LOG_INFO, ++ "Current number of Corrected Errors in cpu%d in the cycle is %lu\n", ++ cpu, cpu_infos[cpu].ce_nums); ++ ++ if (cpu_infos[cpu].ce_nums >= threshold.value) { ++ log(TERM, LOG_INFO, ++ "Corrected Errors exceeded threshold %lu, try to offline cpu%u\n", ++ threshold.value, cpu); ++ return do_cpu_offline(cpu); ++ } ++ return HANDLE_NOTHING; ++} ++ ++static int error_handler(unsigned int cpu, struct error_info *err_info) ++{ ++ int ret = HANDLE_NOTHING; ++ ++ switch (err_info->err_type) { ++ case CE: ++ ret = do_ce_handler(cpu); ++ break; ++ default: ++ break; ++ } ++ ++ return ret; ++} ++ ++static void record_error_info(unsigned int cpu, struct error_info *err_info) ++{ ++ switch (err_info->err_type) { ++ case CE: ++ { ++ struct queue_node *node = node_create(err_info->time, err_info->nums); ++ ++ if (node == NULL) { ++ log(TERM, LOG_ERR, "Fail to allocate memory for queue node\n"); ++ return; ++ } ++ push(cpu_infos[cpu].ce_queue, node); ++ cpu_infos[cpu].ce_nums += err_info->nums; ++ break; ++ } ++ default: ++ break; ++ } ++} ++ ++void ras_record_cpu_error(struct error_info *err_info, int cpu) ++{ ++ int ret; ++ ++ if (enabled == 0) ++ return; ++ ++ if (cpu >= ncores || cpu < 0) { ++ log(TERM, LOG_ERR, ++ "The current cpu %d has exceed the total number of cpu:%u\n", cpu, ncores); ++ return; ++ } ++ ++ log(TERM, LOG_INFO, "Handling error on cpu%d\n", cpu); ++ cpu_infos[cpu].state = get_cpu_status(cpu); ++ ++ if (cpu_infos[cpu].state != CPU_ONLINE) { ++ log(TERM, LOG_INFO, "Cpu%d is not online or unknown, ignore\n", cpu); ++ return; ++ } ++ ++ record_error_info(cpu, err_info); ++ /* ++ * Since user may change cpu state, we get current offlined ++ * cpu numbers every recording time. 
++ */ ++ if (ncores - sysconf(_SC_NPROCESSORS_ONLN) >= cpu_limit.value) { ++ log(TERM, LOG_WARNING, ++ "Offlined cpus have exceeded limit: %lu, choose to do nothing\n", ++ cpu_limit.value); ++ return; ++ } ++ ++ ret = error_handler(cpu, err_info); ++ if (ret == HANDLE_NOTHING) ++ log(TERM, LOG_WARNING, "Doing nothing in the cpu%d\n", cpu); ++ else if (ret == HANDLE_SUCCEED) { ++ log(TERM, LOG_INFO, "Offline cpu%d succeed, the state is %s\n", ++ cpu, cpu_state[cpu_infos[cpu].state]); ++ clear_queue(cpu_infos[cpu].ce_queue); ++ cpu_infos[cpu].ce_nums = 0; ++ } else ++ log(TERM, LOG_WARNING, "Offline cpu%d fail, the state is %s\n", ++ cpu, cpu_state[cpu_infos[cpu].state]); ++} +diff --git a/ras-cpu-isolation.h b/ras-cpu-isolation.h +new file mode 100644 +index 0000000..35b5225 +--- /dev/null ++++ b/ras-cpu-isolation.h +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ */ ++ ++#ifndef __RAS_CPU_ISOLATION_H ++#define __RAS_CPU_ISOLATION_H ++ ++#include "queue.h" ++ ++#define MAX_BUF_LEN 1024 ++ ++struct param { ++ char *name; ++ unsigned long value; ++}; ++ ++struct isolation_param { ++ char *name; ++ const struct param *units; ++ unsigned long value; ++ unsigned long limit; ++}; ++ ++enum cpu_state { ++ CPU_OFFLINE, ++ CPU_ONLINE, ++ CPU_OFFLINE_FAILED, ++ CPU_UNKNOWN, ++}; ++ ++enum error_handle_result { ++ HANDLE_FAILED = -1, ++ HANDLE_SUCCEED, ++ HANDLE_NOTHING, ++}; ++ ++enum error_type { ++ CE = 1 ++}; ++ ++struct cpu_info { ++ unsigned long ce_nums; ++ struct link_queue *ce_queue; ++ enum cpu_state state; ++}; ++ ++struct error_info { ++ unsigned long nums; ++ time_t time; ++ enum error_type err_type; ++}; ++ ++void ras_cpu_isolation_init(unsigned int cpus); ++void ras_record_cpu_error(struct error_info *err_info, int cpu); ++void cpu_infos_free(void); ++ ++#endif +diff --git a/ras-events.c b/ras-events.c +index fe4bd26..2a7d709 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -41,6 +41,7 @@ + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-page-isolation.h" ++#include "ras-cpu-isolation.h" + + /* + * Polling time, if read() doesn't block. 
Currently, trace_pipe_raw never +@@ -855,6 +856,10 @@ int handle_ras_events(int record_events) + + cpus = get_num_cpus(ras); + ++#ifdef HAVE_CPU_FAULT_ISOLATION ++ ras_cpu_isolation_init(cpus); ++#endif ++ + #ifdef HAVE_MCE + rc = register_mce_handler(ras, cpus); + if (rc) +@@ -981,6 +986,8 @@ err: + } + free(ras); + } +- ++#ifdef HAVE_CPU_FAULT_ISOLATION ++ cpu_infos_free(); ++#endif + return rc; + } +-- +2.33.1 + diff --git a/1004-rasdaemon-Support-cpu-fault-isolation-for-recoverabl.patch b/1004-rasdaemon-Support-cpu-fault-isolation-for-recoverabl.patch new file mode 100644 index 0000000000000000000000000000000000000000..88a1d1b48139a377b87b3258eaccb1d03be81548 --- /dev/null +++ b/1004-rasdaemon-Support-cpu-fault-isolation-for-recoverabl.patch @@ -0,0 +1,151 @@ +From 4b72881f3b264a4268a36741c8568f922557c4b4 Mon Sep 17 00:00:00 2001 +From: Shengwei Luo +Date: Wed, 23 Feb 2022 17:23:27 +0800 +Subject: [PATCH 04/85] rasdaemon: Support cpu fault isolation for recoverable + errors + +When the recoverable errors in cpu core occurred, try to offline +the related cpu core. 
+ +Signed-off-by: Shengwei Luo +Signed-off-by: Junchong Pan +Signed-off-by: Lei Feng +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + ras-arm-handler.c | 22 +++++++++++++++++++--- + ras-cpu-isolation.c | 17 +++++++++++++++++ + ras-cpu-isolation.h | 4 +++- + 3 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/ras-arm-handler.c b/ras-arm-handler.c +index 9c7a3c3..a0dfc51 100644 +--- a/ras-arm-handler.c ++++ b/ras-arm-handler.c +@@ -26,6 +26,7 @@ + + #define ARM_ERR_VALID_ERROR_COUNT BIT(0) + #define ARM_ERR_VALID_FLAGS BIT(1) ++#define BIT2 2 + + void display_raw_data(struct trace_seq *s, + const uint8_t *buf, +@@ -47,7 +48,20 @@ void display_raw_data(struct trace_seq *s, + } + + #ifdef HAVE_CPU_FAULT_ISOLATION +-static int count_errors(struct ras_arm_event *ev) ++static int is_core_failure(struct ras_arm_err_info *err_info) ++{ ++ if (err_info->validation_bits & ARM_ERR_VALID_FLAGS) { ++ /* ++ * core failure: ++ * Bit 0\1\3: (at least 1) ++ * Bit 2: 0 ++ */ ++ return (err_info->flags & 0xf) && !(err_info->flags & (0x1 << BIT2)); ++ } ++ return 0; ++} ++ ++static int count_errors(struct ras_arm_event *ev, int sev) + { + struct ras_arm_err_info *err_info; + int num_pei; +@@ -75,6 +89,8 @@ static int count_errors(struct ras_arm_event *ev) + */ + error_count = err_info->multiple_error + 1; + } ++ if (sev == GHES_SEV_RECOVERABLE && !is_core_failure(err_info)) ++ error_count = 0; + + num += error_count; + err_info += 1; +@@ -118,8 +134,8 @@ static int ras_handle_cpu_error(struct trace_seq *s, + } + trace_seq_printf(s, "\n severity: %s", severity); + +- if (val == GHES_SEV_CORRECTED) { +- int nums = count_errors(ev); ++ if (val == GHES_SEV_CORRECTED || val == GHES_SEV_RECOVERABLE) { ++ int nums = count_errors(ev, val); + + if (nums > 0) { + err_info.nums = nums; +diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c +index 1694a08..90633fd 100644 +--- a/ras-cpu-isolation.c ++++ b/ras-cpu-isolation.c +@@ -126,6 +126,7 @@ static int 
init_cpu_info(unsigned int cpus) + + for (unsigned int i = 0; i < cpus; ++i) { + cpu_infos[i].ce_nums = 0; ++ cpu_infos[i].uce_nums = 0; + cpu_infos[i].state = get_cpu_status(i); + cpu_infos[i].ce_queue = init_queue(); + +@@ -306,6 +307,15 @@ static int do_ce_handler(unsigned int cpu) + return HANDLE_NOTHING; + } + ++static int do_uce_handler(unsigned int cpu) ++{ ++ if (cpu_infos[cpu].uce_nums > 0) { ++ log(TERM, LOG_INFO, "Uncorrected Errors occurred, try to offline cpu%u\n", cpu); ++ return do_cpu_offline(cpu); ++ } ++ return HANDLE_NOTHING; ++} ++ + static int error_handler(unsigned int cpu, struct error_info *err_info) + { + int ret = HANDLE_NOTHING; +@@ -314,6 +324,9 @@ static int error_handler(unsigned int cpu, struct error_info *err_info) + case CE: + ret = do_ce_handler(cpu); + break; ++ case UCE: ++ ret = do_uce_handler(cpu); ++ break; + default: + break; + } +@@ -336,6 +349,9 @@ static void record_error_info(unsigned int cpu, struct error_info *err_info) + cpu_infos[cpu].ce_nums += err_info->nums; + break; + } ++ case UCE: ++ cpu_infos[cpu].uce_nums++; ++ break; + default: + break; + } +@@ -382,6 +398,7 @@ void ras_record_cpu_error(struct error_info *err_info, int cpu) + cpu, cpu_state[cpu_infos[cpu].state]); + clear_queue(cpu_infos[cpu].ce_queue); + cpu_infos[cpu].ce_nums = 0; ++ cpu_infos[cpu].uce_nums = 0; + } else + log(TERM, LOG_WARNING, "Offline cpu%d fail, the state is %s\n", + cpu, cpu_state[cpu_infos[cpu].state]); +diff --git a/ras-cpu-isolation.h b/ras-cpu-isolation.h +index 35b5225..5682106 100644 +--- a/ras-cpu-isolation.h ++++ b/ras-cpu-isolation.h +@@ -45,10 +45,12 @@ enum error_handle_result { + }; + + enum error_type { +- CE = 1 ++ CE = 1, ++ UCE + }; + + struct cpu_info { ++ unsigned long uce_nums; + unsigned long ce_nums; + struct link_queue *ce_queue; + enum cpu_state state; +-- +2.33.1 + diff --git a/1005-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch b/1005-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch new 
file mode 100644 index 0000000000000000000000000000000000000000..d7b912754d9ed525135cce368ccfec333bc86086 --- /dev/null +++ b/1005-rasdaemon-Fix-some-print-format-issues-for-hisi-comm.patch @@ -0,0 +1,77 @@ +From 5ed8df237ee0bc7f882259a8d05d7bce5cd98dab Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Wed, 20 Oct 2021 14:33:39 +0800 +Subject: [PATCH 05/85] rasdaemon: Fix some print format issues for hisi common + error section + +It is not right to use '%d' to print uint8_t and uint16_t, although +there is no function issue. Change to use '%hhu' and '%hu' separately. + +Signed-off-by: Xiaofei Tan +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisilicon.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index a6f5e78..41a9632 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -192,7 +192,7 @@ static const char* get_soc_desc(uint8_t soc_id) + static void decode_module(struct hisi_event *event, uint8_t module_id) + { + if (module_id >= sizeof(module_name)/sizeof(char *)) +- HISI_SNPRINTF(event->error_msg, "module=unknown(id=%d) ", module_id); ++ HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id); + else + HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); + } +@@ -201,36 +201,36 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, + const struct hisi_common_error_section *err, + struct hisi_event *event) + { +- HISI_SNPRINTF(event->error_msg, "[ table_version=%d", err->version); ++ HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version); + if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) + HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id)); + + if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) +- HISI_SNPRINTF(event->error_msg, "socket_id=%d", err->socket_id); ++ HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id); 
+ + if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) +- HISI_SNPRINTF(event->error_msg, "totem_id=%d", err->totem_id); ++ HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) +- HISI_SNPRINTF(event->error_msg, "nimbus_id=%d", err->nimbus_id); ++ HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) +- HISI_SNPRINTF(event->error_msg, "subsystem_id=%d", err->subsystem_id); ++ HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID)) + decode_module(event, err->module_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) +- HISI_SNPRINTF(event->error_msg, "submodule_id=%d", err->submodule_id); ++ HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) +- HISI_SNPRINTF(event->error_msg, "core_id=%d", err->core_id); ++ HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) +- HISI_SNPRINTF(event->error_msg, "port_id=%d", err->port_id); ++ HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id); + + if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) +- HISI_SNPRINTF(event->error_msg, "err_type=%d", err->err_type); ++ HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type); + + if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) + HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x", +-- +2.33.1 + diff --git a/1006-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch b/1006-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch new file mode 100644 index 0000000000000000000000000000000000000000..7d9563f6af0575418f00e49372cf095e99287c81 --- /dev/null +++ b/1006-rasdaemon-Modify-recording-Hisilicon-common-error-da.patch @@ -0,0 +1,229 @@ +From 
29e82255ec841cc042e1f5733cfe267b02a78db8 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 2 Mar 2022 12:20:40 +0000 +Subject: [PATCH 06/85] rasdaemon: Modify recording Hisilicon common error data + +The error statistics for the Hisilicon common +error need to do based on module, error severity etc. + +Modify recording Hisilicon common error data as separate fields +in the sql db table instead of the combined single field. + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisilicon.c | 126 ++++++++++++++++++++++++++++++++------- + 1 file changed, 104 insertions(+), 22 deletions(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 41a9632..cd0ab3f 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -17,6 +17,7 @@ + #include "non-standard-hisilicon.h" + + #define HISI_BUF_LEN 2048 ++#define HISI_PCIE_INFO_BUF_LEN 256 + + struct hisi_common_error_section { + uint32_t val_bits; +@@ -63,12 +64,25 @@ enum { + enum { + HISI_COMMON_FIELD_ID, + HISI_COMMON_FIELD_TIMESTAMP, +- HISI_COMMON_FIELD_ERR_INFO, ++ HISI_COMMON_FIELD_VERSION, ++ HISI_COMMON_FIELD_SOC_ID, ++ HISI_COMMON_FIELD_SOCKET_ID, ++ HISI_COMMON_FIELD_TOTEM_ID, ++ HISI_COMMON_FIELD_NIMBUS_ID, ++ HISI_COMMON_FIELD_SUB_SYSTEM_ID, ++ HISI_COMMON_FIELD_MODULE_ID, ++ HISI_COMMON_FIELD_SUB_MODULE_ID, ++ HISI_COMMON_FIELD_CORE_ID, ++ HISI_COMMON_FIELD_PORT_ID, ++ HISI_COMMON_FIELD_ERR_TYPE, ++ HISI_COMMON_FIELD_PCIE_INFO, ++ HISI_COMMON_FIELD_ERR_SEVERITY, + HISI_COMMON_FIELD_REGS_DUMP, + }; + + struct hisi_event { + char error_msg[HISI_BUF_LEN]; ++ char pcie_info[HISI_PCIE_INFO_BUF_LEN]; + char reg_msg[HISI_BUF_LEN]; + }; + +@@ -126,14 +140,26 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) + + #ifdef HAVE_SQLITE3 + static const struct db_fields hisi_common_section_fields[] = { +- { .name = "id", .type = "INTEGER PRIMARY KEY" }, +- { .name = "timestamp", .type = "TEXT" }, +- { .name = 
"err_info", .type = "TEXT" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "version", .type = "INTEGER" }, ++ { .name = "soc_id", .type = "INTEGER" }, ++ { .name = "socket_id", .type = "INTEGER" }, ++ { .name = "totem_id", .type = "INTEGER" }, ++ { .name = "nimbus_id", .type = "INTEGER" }, ++ { .name = "sub_system_id", .type = "INTEGER" }, ++ { .name = "module_id", .type = "TEXT" }, ++ { .name = "sub_module_id", .type = "INTEGER" }, ++ { .name = "core_id", .type = "INTEGER" }, ++ { .name = "port_id", .type = "INTEGER" }, ++ { .name = "err_type", .type = "INTEGER" }, ++ { .name = "pcie_info", .type = "TEXT" }, ++ { .name = "err_severity", .type = "TEXT" }, + { .name = "regs_dump", .type = "TEXT" }, + }; + + static const struct db_table_descriptor hisi_common_section_tab = { +- .name = "hisi_common_section", ++ .name = "hisi_common_section_v2", + .fields = hisi_common_section_fields, + .num_fields = ARRAY_SIZE(hisi_common_section_fields), + }; +@@ -189,12 +215,20 @@ static const char* get_soc_desc(uint8_t soc_id) + return soc_desc[soc_id]; + } + +-static void decode_module(struct hisi_event *event, uint8_t module_id) ++static void decode_module(struct ras_ns_ev_decoder *ev_decoder, ++ struct hisi_event *event, uint8_t module_id) + { +- if (module_id >= sizeof(module_name)/sizeof(char *)) ++ if (module_id >= sizeof(module_name)/sizeof(char *)) { + HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id); +- else ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_MODULE_ID, ++ 0, "unknown"); ++ } else { + HISI_SNPRINTF(event->error_msg, "module=%s ", module_name[module_id]); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_MODULE_ID, ++ 0, module_name[module_id]); ++ } + } + + static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, +@@ -202,43 +236,93 @@ static void decode_hisi_common_section_hdr(struct 
ras_ns_ev_decoder *ev_decoder, + struct hisi_event *event) + { + HISI_SNPRINTF(event->error_msg, "[ table_version=%hhu", err->version); +- if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_VERSION, ++ err->version, NULL); ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SOC_ID)) { + HISI_SNPRINTF(event->error_msg, "soc=%s", get_soc_desc(err->soc_id)); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SOC_ID, ++ err->soc_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SOCKET_ID)) { + HISI_SNPRINTF(event->error_msg, "socket_id=%hhu", err->socket_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SOCKET_ID, ++ err->socket_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_TOTEM_ID)) { + HISI_SNPRINTF(event->error_msg, "totem_id=%hhu", err->totem_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_TOTEM_ID, ++ err->totem_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_NIMBUS_ID)) { + HISI_SNPRINTF(event->error_msg, "nimbus_id=%hhu", err->nimbus_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_NIMBUS_ID, ++ err->nimbus_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBSYSTEM_ID)) { + HISI_SNPRINTF(event->error_msg, "subsystem_id=%hhu", err->subsystem_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SUB_SYSTEM_ID, ++ err->subsystem_id, NULL); ++ } + + if (err->val_bits & BIT(HISI_COMMON_VALID_MODULE_ID)) +- decode_module(event, err->module_id); ++ decode_module(ev_decoder, event, err->module_id); + +- if (err->val_bits & 
BIT(HISI_COMMON_VALID_SUBMODULE_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_SUBMODULE_ID)) { + HISI_SNPRINTF(event->error_msg, "submodule_id=%hhu", err->submodule_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_SUB_MODULE_ID, ++ err->submodule_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_CORE_ID)) { + HISI_SNPRINTF(event->error_msg, "core_id=%hhu", err->core_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_CORE_ID, ++ err->core_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_PORT_ID)) { + HISI_SNPRINTF(event->error_msg, "port_id=%hhu", err->port_id); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_PORT_ID, ++ err->port_id, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_TYPE)) { + HISI_SNPRINTF(event->error_msg, "err_type=%hu", err->err_type); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_INT, ++ HISI_COMMON_FIELD_ERR_TYPE, ++ err->err_type, NULL); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_PCIE_INFO)) { + HISI_SNPRINTF(event->error_msg, "pcie_device_id=%04x:%02x:%02x.%x", + err->pcie_info.segment, err->pcie_info.bus, + err->pcie_info.device, err->pcie_info.function); ++ HISI_SNPRINTF(event->pcie_info, "%04x:%02x:%02x.%x", ++ err->pcie_info.segment, err->pcie_info.bus, ++ err->pcie_info.device, err->pcie_info.function); ++ record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_PCIE_INFO, ++ 0, event->pcie_info); ++ } + +- if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) ++ if (err->val_bits & BIT(HISI_COMMON_VALID_ERR_SEVERITY)) { + HISI_SNPRINTF(event->error_msg, "err_severity=%s", err_severity(err->err_severity)); ++ 
record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, ++ HISI_COMMON_FIELD_ERR_SEVERITY, ++ 0, err_severity(err->err_severity)); ++ } + + HISI_SNPRINTF(event->error_msg, "]"); + } +@@ -283,8 +367,6 @@ static int decode_hisi_common_section(struct ras_events *ras, + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_TIMESTAMP, + 0, event->timestamp); +- record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, +- HISI_COMMON_FIELD_ERR_INFO, 0, hevent.error_msg); + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_REGS_DUMP, 0, hevent.reg_msg); + step_vendor_data_tab(ev_decoder, "hisi_common_section_tab"); +-- +2.33.1 + diff --git a/1007-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch b/1007-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch new file mode 100644 index 0000000000000000000000000000000000000000..351b5100f29a1f004be0eef5aa6d82efc4a27c4b --- /dev/null +++ b/1007-rasdaemon-ras-mc-ctl-Modify-error-statistics-for-HiS.patch @@ -0,0 +1,98 @@ +From ee109afe0bf76c436c19fa0dc8ec70ded87e2677 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 24 Feb 2022 18:02:14 +0000 +Subject: [PATCH 07/85] rasdaemon: ras-mc-ctl: Modify error statistics for + HiSilicon KunPeng9xx common errors + +Modify the error statistics for the HiSilicon KunPeng9xx platforms common errors +to display the statistics and error info based on the module and the error severity. 
+ +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 40 +++++++++++++++++++++++++++++----------- + 1 file changed, 29 insertions(+), 11 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 0691f29..c4bef8f 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1537,7 +1537,7 @@ sub vendor_errors_summary + require DBI; + my ($num_args, $platform_id); + my ($query, $query_handle, $count, $out); +- my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info); ++ my ($module_id, $sub_module_id, $err_severity, $err_sev); + + $num_args = $#ARGV + 1; + $platform_id = 0; +@@ -1614,13 +1614,18 @@ sub vendor_errors_summary + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $query = "select err_info, count(*) from hisi_common_section"; ++ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($err_info, $count)); ++ $query_handle->bind_columns(\($err_severity, $module_id, $count)); + $out = ""; ++ $err_sev = ""; + while($query_handle->fetch()) { +- $out .= "\terrors: $count\n"; ++ if ($err_severity ne $err_sev) { ++ $out .= "$err_severity errors:\n"; ++ $err_sev = $err_severity; ++ } ++ $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events summary:\n$out\n"; +@@ -1638,8 +1643,8 @@ sub vendor_errors + require DBI; + my ($num_args, $platform_id); + my ($query, $query_handle, $id, $timestamp, $out); +- my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id); +- my ($module_id, $sub_module_id, $err_severity, $err_type, $err_info, $regs); ++ my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id); ++ my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs); + + $num_args = $#ARGV + 
1; + $platform_id = 0; +@@ -1727,15 +1732,28 @@ sub vendor_errors + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $query = "select id, timestamp, err_info, regs_dump from hisi_common_section order by id"; ++ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $timestamp, $err_info, $regs)); ++ $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp "; +- $out .= "Error Info:$err_info \n" if ($err_info); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "totem_id=$totem_id, " if ($totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "pcie_info=$pcie_info, " if ($pcie_info); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs" if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events:\n$out\n"; +-- +2.33.1 + diff --git a/1008-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch b/1008-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch new file mode 100644 index 0000000000000000000000000000000000000000..caee0f5803b859f69724ca93769c6091003de632 --- /dev/null +++ b/1008-rasdaemon-ras-mc-ctl-Reformat-error-info-of-the-HiSi.patch @@ -0,0 +1,57 @@ +From 5925333ff040bab348e2c8e439ba05421c307958 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 5 Mar 2022 16:18:55 +0000 +Subject: [PATCH 08/85] rasdaemon: ras-mc-ctl: Reformat error info of the + HiSilicon Kunpeng920 + +Reformat the code to display the error info of HiSilicon Kunpeng920. 
+ +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index c4bef8f..00af3a7 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1671,8 +1671,9 @@ sub vendor_errors + $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); + $out .= "module_id=$module_id, " if ($module_id); + $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, \n" if ($err_severity); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n"; +@@ -1694,8 +1695,9 @@ sub vendor_errors + $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); + $out .= "module_id=$module_id, " if ($module_id); + $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, \n" if ($err_severity); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n"; +@@ -1719,8 +1721,9 @@ sub vendor_errors + $out .= "core_id=$core_id, " if ($core_id); + $out .= "port_id=$port_id, " if ($port_id); + $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "err_type=$err_type, \n" if ($err_type); +- $out .= "Error Registers: $regs\n\n" if ($regs); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; + } + if ($out ne "") { + print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n"; +-- +2.33.1 + diff --git a/1009-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch 
b/1009-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch new file mode 100644 index 0000000000000000000000000000000000000000..c4392b2ebdd58fb3b509c38eebe12adf261f2781 --- /dev/null +++ b/1009-rasdaemon-ras-mc-ctl-Add-printing-usage-if-necessary.patch @@ -0,0 +1,38 @@ +From a35999326f4063d8cb0ed3813a3938acca09f41b Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 5 Mar 2022 17:01:35 +0000 +Subject: [PATCH 09/85] rasdaemon: ras-mc-ctl: Add printing usage if necessary + parameters are not passed for the vendor-error options + +Add printing usage if necessary parameters are not passed +for the vendor-errors options. + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 00af3a7..0311e59 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1544,6 +1544,7 @@ sub vendor_errors_summary + if ($num_args ne 0) { + $platform_id = $ARGV[0]; + } else { ++ usage(1); + return; + } + +@@ -1651,6 +1652,7 @@ sub vendor_errors + if ($num_args ne 0) { + $platform_id = $ARGV[0]; + } else { ++ usage(1); + return; + } + +-- +2.33.1 + diff --git a/1010-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch b/1010-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a785dfda5d15a8cc4cc1195bc76b7a1ded1be54 --- /dev/null +++ b/1010-rasdaemon-ras-mc-ctl-Add-support-to-display-the-HiSi.patch @@ -0,0 +1,275 @@ +From c79b29bc63d32b8f80782a9860de31cb20b2c289 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 5 Mar 2022 18:19:38 +0000 +Subject: [PATCH 10/85] rasdaemon: ras-mc-ctl: Add support to display the + HiSilicon vendor errors for a specified module + +Add support to display the HiSilicon vendor errors for a specified module. 
+ +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 145 +++++++++++++++++++++++++++------------------ + 1 file changed, 87 insertions(+), 58 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 0311e59..c23d93f 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -96,8 +96,9 @@ Usage: $prog [OPTIONS...] + --errors Shows the errors stored at the error database. + --error-count Shows the corrected and uncorrected error counts using sysfs. + --vendor-errors-summary Presents a summary of the vendor-specific logged errors. +- --vendor-errors Shows the vendor-specific errors stored in the error database. +- --vendor-platforms Shows the supported platforms with platform-ids for the vendor-specific errors. ++ --vendor-errors Shows the vendor-specific errors stored in the error database. ++ --vendor-errors Shows the vendor-specific errors for a specific module stored in the error database. ++ --vendor-platforms List the supported platforms with platform-ids for the vendor-specific errors. + --help This help message. 
+ EOF + +@@ -1535,12 +1536,14 @@ use constant { + sub vendor_errors_summary + { + require DBI; +- my ($num_args, $platform_id); ++ my ($num_args, $platform_id, $found_platform); + my ($query, $query_handle, $count, $out); + my ($module_id, $sub_module_id, $err_severity, $err_sev); + + $num_args = $#ARGV + 1; + $platform_id = 0; ++ $found_platform = 0; ++ + if ($num_args ne 0) { + $platform_id = $ARGV[0]; + } else { +@@ -1552,6 +1555,7 @@ sub vendor_errors_summary + + # HiSilicon Kunpeng920 errors + if ($platform_id eq HISILICON_KUNPENG_920) { ++ $found_platform = 1; + $query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1615,6 +1619,7 @@ sub vendor_errors_summary + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { ++ $found_platform = 1; + $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1636,21 +1641,31 @@ sub vendor_errors_summary + $query_handle->finish; + } + ++ if ($platform_id && !($found_platform)) { ++ print "Platform ID $platform_id is not valid\n"; ++ } ++ + undef($dbh); + } + + sub vendor_errors + { + require DBI; +- my ($num_args, $platform_id); ++ my ($num_args, $platform_id, $found_platform, $module, $found_module); + my ($query, $query_handle, $id, $timestamp, $out); + my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id); + my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs); + + $num_args = $#ARGV + 1; + $platform_id = 0; ++ $found_platform = 0; ++ $module = 0; ++ $found_module = 0; + if ($num_args ne 0) { + $platform_id = $ARGV[0]; ++ if ($num_args gt 1) { ++ $module = $ARGV[1]; ++ } + } else { + usage(1); + return; +@@ -1660,27 +1675,29 @@ sub vendor_errors + + # HiSilicon 
Kunpeng920 errors + if ($platform_id eq HISILICON_KUNPENG_920) { ++ $found_platform = 1; + $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; ++ $found_module = 1; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type1 errors.\n"; + } + $query_handle->finish; + +@@ -1690,21 +1707,22 @@ sub vendor_errors + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; ++ $found_module = 1; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type2 errors.\n"; + } + $query_handle->finish; + +@@ -1714,51 +1732,56 @@ sub vendor_errors + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $sub_module_id, $core_id, $port_id, $err_severity, $err_type, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. $timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "Error Registers: $regs " if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($sub_module_id && uc($module) eq uc($sub_module_id))) { ++ $out .= "$id. 
$timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "Error Registers: $regs " if ($regs); ++ $out .= "\n\n"; ++ $found_module = 1; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 PCIe controller errors.\n"; + } + $query_handle->finish; + } + + # HiSilicon Kunpeng9xx common errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { ++ $found_platform = 1; + $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs)); + $out = ""; + while($query_handle->fetch()) { +- $out .= "$id. 
$timestamp Error Info: "; +- $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "totem_id=$totem_id, " if ($totem_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "pcie_info=$pcie_info, " if ($pcie_info); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs" if ($regs); +- $out .= "\n\n"; ++ if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { ++ $out .= "$id. $timestamp Error Info: "; ++ $out .= "version=$version, "; ++ $out .= "soc_id=$soc_id, " if ($soc_id); ++ $out .= "socket_id=$socket_id, " if ($socket_id); ++ $out .= "totem_id=$totem_id, " if ($totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); ++ $out .= "module_id=$module_id, " if ($module_id); ++ $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); ++ $out .= "core_id=$core_id, " if ($core_id); ++ $out .= "port_id=$port_id, " if ($port_id); ++ $out .= "err_type=$err_type, " if ($err_type); ++ $out .= "pcie_info=$pcie_info, " if ($pcie_info); ++ $out .= "err_severity=$err_severity, " if ($err_severity); ++ $out .= "Error Registers: $regs" if ($regs); ++ $out .= "\n\n"; ++ $found_module = 1; ++ } + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events:\n$out\n"; +@@ -1768,6 +1791,12 @@ sub vendor_errors + $query_handle->finish; + } + ++ if ($platform_id && !($found_platform)) { ++ print "Platform ID $platform_id is not valid\n"; ++ } elsif ($module && !($found_module)) { ++ print "No error record for the module 
$module\n"; ++ } ++ + undef($dbh); + } + +-- +2.33.1 + diff --git a/1011-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch b/1011-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d06290dd907412d8a85a87187791999fd72cc53 --- /dev/null +++ b/1011-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch @@ -0,0 +1,151 @@ +From 6a73fdf7beed1dafe4ea33018e047e36ce796815 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Mon, 7 Mar 2022 12:38:45 +0000 +Subject: [PATCH 11/85] rasdaemon: ras-mc-ctl: Relocate reading and display + Kunpeng920 errors to under Kunpeng9xx + +Relocate reading and display Kunpeng920 errors to under Kunpeng9xx. + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 40 ++++++++++------------------------------ + 1 file changed, 10 insertions(+), 30 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index c23d93f..83ef9de 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1529,7 +1529,6 @@ sub errors + + # Definitions of the vendor platform IDs. 
+ use constant { +- HISILICON_KUNPENG_920 => "Kunpeng920", + HISILICON_KUNPENG_9XX => "Kunpeng9xx", + }; + +@@ -1553,8 +1552,8 @@ sub vendor_errors_summary + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +- # HiSilicon Kunpeng920 errors +- if ($platform_id eq HISILICON_KUNPENG_920) { ++ # HiSilicon Kunpeng9xx errors ++ if ($platform_id eq HISILICON_KUNPENG_9XX) { + $found_platform = 1; + $query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); +@@ -1570,9 +1569,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type1 error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type1 errors.\n\n"; ++ print "HiSilicon Kunpeng9xx OEM type1 error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1590,9 +1587,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type2 error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 OEM type2 errors.\n\n"; ++ print "HiSilicon Kunpeng9xx OEM type2 error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1610,16 +1605,10 @@ sub vendor_errors_summary + $out .= "\t$sub_module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 PCIe controller error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng920 PCIe controller errors.\n\n"; ++ print "HiSilicon Kunpeng9xx PCIe controller error events summary:\n$out\n"; + } + $query_handle->finish; +- } + +- # HiSilicon Kunpeng9xx common errors +- if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $found_platform = 1; + $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1635,8 +1624,6 @@ sub 
vendor_errors_summary + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error events summary:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng9xx common errors.\n\n"; + } + $query_handle->finish; + } +@@ -1673,8 +1660,8 @@ sub vendor_errors + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +- # HiSilicon Kunpeng920 errors +- if ($platform_id eq HISILICON_KUNPENG_920) { ++ # HiSilicon Kunpeng9xx errors ++ if ($platform_id eq HISILICON_KUNPENG_9XX) { + $found_platform = 1; + $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); +@@ -1697,7 +1684,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type1 error events:\n$out\n"; ++ print "HiSilicon Kunpeng9xx OEM type1 error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1722,7 +1709,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 OEM type2 error events:\n$out\n"; ++ print "HiSilicon Kunpeng9xx OEM type2 error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1749,14 +1736,10 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng920 PCIe controller error events:\n$out\n"; ++ print "HiSilicon Kunpeng9xx PCIe controller error events:\n$out\n"; + } + $query_handle->finish; +- } + +- # HiSilicon Kunpeng9xx common errors +- if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $found_platform = 1; + $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1785,8 +1768,6 @@ sub vendor_errors + } + if ($out ne "") { + print "HiSilicon Kunpeng9xx common error 
events:\n$out\n"; +- } else { +- print "No HiSilicon Kunpeng9xx common errors.\n"; + } + $query_handle->finish; + } +@@ -1803,7 +1784,6 @@ sub vendor_errors + sub vendor_platforms + { + print "\nSupported platforms for the vendor-specific errors:\n"; +- print "\tHiSilicon Kunpeng920, platform-id=\"", HISILICON_KUNPENG_920, "\"\n"; + print "\tHiSilicon Kunpeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n"; + print "\n"; + } +-- +2.33.1 + diff --git a/1012-rasdaemon-ras-mc-ctl-Updated-HiSilicon-platform-name.patch b/1012-rasdaemon-ras-mc-ctl-Updated-HiSilicon-platform-name.patch new file mode 100644 index 0000000000000000000000000000000000000000..a04f8e6fd90217810fb9af645b9f0794fa8259fc --- /dev/null +++ b/1012-rasdaemon-ras-mc-ctl-Updated-HiSilicon-platform-name.patch @@ -0,0 +1,128 @@ +From fb4f603ad5ac035df16569ec9aa6b7117301ebf8 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 28 Apr 2022 18:58:43 +0100 +Subject: [PATCH 12/85] rasdaemon: ras-mc-ctl: Updated HiSilicon platform name + +Updated the HiSilicon platform name as KunPeng9xx. + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 83ef9de..e765519 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1529,7 +1529,7 @@ sub errors + + # Definitions of the vendor platform IDs. 
+ use constant { +- HISILICON_KUNPENG_9XX => "Kunpeng9xx", ++ HISILICON_KUNPENG_9XX => "KunPeng9xx", + }; + + sub vendor_errors_summary +@@ -1552,7 +1552,7 @@ sub vendor_errors_summary + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +- # HiSilicon Kunpeng9xx errors ++ # HiSilicon KunPeng9xx errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { + $found_platform = 1; + $query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id"; +@@ -1569,7 +1569,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx OEM type1 error events summary:\n$out\n"; ++ print "HiSilicon KunPeng9xx OEM type1 error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1587,7 +1587,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx OEM type2 error events summary:\n$out\n"; ++ print "HiSilicon KunPeng9xx OEM type2 error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1605,7 +1605,7 @@ sub vendor_errors_summary + $out .= "\t$sub_module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx PCIe controller error events summary:\n$out\n"; ++ print "HiSilicon KunPeng9xx PCIe controller error events summary:\n$out\n"; + } + $query_handle->finish; + +@@ -1623,7 +1623,7 @@ sub vendor_errors_summary + $out .= "\t$module_id: $count\n"; + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx common error events summary:\n$out\n"; ++ print "HiSilicon KunPeng9xx common error events summary:\n$out\n"; + } + $query_handle->finish; + } +@@ -1660,7 +1660,7 @@ sub vendor_errors + + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + +- # HiSilicon Kunpeng9xx errors ++ # HiSilicon KunPeng9xx errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { + $found_platform = 1; + $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, 
sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity"; +@@ -1684,7 +1684,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx OEM type1 error events:\n$out\n"; ++ print "HiSilicon KunPeng9xx OEM type1 error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1709,7 +1709,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx OEM type2 error events:\n$out\n"; ++ print "HiSilicon KunPeng9xx OEM type2 error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1736,7 +1736,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx PCIe controller error events:\n$out\n"; ++ print "HiSilicon KunPeng9xx PCIe controller error events:\n$out\n"; + } + $query_handle->finish; + +@@ -1767,7 +1767,7 @@ sub vendor_errors + } + } + if ($out ne "") { +- print "HiSilicon Kunpeng9xx common error events:\n$out\n"; ++ print "HiSilicon KunPeng9xx common error events:\n$out\n"; + } + $query_handle->finish; + } +@@ -1784,7 +1784,7 @@ sub vendor_errors + sub vendor_platforms + { + print "\nSupported platforms for the vendor-specific errors:\n"; +- print "\tHiSilicon Kunpeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n"; ++ print "\tHiSilicon KunPeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n"; + print "\n"; + } + +-- +2.33.1 + diff --git a/3003-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch b/1013-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch similarity index 90% rename from 3003-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch rename to 1013-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch index 229e0c4582e36006edc170cbb106edca5588d33f..b0ad59b7ceca23ef3fa172e6f9ecbcd5cc1894c1 100644 --- a/3003-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch +++ b/1013-rasdaemon-Fix-the-issue-of-sprintf-data-type-mismatc.patch @@ -1,8 +1,8 @@ -From dce53f6809c4fdab967ecc78f80c8ec2ebd89aca Mon Sep 17 
00:00:00 2001 +From ce25490736f8596d13711700999c16424b3b2487 Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Wed, 20 Oct 2021 14:33:37 +0800 -Subject: [PATCH 3/6] rasdaemon: Fix the issue of sprintf data type mismatch in - uuid_le() +Subject: [PATCH 13/85] rasdaemon: Fix the issue of sprintf data type mismatch + in uuid_le() The data type of sprintf called in the function uuid_le() is mismatch. Arm64 compiler force it to unsigned char by default, and can work normally. @@ -30,7 +30,7 @@ index 5fd3580..1834687 100644 case 3: case 5: diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c -index 7818ed8..86178bf 100644 +index 6ccf5bc..6d5a6f8 100644 --- a/ras-non-standard-handler.c +++ b/ras-non-standard-handler.c @@ -36,7 +36,7 @@ static char *uuid_le(const char *uu) diff --git a/1014-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch b/1014-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch new file mode 100644 index 0000000000000000000000000000000000000000..2de55aa308e9bfac4eebaf206f99c3cb19c110d9 --- /dev/null +++ b/1014-rasdaemon-Fix-the-issue-of-command-option-r-for-hip0.patch @@ -0,0 +1,73 @@ +From 049fc251c32b9a9eaf15a183df451a6a5c937f43 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Wed, 20 Oct 2021 14:33:38 +0800 +Subject: [PATCH 14/85] rasdaemon: Fix the issue of command option -r for hip08 + +It will record event even the option -r is not provided for hip08. +It is not right, and fix it. 
+ +Signed-off-by: Xiaofei Tan +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisi_hip08.c | 6 +++--- + non-standard-hisilicon.c | 6 ++++++ + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c +index ebf03e1..9092183 100644 +--- a/non-standard-hisi_hip08.c ++++ b/non-standard-hisi_hip08.c +@@ -670,7 +670,7 @@ static int decode_hip08_oem_type1_error(struct ras_events *ras, + } + + #ifdef HAVE_SQLITE3 +- if (!ev_decoder->stmt_dec_record) { ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &hip08_oem_type1_event_tab) + != SQLITE_OK) { +@@ -842,7 +842,7 @@ static int decode_hip08_oem_type2_error(struct ras_events *ras, + } + + #ifdef HAVE_SQLITE3 +- if (!ev_decoder->stmt_dec_record) { ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &hip08_oem_type2_event_tab) != SQLITE_OK) { + trace_seq_printf(s, +@@ -992,7 +992,7 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, + } + + #ifdef HAVE_SQLITE3 +- if (!ev_decoder->stmt_dec_record) { ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &hip08_pcie_local_event_tab) != SQLITE_OK) { + trace_seq_printf(s, +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index cd0ab3f..8da891f 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -91,6 +91,9 @@ void record_vendor_data(struct ras_ns_ev_decoder *ev_decoder, + enum hisi_oem_data_type data_type, + int id, int64_t data, const char *text) + { ++ if (ev_decoder->stmt_dec_record == NULL) ++ return; ++ + switch (data_type) { + case HISI_OEM_DATA_TYPE_INT: + sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data); +@@ -108,6 +111,9 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) + { + int rc; 
+ ++ if (ev_decoder->stmt_dec_record == NULL) ++ return 0; ++ + rc = sqlite3_step(ev_decoder->stmt_dec_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) + log(TERM, LOG_ERR, +-- +2.33.1 + diff --git a/1015-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch b/1015-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch new file mode 100644 index 0000000000000000000000000000000000000000..872eb5d977908479be60e996ace956b5248daa0d --- /dev/null +++ b/1015-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch @@ -0,0 +1,36 @@ +From 243b0d9bc40dc8cb10490eb14604cf750bc65e56 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Wed, 20 Oct 2021 14:33:40 +0800 +Subject: [PATCH 15/85] rasdaemon: Add some modules supported by hisi common + error section + +Add some modules supported by hisi common error section. Besides, +HHA is the module for some old platform, and it takes the same place +of MATA, so remove it. + +Signed-off-by: Xiaofei Tan +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisilicon.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 8da891f..d1e1774 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -210,7 +210,11 @@ static const char* module_name[] = { + "SEC", + "RDE", + "MEE", +- "HHA", ++ "L4D", ++ "Tsensor", ++ "ROH", ++ "BTC", ++ "HILINK" + }; + + static const char* get_soc_desc(uint8_t soc_id) +-- +2.33.1 + diff --git a/1016-rasdaemon-Fix-for-a-memory-out-of-bounds-issue-and-o.patch b/1016-rasdaemon-Fix-for-a-memory-out-of-bounds-issue-and-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..e08b2d2c4ea2c01d46017ade7555f932d267274c --- /dev/null +++ b/1016-rasdaemon-Fix-for-a-memory-out-of-bounds-issue-and-o.patch @@ -0,0 +1,91 @@ +From 7a9ec6f75efa7cb1e590c231900720ef6fe32b46 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 28 Apr 2022 22:59:04 +0100 +Subject: [PATCH 16/85] rasdaemon: Fix 
for a memory out-of-bounds issue and + optimized code to remove duplicate function. + +Fixed a memory out-of-bounds issue with string pointers and +optimized code structure to remove duplicate function. + +Signed-off-by: Lei Feng +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisi_hip08.c | 6 +++--- + non-standard-hisilicon.c | 2 +- + ras-non-standard-handler.c | 16 +--------------- + 3 files changed, 5 insertions(+), 19 deletions(-) + +diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c +index 9092183..4ef47ea 100644 +--- a/non-standard-hisi_hip08.c ++++ b/non-standard-hisi_hip08.c +@@ -1014,15 +1014,15 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, + + static struct ras_ns_ev_decoder hip08_ns_ev_decoder[] = { + { +- .sec_type = "1f8161e155d641e6bd107afd1dc5f7c5", ++ .sec_type = "1f8161e1-55d6-41e6-bd10-7afd1dc5f7c5", + .decode = decode_hip08_oem_type1_error, + }, + { +- .sec_type = "45534ea6ce2341158535e07ab3aef91d", ++ .sec_type = "45534ea6-ce23-4115-8535-e07ab3aef91d", + .decode = decode_hip08_oem_type2_error, + }, + { +- .sec_type = "b2889fc9e7d74f9da867af42e98be772", ++ .sec_type = "b2889fc9-e7d7-4f9d-a867-af42e98be772", + .decode = decode_hip08_pcie_local_error, + }, + }; +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index d1e1774..6ee9271 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -387,7 +387,7 @@ static int decode_hisi_common_section(struct ras_events *ras, + + static struct ras_ns_ev_decoder hisi_section_ns_ev_decoder[] = { + { +- .sec_type = "c8b328a899174af69a132e08ab2e7586", ++ .sec_type = "c8b328a8-9917-4af6-9a13-2e08ab2e7586", + .decode = decode_hisi_common_section, + }, + }; +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 6d5a6f8..6932e58 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -52,20 +52,6 @@ static char *uuid_le(const char *uu) + return uuid; + } + 
+-static int uuid_le_cmp(const char *sec_type, const char *uuid2) +-{ +- static char uuid1[32]; +- char *p = uuid1; +- int i; +- static const unsigned char le[16] = { +- 3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; +- +- for (i = 0; i < 16; i++) +- p += sprintf(p, "%.2x", (unsigned char) sec_type[le[i]]); +- *p = 0; +- return strncmp(uuid1, uuid2, 32); +-} +- + int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) + { + struct ras_ns_ev_decoder *list; +@@ -96,7 +82,7 @@ static int find_ns_ev_decoder(const char *sec_type, struct ras_ns_ev_decoder **p + + ns_ev_decoder = ras_ns_ev_dec_list; + while (ns_ev_decoder) { +- if (uuid_le_cmp(sec_type, ns_ev_decoder->sec_type) == 0) { ++ if (strcmp(uuid_le(sec_type), ns_ev_decoder->sec_type) == 0) { + *p_ns_ev_dec = ns_ev_decoder; + match = 1; + break; +-- +2.33.1 + diff --git a/1017-rasdaemon-Add-four-modules-supported-by-HiSilicon-co.patch b/1017-rasdaemon-Add-four-modules-supported-by-HiSilicon-co.patch new file mode 100644 index 0000000000000000000000000000000000000000..52218b4ae3981f88ae9a837c3b46c374527f2b60 --- /dev/null +++ b/1017-rasdaemon-Add-four-modules-supported-by-HiSilicon-co.patch @@ -0,0 +1,35 @@ +From eb51a91b6a0ceb22cb93439cb7e0aa013f82ff4f Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Mon, 31 Oct 2022 18:36:26 +0800 +Subject: [PATCH 17/85] rasdaemon: Add four modules supported by HiSilicon + common section + +Add four modules supported by HiSilicon common error section. 
+ +Signed-off-by: Xiaofei Tan +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisilicon.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 6ee9271..2b00ed6 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -214,7 +214,11 @@ static const char* module_name[] = { + "Tsensor", + "ROH", + "BTC", +- "HILINK" ++ "HILINK", ++ "STARS", ++ "SDMA", ++ "UC", ++ "HBMC", + }; + + static const char* get_soc_desc(uint8_t soc_id) +-- +2.33.1 + diff --git a/1018-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch b/1018-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8ccc49b77b3a090b114c9ee97fbb1546c7540da --- /dev/null +++ b/1018-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch @@ -0,0 +1,85 @@ +From 5a8116e4d54de308fdab7734eebefa71efaf7a59 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 4 Feb 2023 19:15:55 +0000 +Subject: [PATCH 18/85] rasdaemon: Fix poll() on per_cpu trace_pipe_raw blocks + indefinitely + +The error events are not received in the rasdaemon since kernel 6.1-rc6. +This issue is firstly detected and reported, when testing the CXL error +events in the rasdaemon. + +Debugging showed, poll() on trace_pipe_raw in the ras-events.c do not +return and this issue is seen after the commit +42fb0a1e84ff525ebe560e2baf9451ab69127e2b ("tracing/ring-buffer: Have +polling block on watermark"). + +This issue is also verified using a test application for poll() +and select() on per_cpu trace_pipe_raw. + +There is also a bug reported on this issue, +https://lore.kernel.org/all/31eb3b12-3350-90a4-a0d9-d1494db7cf74@oracle.com/ + +This issue occurs for the per_cpu case, which calls the ring_buffer_poll_wait(), +in kernel/trace/ring_buffer.c, with the buffer_percent > 0 and then wait until +the percentage of pages are available. 
The default value set for the +buffer_percent is 50 in the kernel/trace/trace.c. However poll() does not return +even met the percentage of pages condition. + +As a fix, rasdaemon set buffer_percent as 0 through the +/sys/kernel/debug/tracing/instances/rasdaemon/buffer_percent, then the +task will wake up as soon as data is added to any of the specific cpu +buffer and poll() on per_cpu/cpuX/trace_pipe_raw does not block +indefinitely. + +Dependency on the kernel fix commit +3e46d910d8acf94e5360126593b68bf4fee4c4a1("tracing: Fix poll() and select() +do not work on per_cpu trace_pipe and trace_pipe_raw") + +Signed-off-by: Shiju Jose +--- + ras-events.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/ras-events.c b/ras-events.c +index 2a7d709..f9922d4 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -366,6 +366,8 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + int warnonce[n_cpus]; + char pipe_raw[PATH_MAX]; + int legacy_kernel = 0; ++ int fd; ++ char buf[16]; + #if 0 + int need_sleep = 0; + #endif +@@ -385,6 +387,26 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + return -ENOMEM; + } + ++ /* Fix for poll() on the per_cpu trace_pipe and trace_pipe_raw blocks ++ * indefinitely with the default buffer_percent in the kernel trace system, ++ * which is introduced by the following change in the kernel. ++ * https://lore.kernel.org/all/20221020231427.41be3f26@gandalf.local.home/T/#u. ++ * Set buffer_percent to 0 so that poll() will return immediately ++ * when the trace data is available in the ras per_cpu trace pipe_raw ++ */ ++ fd = open_trace(pdata[0].ras, "buffer_percent", O_WRONLY); ++ if (fd >= 0) { ++ /* For the backward compatibility to the old kernels, do not return ++ * if fail to set the buffer_percent. 
++ */ ++ snprintf(buf, sizeof(buf), "0"); ++ size = write(fd, buf, strlen(buf)); ++ if (size <= 0) ++ log(TERM, LOG_WARNING, "can't write to buffer_percent\n"); ++ close(fd); ++ } else ++ log(TERM, LOG_WARNING, "Can't open buffer_percent\n"); ++ + for (i = 0; i < (n_cpus + 1); i++) + fds[i].fd = -1; + +-- +2.33.1 + diff --git a/1019-rasdaemon-Move-definition-for-BIT-and-BIT_ULL-to-a-c.patch b/1019-rasdaemon-Move-definition-for-BIT-and-BIT_ULL-to-a-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..dab5a4eab32babe75b7e8c0c96c059fca1b7152b --- /dev/null +++ b/1019-rasdaemon-Move-definition-for-BIT-and-BIT_ULL-to-a-c.patch @@ -0,0 +1,49 @@ +From 4d0017a4c4e45983b1090884160c7053398879bd Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Mon, 16 Jan 2023 17:13:32 +0000 +Subject: [PATCH 19/85] rasdaemon: Move definition for BIT and BIT_ULL to a + common file + +Move definition for BIT() and BIT_ULL() to the +common file ras-record.h + +Signed-off-by: Shiju Jose +Reviewed-by: Jonathan Cameron +Reviewed-by: Dave Jiang +Signed-off-by: Mauro Carvalho Chehab +--- + ras-non-standard-handler.h | 3 --- + ras-record.h | 3 +++ + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h +index 57d4cb5..393b756 100644 +--- a/ras-non-standard-handler.h ++++ b/ras-non-standard-handler.h +@@ -17,9 +17,6 @@ + #include "ras-events.h" + #include "libtrace/event-parse.h" + +-#define BIT(nr) (1UL << (nr)) +-#define BIT_ULL(nr) (1ULL << (nr)) +- + struct ras_ns_ev_decoder { + struct ras_ns_ev_decoder *next; + const char *sec_type; +diff --git a/ras-record.h b/ras-record.h +index 1d1046f..cc49ae2 100644 +--- a/ras-record.h ++++ b/ras-record.h +@@ -26,6 +26,9 @@ + + #define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x))) + ++#define BIT(nr) (1UL << (nr)) ++#define BIT_ULL(nr) (1ULL << (nr)) ++ + extern long user_hz; + + struct ras_events; +-- +2.33.1 + diff --git a/1020-Check-CPUs-online-not-configured.patch 
b/1020-Check-CPUs-online-not-configured.patch new file mode 100644 index 0000000000000000000000000000000000000000..d5cfd5bbc2f7815790d884a3a04118dd9cd420f0 --- /dev/null +++ b/1020-Check-CPUs-online-not-configured.patch @@ -0,0 +1,38 @@ +From 3b6f8473b12885db7e1ac2e467ccbdac913c629c Mon Sep 17 00:00:00 2001 +From: Zeph / Liz Loss-Cutler-Hull +Date: Sun, 9 Jul 2023 04:57:19 -0700 +Subject: [PATCH 20/85] Check CPUs online, not configured. + +When the number of CPUs detected is greater than the number of CPUs in +the system, rasdaemon will crash when it receives some events. + +Looking deeper, we also fail to use the poll method for similar reasons +in this case. + +All of this can be prevented by checking to see how many CPUs are +currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many +CPUs the current kernel was configured to support +(sysconf(_SC_NPROCESSORS_CONF)). + +For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rQh9DA@mail.gmail.com/T/ +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ras-events.c b/ras-events.c +index f9922d4..9ad34f8 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -328,7 +328,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf, + + static int get_num_cpus(struct ras_events *ras) + { +- return sysconf(_SC_NPROCESSORS_CONF); ++ return sysconf(_SC_NPROCESSORS_ONLN); + #if 0 + char fname[MAX_PATH + 1]; + int num_cpus = 0; +-- +2.33.1 + diff --git a/2002-rasdaemon-log-non_standard_event-at-just-one-line.patch b/1021-rasdaemon-log-non_standard_event-at-just-one-line.patch similarity index 77% rename from 2002-rasdaemon-log-non_standard_event-at-just-one-line.patch rename to 1021-rasdaemon-log-non_standard_event-at-just-one-line.patch index 1d85b12a2cca2660563fe47994ac989eb848d11d..c841092878d34e1b29a08d56d00029d9b1387639 100644 --- 
a/2002-rasdaemon-log-non_standard_event-at-just-one-line.patch +++ b/1021-rasdaemon-log-non_standard_event-at-just-one-line.patch @@ -1,22 +1,23 @@ -From 9e407134b86f7a176970be70121e08cac6cad3ff Mon Sep 17 00:00:00 2001 +From 75223164b507d89c8f90d7ca8e1034ef36d550fb Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Thu, 7 Sep 2023 18:19:40 +0800 -Subject: [PATCH 2/4] rasdaemon: log non_standard_event at just one line +Subject: [PATCH 21/85] rasdaemon: log non_standard_event at just one line It is more reasonable log non_standard_event in one line exclude errors dump. So you can easily to get decoded non_standard_event log in one line if you implement a decoder like other event. Signed-off-by: Ruidong Tian +Signed-off-by: Mauro Carvalho Chehab --- ras-non-standard-handler.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c -index 6ccf5bc..7818ed8 100644 +index 6932e58..8672b16 100644 --- a/ras-non-standard-handler.c +++ b/ras-non-standard-handler.c -@@ -174,7 +174,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, +@@ -160,7 +160,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, case GHES_SEV_PANIC: ev.severity = "Fatal"; } @@ -25,7 +26,7 @@ index 6ccf5bc..7818ed8 100644 ev.sec_type = pevent_get_field_raw(s, event, "sec_type", record, &len, 1); -@@ -185,7 +185,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, +@@ -171,7 +171,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, trace_seq_printf(s, "\n section type: %s", "Ampere Specific Error\n"); else @@ -34,7 +35,7 @@ index 6ccf5bc..7818ed8 100644 uuid_le(ev.sec_type)); ev.fru_text = pevent_get_field_raw(s, event, "fru_text", record, &len, 1); -@@ -198,7 +198,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, +@@ -184,7 +184,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, if (pevent_get_field_val(s, event, "len", record, &val, 1) < 0) return -1; ev.length = val; diff --git 
a/2003-rasdaemon-add-support-for-THead-Yitian-non-standard-.patch b/1022-rasdaemon-add-support-for-THead-Yitian-non-standard-.patch similarity index 89% rename from 2003-rasdaemon-add-support-for-THead-Yitian-non-standard-.patch rename to 1022-rasdaemon-add-support-for-THead-Yitian-non-standard-.patch index 519f4d701837658b4b1c848f377d4c54ac3d9fad..153f22959955d7e909b9c8305be025953a2601fd 100644 --- a/2003-rasdaemon-add-support-for-THead-Yitian-non-standard-.patch +++ b/1022-rasdaemon-add-support-for-THead-Yitian-non-standard-.patch @@ -1,29 +1,31 @@ -From dbc5d5a9ba57ef3f84eb09c9ca658c96219a1736 Mon Sep 17 00:00:00 2001 +From a017a508b00e6d817539e9d9e53533b45e7d1da4 Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Thu, 7 Sep 2023 18:21:05 +0800 -Subject: [PATCH 3/4] rasdaemon: add support for THead Yitian non-standard +Subject: [PATCH 22/85] rasdaemon: add support for THead Yitian non-standard error decoder Add a new non-standard error decoder to decode THead YiTian error section. Put all related code to a new source file. 
Signed-off-by: Ruidong Tian +Signed-off-by: Mauro Carvalho Chehab +[Ruidong: fix conflict with cxl] --- - Makefile.am | 7 +- + Makefile.am | 6 +- configure.ac | 11 ++ non-standard-yitian.c | 251 ++++++++++++++++++++++++++++++++++++++++++ non-standard-yitian.h | 73 ++++++++++++ - 4 files changed, 341 insertions(+), 1 deletion(-) + 4 files changed, 340 insertions(+), 1 deletion(-) create mode 100644 non-standard-yitian.c create mode 100644 non-standard-yitian.h diff --git a/Makefile.am b/Makefile.am -index fabca78..7cbc81e 100644 +index 242ceb7..b16cf34 100644 --- a/Makefile.am +++ b/Makefile.am -@@ -63,13 +63,18 @@ endif - if WITH_AMP_NS_DECODE - rasdaemon_SOURCES += non-standard-ampere.c +@@ -66,6 +66,10 @@ endif + if WITH_CPU_FAULT_ISOLATION + rasdaemon_SOURCES += ras-cpu-isolation.c queue.c endif +if WITH_YITIAN_NS_DECODE + rasdaemon_SOURCES += non-standard-yitian.c @@ -32,22 +34,22 @@ index fabca78..7cbc81e 100644 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ +@@ -73,7 +77,7 @@ include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ -- non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h -+ non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ -+ non-standard-yitian.h + non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ +- ras-cpu-isolation.h queue.h ++ ras-cpu-isolation.h queue.h non-standard-yitian.h # This rule can't be called with more than one Makefile job (like make -j8) # I can't figure out a way to fix that diff --git a/configure.ac b/configure.ac -index 33b81fe..a02cca3 100644 +index d098fcf..135af9c 100644 --- a/configure.ac +++ b/configure.ac -@@ -161,6
+161,16 @@ AS_IF([test "x$enable_amp_ns_decode" = "xyes" || test "x$enable_all" == "xyes"], - AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all == xyes]) - AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"]) +@@ -171,6 +171,16 @@ AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" == "x + AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all == xyes]) + AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"]) +AC_ARG_ENABLE([yitian_ns_decode], + AS_HELP_STRING([--enable-yitian-ns-decode], [enable YITIAN_NS_DECODE events (currently experimental)])) @@ -62,10 +64,10 @@ index 33b81fe..a02cca3 100644 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" -@@ -201,4 +211,5 @@ compile time options summary - Memory Failure : $USE_MEMORY_FAILURE +@@ -212,4 +222,5 @@ compile time options summary Memory CE PFA : $USE_MEMORY_CE_PFA AMP RAS errors : $USE_AMP_NS_DECODE + CPU fault isolation : $USE_CPU_FAULT_ISOLATION + YITIAN RAS errors : $USE_YITIAN_NS_DECODE EOF diff --git a/non-standard-yitian.c b/non-standard-yitian.c @@ -314,7 +316,7 @@ index 0000000..99cea47 + +struct ras_ns_ev_decoder yitian_ns_oem_decoder[] = { + { -+ .sec_type = "a698081116ea4e4db936fb00a23ff29c", ++ .sec_type = "a6980811-16ea-4e4d-b936-fb00a23ff29c", + .decode = decode_yitian710_ns_error, + }, +}; diff --git a/1023-ras-mc-ctl-add-option-to-exclude-old-events-from-rep.patch b/1023-ras-mc-ctl-add-option-to-exclude-old-events-from-rep.patch new file mode 100644 index 0000000000000000000000000000000000000000..07cfe1b4ea1941714115496c3a534289c2f432c6 --- /dev/null +++ b/1023-ras-mc-ctl-add-option-to-exclude-old-events-from-rep.patch @@ -0,0 +1,273 @@ +From b052d9bad784ba10bc1281f027808ef4cb0d00eb Mon Sep 17 00:00:00 2001 +From: Marcus Sundman 
+Date: Thu, 20 Apr 2023 18:17:17 +0300 +Subject: [PATCH 23/85] ras-mc-ctl: add option to exclude old events from + reports + +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 59 +++++++++++++++++++++++++++------------------- + 1 file changed, 35 insertions(+), 24 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index e765519..13078c2 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -95,6 +95,7 @@ Usage: $prog [OPTIONS...] + --summary Presents a summary of the logged errors. + --errors Shows the errors stored at the error database. + --error-count Shows the corrected and uncorrected error counts using sysfs. ++ --since=YYYY-MM-DD Only include events since the date YYYY-MM-DD. + --vendor-errors-summary Presents a summary of the vendor-specific logged errors. + --vendor-errors Shows the vendor-specific errors stored in the error database. + --vendor-errors Shows the vendor-specific errors for a specific module stored in the error database. 
+@@ -175,6 +176,7 @@ sub parse_cmdline + $conf{opt}{error_count} = 0; + $conf{opt}{vendor_errors_summary} = 0; + $conf{opt}{vendor_errors} = 0; ++ $conf{opt}{since} = ''; + $conf{opt}{vendor_platforms} = 0; + + my $rref = \$conf{opt}{report}; +@@ -196,6 +198,7 @@ sub parse_cmdline + "error-count" => \$conf{opt}{error_count}, + "vendor-errors-summary" => \$conf{opt}{vendor_errors_summary}, + "vendor-errors" => \$conf{opt}{vendor_errors}, ++ "since=s" => \$conf{opt}{since}, + "vendor-platforms" => \$conf{opt}{vendor_platforms}, + ); + +@@ -207,6 +210,14 @@ sub parse_cmdline + log_error ("Only use --delay with --register-labels\n"); + exit (1); + } ++ ++ if ($conf{opt}{since}) { ++ if ($conf{opt}{since} !~ /^20\d\d-[01]\d-[0-3]\d/) { ++ log_error ("--since requires a date like yyyy-mm-dd where yyyy is the year, mm the month, and dd the day\n"); ++ exit (1); ++ } ++ $conf{opt}{since} = " where timestamp>='$conf{opt}{since}'"; ++ } + } + + sub usage +@@ -1168,7 +1179,7 @@ sub summary + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + + # Memory controller mc_event errors +- $query = "select err_type, label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event group by err_type, label, mc, top_layer, middle_layer, lower_layer"; ++ $query = "select err_type, label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event$conf{opt}{since} group by err_type, label, mc, top_layer, middle_layer, lower_layer"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($err_type, $label, $mc, $top, $mid, $low, $count)); +@@ -1185,7 +1196,7 @@ sub summary + + # PCIe AER aer_event errors + if ($has_aer == 1) { +- $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; ++ $query = "select err_type, err_msg, count(*) from aer_event$conf{opt}{since} group by err_type, err_msg"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + 
$query_handle->bind_columns(\($err_type, $msg, $count)); +@@ -1203,7 +1214,7 @@ sub summary + + # ARM processor arm_event errors + if ($has_arm == 1) { +- $query = "select mpidr, count(*) from arm_event group by mpidr"; ++ $query = "select mpidr, count(*) from arm_event$conf{opt}{since} group by mpidr"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($mpidr, $count)); +@@ -1221,7 +1232,7 @@ sub summary + + # extlog errors + if ($has_extlog == 1) { +- $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; ++ $query = "select etype, severity, count(*) from extlog_event$conf{opt}{since} group by etype, severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($etype, $severity, $count)); +@@ -1241,7 +1252,7 @@ sub summary + + # devlink errors + if ($has_devlink == 1) { +- $query = "select dev_name, count(*) from devlink_event group by dev_name"; ++ $query = "select dev_name, count(*) from devlink_event$conf{opt}{since} group by dev_name"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($dev_name, $count)); +@@ -1259,7 +1270,7 @@ sub summary + + # Disk errors + if ($has_disk_errors == 1) { +- $query = "select dev, count(*) from disk_errors group by dev"; ++ $query = "select dev, count(*) from disk_errors$conf{opt}{since} group by dev"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($dev, $count)); +@@ -1277,7 +1288,7 @@ sub summary + + # Memory failure errors + if ($has_mem_failure == 1) { +- $query = "select action_result, count(*) from memory_failure_event group by action_result"; ++ $query = "select action_result, count(*) from memory_failure_event$conf{opt}{since} group by action_result"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($action_result, $count)); +@@ 
-1295,7 +1306,7 @@ sub summary + + # MCE mce_record errors + if ($has_mce == 1) { +- $query = "select error_msg, count(*) from mce_record group by error_msg"; ++ $query = "select error_msg, count(*) from mce_record$conf{opt}{since} group by error_msg"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($msg, $count)); +@@ -1328,7 +1339,7 @@ sub errors + my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); + + # Memory controller mc_event errors +- $query = "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event order by id"; ++ $query = "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + if (!$query_handle) { + log_error ("mc_event table missing from $dbname. 
Run 'rasdaemon --record'.\n"); +@@ -1349,7 +1360,7 @@ sub errors + + # PCIe AER aer_event errors + if ($has_aer == 1) { +- $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id"; ++ $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $time, $devname, $type, $msg)); +@@ -1367,7 +1378,7 @@ sub errors + + # ARM processor arm_event errors + if ($has_arm == 1) { +- $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; ++ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); +@@ -1391,7 +1402,7 @@ sub errors + + # Extlog errors + if ($has_extlog == 1) { +- $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; ++ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); +@@ -1418,7 +1429,7 @@ sub errors + + # devlink errors + if ($has_devlink == 1) { +- $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id"; ++ $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, 
$reporter_name, $msg)); +@@ -1442,7 +1453,7 @@ sub errors + + # Disk errors + if ($has_disk_errors == 1) { +- $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors order by id"; ++ $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $dev, $sector, $nr_sector, $error, $rwbs, $cmd)); +@@ -1467,7 +1478,7 @@ sub errors + + # Memory failure errors + if ($has_mem_failure == 1) { +- $query = "select id, timestamp, pfn, page_type, action_result from memory_failure_event order by id"; ++ $query = "select id, timestamp, pfn, page_type, action_result from memory_failure_event$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $pfn, $page_type, $action_result)); +@@ -1486,7 +1497,7 @@ sub errors + + # MCE mce_record errors + if ($has_mce == 1) { +- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; ++ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); +@@ -1555,7 +1566,7 @@ sub vendor_errors_summary + # HiSilicon KunPeng9xx errors + if ($platform_id 
eq HISILICON_KUNPENG_9XX) { + $found_platform = 1; +- $query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2 group by err_severity, module_id"; ++ $query = "select err_severity, module_id, count(*) from hip08_oem_type1_event_v2$conf{opt}{since} group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($err_severity, $module_id, $count)); +@@ -1573,7 +1584,7 @@ sub vendor_errors_summary + } + $query_handle->finish; + +- $query = "select err_severity, module_id, count(*) from hip08_oem_type2_event_v2 group by err_severity, module_id"; ++ $query = "select err_severity, module_id, count(*) from hip08_oem_type2_event_v2$conf{opt}{since} group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($err_severity, $module_id, $count)); +@@ -1591,7 +1602,7 @@ sub vendor_errors_summary + } + $query_handle->finish; + +- $query = "select err_severity, sub_module_id, count(*) from hip08_pcie_local_event_v2 group by err_severity, sub_module_id"; ++ $query = "select err_severity, sub_module_id, count(*) from hip08_pcie_local_event_v2$conf{opt}{since} group by err_severity, sub_module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($err_severity, $sub_module_id, $count)); +@@ -1609,7 +1620,7 @@ sub vendor_errors_summary + } + $query_handle->finish; + +- $query = "select err_severity, module_id, count(*) from hisi_common_section_v2 group by err_severity, module_id"; ++ $query = "select err_severity, module_id, count(*) from hisi_common_section_v2$conf{opt}{since} group by err_severity, module_id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($err_severity, $module_id, $count)); +@@ -1663,7 +1674,7 @@ sub vendor_errors + # HiSilicon KunPeng9xx errors + if ($platform_id eq HISILICON_KUNPENG_9XX) 
{ + $found_platform = 1; +- $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2 order by id, module_id, err_severity"; ++ $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2$conf{opt}{since} order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs)); +@@ -1688,7 +1699,7 @@ sub vendor_errors + } + $query_handle->finish; + +- $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type2_event_v2 order by id, module_id, err_severity"; ++ $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type2_event_v2$conf{opt}{since} order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $nimbus_id, $module_id, $sub_module_id, $err_severity, $regs)); +@@ -1713,7 +1724,7 @@ sub vendor_errors + } + $query_handle->finish; + +- $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, sub_module_id, core_id, port_id, err_severity, err_type, regs_dump from hip08_pcie_local_event_v2 order by id, sub_module_id, err_severity"; ++ $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, sub_module_id, core_id, port_id, err_severity, err_type, regs_dump from hip08_pcie_local_event_v2$conf{opt}{since} order by id, sub_module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, 
$nimbus_id, $sub_module_id, $core_id, $port_id, $err_severity, $err_type, $regs)); +@@ -1740,7 +1751,7 @@ sub vendor_errors + } + $query_handle->finish; + +- $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2 order by id, module_id, err_severity"; ++ $query = "select id, timestamp, version, soc_id, socket_id, totem_id, nimbus_id, sub_system_id, module_id, sub_module_id, core_id, port_id, err_type, pcie_info, err_severity, regs_dump from hisi_common_section_v2$conf{opt}{since} order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); + $query_handle->bind_columns(\($id, $timestamp, $version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $module_id, $sub_module_id, $core_id, $port_id, $err_type, $pcie_info, $err_severity, $regs)); +-- +2.33.1 + diff --git a/2004-rasdaemon-ras-mc-ctl-Add-support-to-display-the-THea.patch b/1024-rasdaemon-ras-mc-ctl-Add-support-to-display-the-THea.patch similarity index 58% rename from 2004-rasdaemon-ras-mc-ctl-Add-support-to-display-the-THea.patch rename to 1024-rasdaemon-ras-mc-ctl-Add-support-to-display-the-THea.patch index b508066afbbb362e64c435d1a247613f58804836..6243b59afb0775735c02e7d668f1c83e36599fc7 100644 --- a/2004-rasdaemon-ras-mc-ctl-Add-support-to-display-the-THea.patch +++ b/1024-rasdaemon-ras-mc-ctl-Add-support-to-display-the-THea.patch @@ -1,42 +1,44 @@ -From 2e30517b9584ee8ae99553400168e07afce8ff9c Mon Sep 17 00:00:00 2001 +From 2a202d970dfc76e26b5d423fc10572fd0dd80164 Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Thu, 7 Sep 2023 18:22:06 +0800 -Subject: [PATCH 4/4] rasdaemon: ras-mc-ctl: Add support to display the THead +Subject: [PATCH 24/85] rasdaemon: ras-mc-ctl: Add support to display the THead vendor errors Add support for the THead YiTian DDRC register dump event. 
Signed-off-by: Ruidong Tian +Signed-off-by: Mauro Carvalho Chehab --- - util/ras-mc-ctl.in | 43 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 43 insertions(+) + util/ras-mc-ctl.in | 48 ++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index 1e3aeb7..d30fca4 100755 +index 13078c2..5d8b46c 100755 --- a/util/ras-mc-ctl.in +++ b/util/ras-mc-ctl.in -@@ -1528,6 +1528,7 @@ sub errors +@@ -1541,6 +1541,7 @@ sub errors + # Definitions of the vendor platform IDs. use constant { - HISILICON_KUNPENG_920 => "Kunpeng920", - HISILICON_KUNPENG_9XX => "Kunpeng9xx", + HISILICON_KUNPENG_9XX => "KunPeng9xx", + THEAD_YITIAN_7XX => "YiTian7XX", }; sub vendor_errors_summary -@@ -1536,6 +1537,7 @@ sub vendor_errors_summary - my ($num_args, $platform_id); +@@ -1549,6 +1550,7 @@ sub vendor_errors_summary + my ($num_args, $platform_id, $found_platform); my ($query, $query_handle, $count, $out); - my ($module_id, $sub_module_id, $err_severity, $err_sev, $err_info); + my ($module_id, $sub_module_id, $err_severity, $err_sev); + my ($address); $num_args = $#ARGV + 1; $platform_id = 0; -@@ -1628,6 +1630,24 @@ sub vendor_errors_summary +@@ -1639,6 +1641,25 @@ sub vendor_errors_summary $query_handle->finish; } + # THead Yitian710 DDR errors + if ($platform_id eq THEAD_YITIAN_7XX) { ++ $found_platform = 1; + $query = "select address, count(*) from yitian_ddr_reg_dump_event"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); @@ -53,23 +55,33 @@ index 1e3aeb7..d30fca4 100755 + $query_handle->finish; + } + - undef($dbh); - } - -@@ -1638,6 +1658,7 @@ sub vendor_errors + if ($platform_id && !($found_platform)) { + print "Platform ID $platform_id is not valid\n"; + } +@@ -1653,6 +1674,7 @@ sub vendor_errors my ($query, $query_handle, $id, $timestamp, $out); - my ($version, $soc_id, $socket_id, $nimbus_id, $core_id, $port_id); - my ($module_id, $sub_module_id, 
$err_severity, $err_type, $err_info, $regs); + my ($version, $soc_id, $socket_id, $totem_id, $nimbus_id, $sub_system_id, $core_id, $port_id); + my ($module_id, $sub_module_id, $err_severity, $err_type, $pcie_info, $regs); + my ($address, $regs_dump); $num_args = $#ARGV + 1; $platform_id = 0; -@@ -1743,6 +1764,27 @@ sub vendor_errors +@@ -1673,7 +1695,7 @@ sub vendor_errors + + # HiSilicon KunPeng9xx errors + if ($platform_id eq HISILICON_KUNPENG_9XX) { +- $found_platform = 1; ++ $found_platform = 1; + $query = "select id, timestamp, version, soc_id, socket_id, nimbus_id, module_id, sub_module_id, err_severity, regs_dump from hip08_oem_type1_event_v2$conf{opt}{since} order by id, module_id, err_severity"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +@@ -1783,12 +1805,33 @@ sub vendor_errors $query_handle->finish; } + # THead Yitian7xx ddr errors + if ($platform_id eq THEAD_YITIAN_7XX) { ++ $found_platform = 1; + $query = "select id, timestamp, address, regs_dump from yitian_ddr_reg_dump_event order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); @@ -89,13 +101,19 @@ index 1e3aeb7..d30fca4 100755 + $query_handle->finish; + } + + if ($platform_id && !($found_platform)) { + print "Platform ID $platform_id is not valid\n"; + } elsif ($module && !($found_module)) { + print "No error record for the module $module\n"; + } +- undef($dbh); } -@@ -1751,6 +1793,7 @@ sub vendor_platforms +@@ -1796,6 +1839,7 @@ sub vendor_platforms + { print "\nSupported platforms for the vendor-specific errors:\n"; - print "\tHiSilicon Kunpeng920, platform-id=\"", HISILICON_KUNPENG_920, "\"\n"; - print "\tHiSilicon Kunpeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n"; + print "\tHiSilicon KunPeng9xx, platform-id=\"", HISILICON_KUNPENG_9XX, "\"\n"; + print "\tTHead Yitian7xx, platform-id=\"", THEAD_YITIAN_7XX, "\"\n"; print "\n"; } diff --git a/1025-rasdaemon-Add-Emerald-Rapids-support.patch 
b/1025-rasdaemon-Add-Emerald-Rapids-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c8534f4a4b902bf3358da825594afb92bc1f3e7 --- /dev/null +++ b/1025-rasdaemon-Add-Emerald-Rapids-support.patch @@ -0,0 +1,74 @@ +From 2f9f335ff3a7c70d87b435e43df775e3a73606a7 Mon Sep 17 00:00:00 2001 +From: "Delgado Vargas, Daniel" +Date: Fri, 20 Oct 2023 10:57:11 -0600 +Subject: [PATCH 25/85] rasdaemon: Add Emerald Rapids support + +Signed-off-by: Delgado Vargas, Daniel +Signed-off-by: Mauro Carvalho Chehab +--- + mce-intel-i10nm.c | 1 + + mce-intel.c | 1 + + ras-mce-handler.c | 3 +++ + ras-mce-handler.h | 1 + + 4 files changed, 6 insertions(+) + +diff --git a/mce-intel-i10nm.c b/mce-intel-i10nm.c +index 3c5d22f..c4ace56 100644 +--- a/mce-intel-i10nm.c ++++ b/mce-intel-i10nm.c +@@ -380,6 +380,7 @@ void i10nm_decode_model(enum cputype cputype, struct ras_events *ras, + banktype = tremont[e->bank]; + break; + case CPU_SAPPHIRERAPIDS: ++ case CPU_EMERALDRAPIDS: + banktype = sapphire[e->bank]; + break; + default: +diff --git a/mce-intel.c b/mce-intel.c +index e083e9c..18a9072 100644 +--- a/mce-intel.c ++++ b/mce-intel.c +@@ -415,6 +415,7 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e) + case CPU_ICELAKE_DE: + case CPU_TREMONT_D: + case CPU_SAPPHIRERAPIDS: ++ case CPU_EMERALDRAPIDS: + i10nm_decode_model(mce->cputype, ras, e); + default: + break; +diff --git a/ras-mce-handler.c b/ras-mce-handler.c +index d09829d..370e68a 100644 +--- a/ras-mce-handler.c ++++ b/ras-mce-handler.c +@@ -61,6 +61,7 @@ static char *cputype_name[] = { + [CPU_ICELAKE_DE] = "Icelake server D Family", + [CPU_TREMONT_D] = "Tremont microserver", + [CPU_SAPPHIRERAPIDS] = "Sapphirerapids server", ++ [CPU_EMERALDRAPIDS] = "Emeraldrapids server", + }; + + static enum cputype select_intel_cputype(struct mce_priv *mce) +@@ -118,6 +119,8 @@ static enum cputype select_intel_cputype(struct mce_priv *mce) + return CPU_TREMONT_D; + else if (mce->model == 0x8f) + return 
CPU_SAPPHIRERAPIDS; ++ else if (mce->model == 0xcf) ++ return CPU_EMERALDRAPIDS; + + if (mce->model > 0x1a) { + log(ALL, LOG_INFO, +diff --git a/ras-mce-handler.h b/ras-mce-handler.h +index b4babf3..68147f4 100644 +--- a/ras-mce-handler.h ++++ b/ras-mce-handler.h +@@ -53,6 +53,7 @@ enum cputype { + CPU_ICELAKE_DE, + CPU_TREMONT_D, + CPU_SAPPHIRERAPIDS, ++ CPU_EMERALDRAPIDS, + }; + + struct mce_event { +-- +2.33.1 + diff --git a/1026-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch b/1026-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch new file mode 100644 index 0000000000000000000000000000000000000000..c1820f17931ceb9b43eadd23aa1baacce1eba660 --- /dev/null +++ b/1026-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch @@ -0,0 +1,122 @@ +From 4a9931b5bbd13bdc8911fc6041251f53618fb6d3 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 24 Aug 2023 13:07:17 +0100 +Subject: [PATCH 26/85] rasdaemon: ras-mc-ctl: Modify check for HiSilicon + KunPeng9xx error fields + +Modify check for valid HiSilicon KunPeng9xx error fields. +Fixes an error data is not printed when it's value is 0. + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 72 +++++++++++++++++++++++----------------------- + 1 file changed, 36 insertions(+), 36 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 5d8b46c..fb35afe 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1705,13 +1705,13 @@ sub vendor_errors + if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "Error Registers: $regs " if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +@@ -1730,13 +1730,13 @@ sub vendor_errors + if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "Error Registers: $regs " if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +@@ -1755,15 +1755,15 @@ sub vendor_errors + if ($module eq 0 || ($sub_module_id && uc($module) eq uc($sub_module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "Error Registers: $regs " if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "core_id=$core_id, " if (defined $core_id && length $core_id); ++ $out .= "port_id=$port_id, " if (defined $port_id && length $port_id); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "err_type=$err_type, " if (defined $err_type && length $err_type); ++ $out .= "Error Registers: $regs " if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +@@ -1782,19 +1782,19 @@ sub vendor_errors + if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "totem_id=$totem_id, " if ($totem_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "pcie_info=$pcie_info, " if ($pcie_info); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs" if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "totem_id=$totem_id, " if (defined $totem_id && length $totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if (defined $sub_system_id && length $sub_system_id); ++ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "core_id=$core_id, " if (defined $core_id && length $core_id ); ++ $out .= "port_id=$port_id, " if (defined $port_id && length $port_id); ++ $out .= "err_type=$err_type, " if (defined $err_type && length $err_type); ++ $out .= "pcie_info=$pcie_info, " if (defined $pcie_info && length $pcie_info); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "Error Registers: $regs" if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +-- +2.33.1 + diff --git a/1027-anolis-Add-dynamic-switch-of-ras-events-support.patch 
b/1027-anolis-Add-dynamic-switch-of-ras-events-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d17a1955a50f8d24c5b686c554fa1fea15429d5 --- /dev/null +++ b/1027-anolis-Add-dynamic-switch-of-ras-events-support.patch @@ -0,0 +1,162 @@ +From 80e534e597163ef2fd4fc3bff3d441420914e0d2 Mon Sep 17 00:00:00 2001 +From: caixiaomeng 00662745 +Date: Wed, 29 Nov 2023 14:31:46 +0800 +Subject: [PATCH 27/85] anolis: Add dynamic switch of ras events support. + +Rasdaemon does not support a way to disable some events by config. +If a user wants to disable a specific event (e.g. block_rq_complete), +rasdaemon must be recompiled, which is inconvenient. + +This patch adds support for dynamically switching ras events. You can +list the events you want to disable in /etc/sysconfig/rasdaemon. For example, +`DISABLE="ras:mc_event,block:block_rq_complete"`. Then restart +rasdaemon, and these two events will be disabled without recompilation. + +[mchehab: make is_disabled_event() static] +Signed-off-by: Mauro Carvalho Chehab +[Ruidong: delete cxl code] +--- + ras-events.c | 35 ++++++++++++++++++++++++++++------- + rasdaemon.c | 3 +++ + 2 files changed, 31 insertions(+), 7 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index 9ad34f8..31a4e0b 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -57,6 +57,8 @@ + #define ENDIAN KBUFFER_ENDIAN_BIG + #endif + ++extern char* choices_disable; ++ + static int get_debugfs_dir(char *tracing_dir, size_t len) + { + FILE *fp; +@@ -147,6 +149,18 @@ static int get_tracing_dir(struct ras_events *ras) + return 0; + } + ++static int is_disabled_event(char *group, char *event) { ++ char ras_event_name[MAX_PATH + 1]; ++ ++ snprintf(ras_event_name, sizeof(ras_event_name), "%s:%s", ++ group, event); ++ ++ if (choices_disable != NULL && strlen(choices_disable) != 0 && strstr(choices_disable, ras_event_name)) { ++ return 1; ++ } ++ return 0; ++} ++ + /* + * Tracing enable/disable code + */ +@@ -155,6 +169,7 @@ static int 
__toggle_ras_mc_event(struct ras_events *ras, + { + int fd, rc; + char fname[MAX_PATH + 1]; ++ enable = is_disabled_event(group, event) ? 0 : 1; + + snprintf(fname, sizeof(fname), "%s%s:%s\n", + enable ? "" : "!", +@@ -775,6 +790,12 @@ static int add_event_handler(struct ras_events *ras, struct pevent *pevent, + + ras->filters[id] = filter; + ++ if (is_disabled_event(group, event)) { ++ log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", ++ group, event); ++ return -EINVAL; ++ } ++ + /* Enable RAS events */ + rc = __toggle_ras_mc_event(ras, group, event, 1); + free(page); +@@ -842,7 +863,7 @@ int handle_ras_events(int record_events) + ras_mc_event_handler, NULL, MC_EVENT); + if (!rc) + num_events++; +- else ++ else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "mc_event"); + +@@ -851,7 +872,7 @@ int handle_ras_events(int record_events) + ras_aer_event_handler, NULL, AER_EVENT); + if (!rc) + num_events++; +- else ++ else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "aer_event"); + #endif +@@ -861,7 +882,7 @@ int handle_ras_events(int record_events) + ras_non_standard_event_handler, NULL, NON_STANDARD_EVENT); + if (!rc) + num_events++; +- else ++ else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "non_standard_event"); + #endif +@@ -871,7 +892,7 @@ int handle_ras_events(int record_events) + ras_arm_event_handler, NULL, ARM_EVENT); + if (!rc) + num_events++; +- else ++ else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "arm_event"); + #endif +@@ -905,7 +926,7 @@ int handle_ras_events(int record_events) + /* tell kernel we are listening, so don't printk to console */ + (void)open("/sys/kernel/debug/ras/daemon_active", 0); + num_events++; +- } else ++ } else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "extlog_mem_event"); + #endif +@@ -922,7 +943,7 @@ int handle_ras_events(int 
record_events) + ras_devlink_event_handler, filter_str, DEVLINK_EVENT); + if (!rc) + num_events++; +- else ++ else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "devlink", "devlink_health_report"); + #endif +@@ -946,7 +967,7 @@ int handle_ras_events(int record_events) + ras_memory_failure_event_handler, NULL, MF_EVENT); + if (!rc) + num_events++; +- else ++ else if (rc != -EINVAL) + log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", + "ras", "memory_failure_event"); + #endif +diff --git a/rasdaemon.c b/rasdaemon.c +index e9a3a4d..0db51c9 100644 +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -33,6 +33,8 @@ + #define TOOL_NAME "rasdaemon" + #define TOOL_DESCRIPTION "RAS daemon to log the RAS events." + #define ARGS_DOC "" ++#define DISABLE "DISABLE" ++char *choices_disable = NULL; + + const char *argp_program_version = TOOL_NAME " " VERSION; + const char *argp_program_bug_address = "Mauro Carvalho Chehab "; +@@ -127,6 +129,7 @@ int main(int argc, char *argv[]) + { + struct arguments args; + int idx = -1; ++ choices_disable = getenv(DISABLE); + + #ifdef HAVE_MCE + const struct argp_option offline_options[] = { +-- +2.33.1 + diff --git a/1028-rasdaemon-Add-support-for-creating-vendor-tables-at-.patch b/1028-rasdaemon-Add-support-for-creating-vendor-tables-at-.patch new file mode 100644 index 0000000000000000000000000000000000000000..26757a16e1e6d76f0b3e23aed74f035ea298850e --- /dev/null +++ b/1028-rasdaemon-Add-support-for-creating-vendor-tables-at-.patch @@ -0,0 +1,72 @@ +From 83149843435ffa5f22bc12ca67cd17b1b94fd3c0 Mon Sep 17 00:00:00 2001 +From: Hunter He +Date: Wed, 6 Dec 2023 14:52:03 +0800 +Subject: [PATCH 28/85] rasdaemon:Add support for creating vendor tables at + startup. + +When rasdaemon is running without non-standard error, those +tables are not created in the database file. Then ras-mc-ctl +script breaks trying to query data from non-existent tables. + +Add support for creating vendor tables at startup. 
+ +Signed-off-by: Hunter He +--- + non-standard-yitian.c | 27 +++++++++++++++++---------- + 1 file changed, 17 insertions(+), 10 deletions(-) + +diff --git a/non-standard-yitian.c b/non-standard-yitian.c +index 99cea47..4c30514 100644 +--- a/non-standard-yitian.c ++++ b/non-standard-yitian.c +@@ -164,16 +164,6 @@ void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder, + const char *subtype_str = oem_subtype_name(yitian_payload_error_type, + header->type, header->subtype); + +-#ifdef HAVE_SQLITE3 +- if (ras->record_events && !ev_decoder->stmt_dec_record) { +- if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, +- &yitian_ddr_payload_section_tab) != SQLITE_OK) { +- trace_seq_printf(s, "create sql fail\n"); +- return; +- } +- } +-#endif +- + now = time(NULL); + tm = localtime(&now); + if (tm) +@@ -217,6 +207,22 @@ void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder, + + } + ++static int add_yitian_common_table(struct ras_events *ras, ++ struct ras_ns_ev_decoder *ev_decoder) ++{ ++#ifdef HAVE_SQLITE3 ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &yitian_ddr_payload_section_tab) != SQLITE_OK) { ++ log(TERM, LOG_WARNING, ++ "Failed to create sql yitian_ddr_payload_section_tab\n"); ++ return -1; ++ } ++ } ++#endif ++ return 0; ++} ++ + /* error data decoding functions */ + static int decode_yitian710_ns_error(struct ras_events *ras, + struct ras_ns_ev_decoder *ev_decoder, +@@ -239,6 +245,7 @@ static int decode_yitian710_ns_error(struct ras_events *ras, + struct ras_ns_ev_decoder yitian_ns_oem_decoder[] = { + { + .sec_type = "a6980811-16ea-4e4d-b936-fb00a23ff29c", ++ .add_table = add_yitian_common_table, + .decode = decode_yitian710_ns_error, + }, + }; +-- +2.33.1 + diff --git a/1029-Fix-potential-overflow-with-some-arrays-at-page-isol.patch b/1029-Fix-potential-overflow-with-some-arrays-at-page-isol.patch new file mode 100644 index 
0000000000000000000000000000000000000000..215c36fbcc113794d6d91a284f79018ddacc5f5c --- /dev/null +++ b/1029-Fix-potential-overflow-with-some-arrays-at-page-isol.patch @@ -0,0 +1,118 @@ +From c497bd6b0d18efa9f7cf4fe49e183fe971868754 Mon Sep 17 00:00:00 2001 +From: zhuofeng +Date: Thu, 7 Dec 2023 10:26:56 +0800 +Subject: [PATCH 29/85] Fix potential overflow with some arrays at + page-isolation logic + +Overflows may happen in the `threshold_string` and `cycle_string` arrays. + +If the PAGE_CE_THRESHOLD value in page isolation is set to 50 bits, +there is a risk of array overflow. Because sprintf is an insecure +function, use snprintf instead. + +An error is reported when the AddressSanitizer is used. + +rasdaemon: Improper PAGE_CE_ACTION, set to default soft +rasdaemon: Page offline choice on Corrected Errors is soft +================================================================= +==221920==ERROR: AddressSanitizer: stack-buffer-overflow on address 0xffffdd91d932 at pc 0xffffa24071c4 bp 0xffffdd91d720 sp 0xffffdd91ced8 +WRITE of size 55 at 0xffffdd91d932 thread T0 + #0 0xffffa24071c0 in vsprintf (/usr/lib64/libasan.so.6+0x5c1c0) + #1 0xffffa24073cc in sprintf (/usr/lib64/libasan.so.6+0x5c3cc) + #2 0x459558 in parse_env_string /home/rasdaemon/ras-page-isolation.c:185 + #3 0x4596f4 in page_isolation_init /home/rasdaemon/ras-page-isolation.c:202 + #4 0x459934 in ras_page_account_init /home/rasdaemon/ras-page-isolation.c:211 + #5 0x40f700 in handle_ras_events /home/rasdaemon/ras-events.c:902 + #6 0x405b8c in main /home/rasdaemon/rasdaemon.c:211 + #7 0xffffa20b6f38 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 + #8 0xffffa20b7004 in __libc_start_main_impl ../csu/libc-start.c:409 + #9 0x4038ec in _start (/home/rasdaemon/rasdaemon+0x4038ec) + +Address 0xffffdd91d932 is located in stack of thread T0 at offset 82 in frame + #0 0x459574 in page_isolation_init /home/rasdaemon/ras-page-isolation.c:190 + + This frame has 2 object(s): + [32, 82) 
'threshold_string' (line 191) + [128, 178) 'cycle_string' (line 192) <== Memory access at offset 82 partially underflows this variable +HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork + (longjmp and C++ exceptions *are* supported) +SUMMARY: AddressSanitizer: stack-buffer-overflow (/usr/lib64/libasan.so.6+0x5c1c0) in vsprintf +Shadow bytes around the buggy address: + 0x200ffbb23ad0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x200ffbb23ae0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x200ffbb23af0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x200ffbb23b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x200ffbb23b10: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 +=>0x200ffbb23b20: 00 00 00 00 00 00[02]f2 f2 f2 f2 f2 00 00 00 00 + 0x200ffbb23b30: 00 00 02 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 + 0x200ffbb23b40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x200ffbb23b50: f1 f1 f1 f1 f1 f1 04 f2 00 00 f2 f2 00 00 00 00 + 0x200ffbb23b60: 00 00 00 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 f2 + 0x200ffbb23b70: f2 f2 f2 f2 00 00 00 00 00 00 00 00 f2 f2 f2 f2 +Shadow byte legend (one shadow byte represents 8 application bytes): + Addressable: 00 + Partially addressable: 01 02 03 04 05 06 07 + Heap left redzone: fa + Freed heap region: fd + Stack left redzone: f1 + Stack mid redzone: f2 + Stack right redzone: f3 + Stack after return: f5 + Stack use after scope: f8 + Global redzone: f9 + Global init order: f6 + Poisoned by user: f7 + Container overflow: fc + Array cookie: ac + Intra object redzone: bb + ASan internal: fe + Left alloca redzone: ca + Right alloca redzone: cb + Shadow gap: cc +==221920==ABORTING + +Signed-off-by: Mauro Carvalho Chehab +--- + ras-page-isolation.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/ras-page-isolation.c b/ras-page-isolation.c +index fd7bd70..caa8c31 100644 +--- a/ras-page-isolation.c ++++ b/ras-page-isolation.c 
+@@ -171,18 +171,18 @@ parse: + config->unit = no_unit ? config->unit : ""; + } + +-static void parse_env_string(struct isolation *config, char *str) ++static void parse_env_string(struct isolation *config, char *str, unsigned int size) + { + int i; + + if (config->overflow) { + /* when overflow, use basic unit */ + for (i = 0; config->units[i].name; i++) ; +- sprintf(str, "%lu%s", config->val, config->units[i-1].name); ++ snprintf(str, size, "%lu%s", config->val, config->units[i-1].name); + log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n", + config->name, config->env); + } else { +- sprintf(str, "%s%s", config->env, config->unit); ++ snprintf(str, size, "%s%s", config->env, config->unit); + } + } + +@@ -199,8 +199,8 @@ static void page_isolation_init(void) + + parse_isolation_env(&threshold); + parse_isolation_env(&cycle); +- parse_env_string(&threshold, threshold_string); +- parse_env_string(&cycle, cycle_string); ++ parse_env_string(&threshold, threshold_string, sizeof(threshold_string)); ++ parse_env_string(&cycle, cycle_string, sizeof(cycle_string)); + log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n", + threshold_string, cycle_string); + } +-- +2.33.1 + diff --git a/1030-anolis-print-logs-in-the-same-line.patch b/1030-anolis-print-logs-in-the-same-line.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f4f295b69d00e193d5834ef4a40ce5beeacc6ec --- /dev/null +++ b/1030-anolis-print-logs-in-the-same-line.patch @@ -0,0 +1,94 @@ +From 45306973e2c357e9c39f4a423371d2c1ce9ebc12 Mon Sep 17 00:00:00 2001 +From: zhuofeng +Date: Tue, 12 Mar 2024 14:28:55 +0800 +Subject: [PATCH 30/85] anolis: print logs in the same line + +Signed-off-by: Mauro Carvalho Chehab +[Ruidong: fix conflict with clean code] +--- + ras-arm-handler.c | 16 ++++++++-------- + ras-non-standard-handler.c | 4 ++-- + 2 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/ras-arm-handler.c b/ras-arm-handler.c +index a0dfc51..abd8c9b 
100644 +--- a/ras-arm-handler.c ++++ b/ras-arm-handler.c +@@ -180,7 +180,7 @@ int ras_arm_event_handler(struct trace_seq *s, + if (tm) + strftime(ev.timestamp, sizeof(ev.timestamp), + "%Y-%m-%d %H:%M:%S %z", tm); +- trace_seq_printf(s, "%s\n", ev.timestamp); ++ trace_seq_printf(s, "%s", ev.timestamp); + + if (pevent_get_field_val(s, event, "affinity", record, &val, 1) < 0) + return -1; +@@ -190,27 +190,27 @@ int ras_arm_event_handler(struct trace_seq *s, + if (pevent_get_field_val(s, event, "mpidr", record, &val, 1) < 0) + return -1; + ev.mpidr = val; +- trace_seq_printf(s, "\n MPIDR: 0x%llx", (unsigned long long)ev.mpidr); ++ trace_seq_printf(s, " MPIDR: 0x%llx", (unsigned long long)ev.mpidr); + + if (pevent_get_field_val(s, event, "midr", record, &val, 1) < 0) + return -1; + ev.midr = val; +- trace_seq_printf(s, "\n MIDR: 0x%llx", (unsigned long long)ev.midr); ++ trace_seq_printf(s, " MIDR: 0x%llx", (unsigned long long)ev.midr); + + if (pevent_get_field_val(s, event, "running_state", record, &val, 1) < 0) + return -1; + ev.running_state = val; +- trace_seq_printf(s, "\n running_state: %d", ev.running_state); ++ trace_seq_printf(s, " running_state: %d", ev.running_state); + + if (pevent_get_field_val(s, event, "psci_state", record, &val, 1) < 0) + return -1; + ev.psci_state = val; +- trace_seq_printf(s, "\n psci_state: %d", ev.psci_state); ++ trace_seq_printf(s, " psci_state: %d", ev.psci_state); + + if (pevent_get_field_val(s, event, "pei_len", record, &val, 1) < 0) + return -1; + ev.pei_len = val; +- trace_seq_printf(s, "\n ARM Processor Err Info data len: %d\n", ++ trace_seq_printf(s, " ARM Processor Err Info data len: %d\n", + ev.pei_len); + + ev.pei_error = pevent_get_field_raw(s, event, "buf", record, &len, 1); +@@ -221,7 +221,7 @@ int ras_arm_event_handler(struct trace_seq *s, + if (pevent_get_field_val(s, event, "ctx_len", record, &val, 1) < 0) + return -1; + ev.ctx_len = val; +- trace_seq_printf(s, "\n ARM Processor Err Context Info data len: %d\n", ++ 
trace_seq_printf(s, " ARM Processor Err Context Info data len: %d\n", + ev.ctx_len); + + ev.ctx_error = pevent_get_field_raw(s, event, "buf1", record, &len, 1); +@@ -232,7 +232,7 @@ int ras_arm_event_handler(struct trace_seq *s, + if (pevent_get_field_val(s, event, "oem_len", record, &val, 1) < 0) + return -1; + ev.oem_len = val; +- trace_seq_printf(s, "\n Vendor Specific Err Info data len: %d\n", ++ trace_seq_printf(s, " Vendor Specific Err Info data len: %d\n", + ev.oem_len); + + ev.vsei_error = pevent_get_field_raw(s, event, "buf2", record, &len, 1); +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 8672b16..8efb660 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -168,8 +168,8 @@ int ras_non_standard_event_handler(struct trace_seq *s, + return -1; + if (strcmp(uuid_le(ev.sec_type), + "e8ed898d-df16-43cc-8ecc-54f060ef157f") == 0) +- trace_seq_printf(s, "\n section type: %s", +- "Ampere Specific Error\n"); ++ trace_seq_printf(s, " section type: %s", ++ "Ampere Specific Error"); + else + trace_seq_printf(s, " section type: %s", + uuid_le(ev.sec_type)); +-- +2.33.1 + diff --git a/1031-rasdaemon-ras-memory-failure-handler-update-memory-f.patch b/1031-rasdaemon-ras-memory-failure-handler-update-memory-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..16d936953febcd3eebd76d1fc015e62a4c845018 --- /dev/null +++ b/1031-rasdaemon-ras-memory-failure-handler-update-memory-f.patch @@ -0,0 +1,60 @@ +From bf1839dcc477ee8065c2edfbe07b685cbaf4274e Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Tue, 6 Feb 2024 12:08:00 +0000 +Subject: [PATCH 31/85] rasdaemon: ras-memory-failure-handler: update memory + failure action page types + +Update memory failure action page types corresponding to the same in +mm/memory-failure.c in the kernel. 
+ +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + ras-memory-failure-handler.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index 1951456..adbd736 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -27,10 +27,8 @@ enum mf_action_page_type { + MF_MSG_KERNEL_HIGH_ORDER, + MF_MSG_SLAB, + MF_MSG_DIFFERENT_COMPOUND, +- MF_MSG_POISONED_HUGE, + MF_MSG_HUGE, + MF_MSG_FREE_HUGE, +- MF_MSG_NON_PMD_HUGE, + MF_MSG_UNMAP_FAILED, + MF_MSG_DIRTY_SWAPCACHE, + MF_MSG_CLEAN_SWAPCACHE, +@@ -42,7 +40,6 @@ enum mf_action_page_type { + MF_MSG_CLEAN_LRU, + MF_MSG_TRUNCATED_LRU, + MF_MSG_BUDDY, +- MF_MSG_BUDDY_2ND, + MF_MSG_DAX, + MF_MSG_UNSPLIT_THP, + MF_MSG_UNKNOWN, +@@ -65,10 +62,8 @@ static const struct { + { MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"}, + { MF_MSG_SLAB, "kernel slab page"}, + { MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"}, +- { MF_MSG_POISONED_HUGE, "huge page already hardware poisoned"}, + { MF_MSG_HUGE, "huge page"}, + { MF_MSG_FREE_HUGE, "free huge page"}, +- { MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"}, + { MF_MSG_UNMAP_FAILED, "unmapping failed page"}, + { MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"}, + { MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"}, +@@ -80,7 +75,6 @@ static const struct { + { MF_MSG_CLEAN_LRU, "clean LRU page"}, + { MF_MSG_TRUNCATED_LRU, "already truncated LRU page"}, + { MF_MSG_BUDDY, "free buddy page"}, +- { MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"}, + { MF_MSG_DAX, "dax page"}, + { MF_MSG_UNSPLIT_THP, "unsplit thp"}, + { MF_MSG_UNKNOWN, "unknown page"}, +-- +2.33.1 + diff --git a/1032-rasdaemon-ras-mc-ctl-Add-support-to-display-mcastatu.patch b/1032-rasdaemon-ras-mc-ctl-Add-support-to-display-mcastatu.patch new file mode 100644 index 0000000000000000000000000000000000000000..5b8fe9091ad4b28d5212526b0bdf89a95002063a --- /dev/null +++ 
b/1032-rasdaemon-ras-mc-ctl-Add-support-to-display-mcastatu.patch @@ -0,0 +1,60 @@ +From e2efcbe5b1627c577f8d133b958114c34539e459 Mon Sep 17 00:00:00 2001 +From: Avadhut Naik +Date: Mon, 25 Mar 2024 23:06:08 -0500 +Subject: [PATCH 32/85] rasdaemon: ras-mc-ctl: Add support to display + mcastatus_msg string + +Currently, the mcastatus_msg string of struct mce_event is added to the +SQLite database by the rasdaemon when it is recording errors. The same +however, is not outputted by the ras-mc-ctl utility. + +The string provides important error information relating to the received +MCE. For example, on AMD SMCA systems, the string outputs extended error +code and description. As such, the string should be present in the +output of ras-mc-ctl utility. + +Add support to output the string through the ras-mc-ctl utility. + +Signed-off-by: Avadhut Naik +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index fb35afe..725d215 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1329,7 +1329,7 @@ sub errors + { + require DBI; + my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); +- my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); ++ my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location); + my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); + my ($bus_name, $dev_name, $driver_name, $reporter_name); + my ($dev, $sector, $nr_sector, $error, $rwbs, $cmd); +@@ -1497,10 +1497,10 @@ sub errors + + # MCE 
mce_record errors + if ($has_mce == 1) { +- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; ++ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, mcastatus_msg, user_action, mc_location from mce_record$conf{opt}{since} order by id"; + $query_handle = $dbh->prepare($query); + $query_handle->execute(); +- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); ++ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $mcastatus_msg, $user_action, $mc_location)); + $out = ""; + while($query_handle->fetch()) { + $out .= "$id $time error: $msg"; +@@ -1508,6 +1508,7 @@ sub errors + $out .= ", bank $bank_name" if ($bank_name); + $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); + $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); ++ $out .= ", mca $mcastatus_msg" if ($mcastatus_msg); + $out .= ", $mc_location" if ($mc_location); + $out .= ", $user_action" if ($user_action); + $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); +-- +2.33.1 + diff --git a/1033-rasdaemon-fix-table-create-if-some-cpus-are-offline.patch b/1033-rasdaemon-fix-table-create-if-some-cpus-are-offline.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a0bfd5c3bc962cf47fd76c88bcb2fa71a257295 --- /dev/null +++ b/1033-rasdaemon-fix-table-create-if-some-cpus-are-offline.patch 
@@ -0,0 +1,177 @@ +From bbcf65669b1efda78bb4a4762bfb4e3886d4f371 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sun, 5 Mar 2023 23:14:42 +0000 +Subject: [PATCH 33/85] rasdaemon: fix table create if some cpus are offline + +Fix for regression in ras_mc_create_table() if some cpus are offline +at the system start + +Issue: + +Regression in the ras_mc_create_table() if some of the cpus are offline +at the system start when run the rasdaemon. + +This issue is reproducible in ras_mc_create_table() with decode and +record non-standard events and reproducible sometimes with +ras_mc_create_table() for the standard events. + +Also in the multi thread way, there is memory leak in ras_mc_event_opendb() +as struct sqlite3_priv *priv and sqlite3 *db allocated/initialized per +thread, but stored in the common struct ras_events ras in pthread data, +which is shared across the threads. + +Reason: + +when the system starts with some of the cpus offline and then run +the rasdaemon, read_ras_event_all_cpus() exit with error and switch to +the multi thread way. However read() in read_ras_event() return error in +threads for each of the offline CPUs and does clean up including calling +ras_mc_event_closedb(). + +Since the 'struct ras_events ras' passed in the pthread_data to each of the +threads is common, struct sqlite3_priv *priv and sqlite3 *db allocated/ +initialized per thread and stored in the common 'struct ras_events ras', +are getting overwritten in each ras_mc_event_opendb()(which called from +pthread per cpu), result memory leak. + +Also when ras_mc_event_closedb() is called in the above error case from +the threads corresponding to the offline cpus, close the sqlite3 *db and +free sqlite3_priv *priv stored in the common 'struct ras_events ras', +result regression when accessing priv->db in the ras_mc_create_table() +from another context later. 
+ +Solution: + +In ras_mc_event_opendb(), allocate struct sqlite3_priv *priv, +init sqlite3 *db and create tables common for the threads with shared +'struct ras_events ras' based on a reference count and free them in the +same way. + +Also protect critical code ras_mc_event_opendb() and ras_mc_event_closedb() +using mutex in the multi thread case from any regression caused by the +thread pre-emption. + +Reported-by: Lei Feng +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 16 +++++++++++++++- + ras-events.h | 4 +++- + ras-record.c | 12 ++++++++++++ + 3 files changed, 30 insertions(+), 2 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index 31a4e0b..a5ff661 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -630,19 +630,25 @@ static void *handle_ras_events_cpu(void *priv) + + log(TERM, LOG_INFO, "Listening to events on cpu %d\n", pdata->cpu); + if (pdata->ras->record_events) { ++ pthread_mutex_lock(&pdata->ras->db_lock); + if (ras_mc_event_opendb(pdata->cpu, pdata->ras)) { ++ pthread_mutex_unlock(&pdata->ras->db_lock); + log(TERM, LOG_ERR, "Can't open database\n"); + close(fd); + kbuffer_free(kbuf); + free(page); + return 0; + } ++ pthread_mutex_unlock(&pdata->ras->db_lock); + } + + read_ras_event(fd, pdata, kbuf, page); + +- if (pdata->ras->record_events) ++ if (pdata->ras->record_events) { ++ pthread_mutex_lock(&pdata->ras->db_lock); + ras_mc_event_closedb(pdata->cpu, pdata->ras); ++ pthread_mutex_unlock(&pdata->ras->db_lock); ++ } + + close(fd); + kbuffer_free(kbuf); +@@ -992,6 +998,11 @@ int handle_ras_events(int record_events) + + /* Poll doesn't work on this kernel. 
Fallback to pthread way */ + if (rc == -255) { ++ if (pthread_mutex_init(&ras->db_lock, NULL) != 0) { ++ log(SYSLOG, LOG_INFO, "sqlite db lock init has failed\n"); ++ goto err; ++ } ++ + log(SYSLOG, LOG_INFO, + "Opening one thread per cpu (%d threads)\n", cpus); + for (i = 0; i < cpus; i++) { +@@ -1004,6 +1015,8 @@ int handle_ras_events(int record_events) + i); + while (--i) + pthread_cancel(data[i].thread); ++ ++ pthread_mutex_destroy(&ras->db_lock); + goto err; + } + } +@@ -1011,6 +1024,7 @@ int handle_ras_events(int record_events) + /* Wait for all threads to complete */ + for (i = 0; i < cpus; i++) + pthread_join(data[i].thread, NULL); ++ pthread_mutex_destroy(&ras->db_lock); + } + + log(SYSLOG, LOG_INFO, "Huh! something got wrong. Aborting.\n"); +diff --git a/ras-events.h b/ras-events.h +index 4e36726..73f6bbb 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -56,7 +56,9 @@ struct ras_events { + time_t uptime_diff; + + /* For ras-record */ +- void *db_priv; ++ void *db_priv; ++ int db_ref_count; ++ pthread_mutex_t db_lock; + + /* For the mce handler */ + struct mce_priv *mce_priv; +diff --git a/ras-record.c b/ras-record.c +index 8f61d40..adb00ca 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -763,6 +763,10 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + + printf("Calling %s()\n", __FUNCTION__); + ++ ras->db_ref_count++; ++ if (ras->db_ref_count > 1) ++ return 0; ++ + ras->db_priv = NULL; + + priv = calloc(1, sizeof(*priv)); +@@ -912,6 +916,13 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + + printf("Calling %s()\n", __func__); + ++ if (ras->db_ref_count > 0) ++ ras->db_ref_count--; ++ else ++ return -1; ++ if (ras->db_ref_count > 0) ++ return 0; ++ + if (!priv) + return -1; + +@@ -1018,6 +1029,7 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + log(TERM, LOG_ERR, + "cpu %u: Failed to shutdown sqlite: error = %d\n", cpu, rc); + free(priv); ++ ras->db_priv = NULL; + + return 0; + } +-- +2.33.1 
+ diff --git a/1034-rasdaemon-fix-return-value-type-issue-of-read-write-.patch b/1034-rasdaemon-fix-return-value-type-issue-of-read-write-.patch new file mode 100644 index 0000000000000000000000000000000000000000..e4bc686d9e26434f8b2fcf7337779466107ebac3 --- /dev/null +++ b/1034-rasdaemon-fix-return-value-type-issue-of-read-write-.patch @@ -0,0 +1,96 @@ +From 2ef01c6e146f9a806fad6d4bdc17578c85e76d34 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Thu, 11 May 2023 10:54:26 +0800 +Subject: [PATCH 34/85] rasdaemon: fix return value type issue of read/write + function from unistd.h + +The return value type of read/write function from unistd.h is ssize_t. +It's signed normally, and return -1 on error. Fix incorrect use in the +function read_ras_event_all_cpus(). + +BTW, make setting buffer_percent as a separate function. + +Fixes: 94750bcf9309 ("rasdaemon: Fix poll() on per_cpu trace_pipe_raw blocks indefinitely") +Signed-off-by: Xiaofei Tan +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 45 ++++++++++++++++++++++++++++++--------------- + 1 file changed, 30 insertions(+), 15 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index a5ff661..fc8faec 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -366,10 +366,37 @@ static int get_num_cpus(struct ras_events *ras) + #endif + } + ++static int set_buffer_percent(struct ras_events *ras, int percent) ++{ ++ char buf[16]; ++ ssize_t size; ++ int res = 0; ++ int fd; ++ ++ fd = open_trace(ras, "buffer_percent", O_WRONLY); ++ if (fd >= 0) { ++ /* For the backward compatibility to the old kernels, do not return ++ * if fail to set the buffer_percent. 
++ */ ++ snprintf(buf, sizeof(buf), "%d", percent); ++ size = write(fd, buf, strlen(buf)); ++ if (size <= 0) { ++ log(TERM, LOG_WARNING, "can't write to buffer_percent\n"); ++ res = -1; ++ } ++ close(fd); ++ } else { ++ log(TERM, LOG_WARNING, "Can't open buffer_percent\n"); ++ res = -1; ++ } ++ ++ return res; ++} ++ + static int read_ras_event_all_cpus(struct pthread_data *pdata, + unsigned n_cpus) + { +- unsigned size; ++ ssize_t size; + unsigned long long time_stamp; + void *data; + int ready, i, count_nready; +@@ -381,8 +408,6 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + int warnonce[n_cpus]; + char pipe_raw[PATH_MAX]; + int legacy_kernel = 0; +- int fd; +- char buf[16]; + #if 0 + int need_sleep = 0; + #endif +@@ -409,18 +434,8 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + * Set buffer_percent to 0 so that poll() will return immediately + * when the trace data is available in the ras per_cpu trace pipe_raw + */ +- fd = open_trace(pdata[0].ras, "buffer_percent", O_WRONLY); +- if (fd >= 0) { +- /* For the backward compatibility to the old kernels, do not return +- * if fail to set the buffer_percent. 
+- */ +- snprintf(buf, sizeof(buf), "0"); +- size = write(fd, buf, strlen(buf)); +- if (size <= 0) +- log(TERM, LOG_WARNING, "can't write to buffer_percent\n"); +- close(fd); +- } else +- log(TERM, LOG_WARNING, "Can't open buffer_percent\n"); ++ if (set_buffer_percent(pdata[0].ras, 0)) ++ log(TERM, LOG_WARNING, "Set buffer_percent failed\n"); + + for (i = 0; i < (n_cpus + 1); i++) + fds[i].fd = -1; +-- +2.33.1 + diff --git a/1035-rasdaemon-fix-issue-of-signed-and-unsigned-integer-c.patch b/1035-rasdaemon-fix-issue-of-signed-and-unsigned-integer-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..594b0c03d75c0ff41f766702a6a37b58ba021c59 --- /dev/null +++ b/1035-rasdaemon-fix-issue-of-signed-and-unsigned-integer-c.patch @@ -0,0 +1,110 @@ +From 5727ef175dcafa012e04c8bc991d876ea29bbc66 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Tue, 30 May 2023 11:44:12 +0100 +Subject: [PATCH 35/85] rasdaemon: fix issue of signed and unsigned integer + comparison and remove redundant header file + +1. The return value of ARRAY_SIZE() is unsigned integer. It isn't right to +compare it with a signed integer. This patch fix them. + +2. Remove redundant header file and adjust the header files sequence. 
+ +Signed-off-by: Xiaofei Tan +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisi_hip08.c | 2 +- + non-standard-hisilicon.c | 8 ++++---- + ras-diskerror-handler.c | 2 +- + ras-memory-failure-handler.c | 5 ++--- + 4 files changed, 8 insertions(+), 9 deletions(-) + +diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c +index 4ef47ea..61f12eb 100644 +--- a/non-standard-hisi_hip08.c ++++ b/non-standard-hisi_hip08.c +@@ -1029,7 +1029,7 @@ static struct ras_ns_ev_decoder hip08_ns_ev_decoder[] = { + + static void __attribute__((constructor)) hip08_init(void) + { +- int i; ++ unsigned int i; + + for (i = 0; i < ARRAY_SIZE(hip08_ns_ev_decoder); i++) + register_ns_ev_decoder(&hip08_ns_ev_decoder[i]); +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 2b00ed6..721821e 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -366,13 +366,13 @@ static int decode_hisi_common_section(struct ras_events *ras, + trace_seq_printf(s, "%s\n", hevent.error_msg); + + if (err->val_bits & BIT(HISI_COMMON_VALID_REG_ARRAY_SIZE) && err->reg_array_size > 0) { +- int i; ++ unsigned int i; + + trace_seq_printf(s, "Register Dump:\n"); + for (i = 0; i < err->reg_array_size / sizeof(uint32_t); i++) { +- trace_seq_printf(s, "reg%02d=0x%08x\n", i, ++ trace_seq_printf(s, "reg%02u=0x%08x\n", i, + err->reg_array[i]); +- HISI_SNPRINTF(hevent.reg_msg, "reg%02d=0x%08x", ++ HISI_SNPRINTF(hevent.reg_msg, "reg%02u=0x%08x", + i, err->reg_array[i]); + } + } +@@ -398,7 +398,7 @@ static struct ras_ns_ev_decoder hisi_section_ns_ev_decoder[] = { + + static void __attribute__((constructor)) hisi_ns_init(void) + { +- int i; ++ unsigned int i; + + for (i = 0; i < ARRAY_SIZE(hisi_section_ns_ev_decoder); i++) + register_ns_ev_decoder(&hisi_section_ns_ev_decoder[i]); +diff --git a/ras-diskerror-handler.c b/ras-diskerror-handler.c +index b16319f..b46f859 100644 +--- a/ras-diskerror-handler.c ++++ b/ras-diskerror-handler.c +@@ -52,7 
+52,7 @@ static const struct { + + static const char *get_blk_error(int err) + { +- int i; ++ unsigned int i; + + for (i = 0; i < ARRAY_SIZE(blk_errors); i++) + if (blk_errors[i].error == err) +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index adbd736..4798ead 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -16,7 +16,6 @@ + #include + #include + #include "libtrace/kbuffer.h" +-#include "ras-memory-failure-handler.h" + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-report.h" +@@ -93,7 +92,7 @@ static const struct { + + static const char *get_page_type(int page_type) + { +- int i; ++ unsigned int i; + + for (i = 0; i < ARRAY_SIZE(mf_page_type); i++) + if (mf_page_type[i].type == page_type) +@@ -104,7 +103,7 @@ static const char *get_page_type(int page_type) + + static const char *get_action_result(int result) + { +- int i; ++ unsigned int i; + + for (i = 0; i < ARRAY_SIZE(mf_action_result); i++) + if (mf_action_result[i].result == result) +-- +2.33.1 + diff --git a/1036-rasdaemon-Add-support-for-creating-the-vendor-error-.patch b/1036-rasdaemon-Add-support-for-creating-the-vendor-error-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8b995cf06163e03d75c2c6294facd9b841907d29 --- /dev/null +++ b/1036-rasdaemon-Add-support-for-creating-the-vendor-error-.patch @@ -0,0 +1,343 @@ +From eae25e65c6dbbd21797c42c490c427754943d688 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 31 May 2023 16:24:36 +0100 +Subject: [PATCH 36/85] rasdaemon: Add support for creating the vendor error + tables at startup + +1. Support for create/open the vendor error tables at rasdaemon startup. +2. Make changes in the HiSilicon error handling code for the same. 
+ +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + non-standard-hisi_hip08.c | 76 ++++++++++++++++++++++---------------- + non-standard-hisilicon.c | 28 +++++++++----- + ras-events.c | 17 ++++++++- + ras-non-standard-handler.c | 35 +++++++++++++++++- + ras-non-standard-handler.h | 3 ++ + 5 files changed, 116 insertions(+), 43 deletions(-) + +diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c +index 61f12eb..0899812 100644 +--- a/non-standard-hisi_hip08.c ++++ b/non-standard-hisi_hip08.c +@@ -654,6 +654,20 @@ static void decode_oem_type1_err_regs(struct ras_ns_ev_decoder *ev_decoder, + step_vendor_data_tab(ev_decoder, "hip08_oem_type1_event_tab"); + } + ++static int add_hip08_oem_type1_table(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder) ++{ ++#ifdef HAVE_SQLITE3 ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &hip08_oem_type1_event_tab) != SQLITE_OK) { ++ log(TERM, LOG_WARNING, "Failed to create sql hip08_oem_type1_event_tab\n"); ++ return -1; ++ } ++ } ++#endif ++ return 0; ++} ++ + /* error data decoding functions */ + static int decode_hip08_oem_type1_error(struct ras_events *ras, + struct ras_ns_ev_decoder *ev_decoder, +@@ -669,17 +683,6 @@ static int decode_hip08_oem_type1_error(struct ras_events *ras, + return -1; + } + +-#ifdef HAVE_SQLITE3 +- if (ras->record_events && !ev_decoder->stmt_dec_record) { +- if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, +- &hip08_oem_type1_event_tab) +- != SQLITE_OK) { +- trace_seq_printf(s, +- "create sql hip08_oem_type1_event_tab fail\n"); +- return -1; +- } +- } +-#endif + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HIP08_OEM_TYPE1_FIELD_TIMESTAMP, + 0, event->timestamp); +@@ -827,6 +830,20 @@ static void decode_oem_type2_err_regs(struct ras_ns_ev_decoder *ev_decoder, + step_vendor_data_tab(ev_decoder, "hip08_oem_type2_event_tab"); + } + ++static int 
add_hip08_oem_type2_table(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder) ++{ ++#ifdef HAVE_SQLITE3 ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &hip08_oem_type2_event_tab) != SQLITE_OK) { ++ log(TERM, LOG_WARNING, "Failed to create sql hip08_oem_type2_event_tab\n"); ++ return -1; ++ } ++ } ++#endif ++ return 0; ++} ++ + static int decode_hip08_oem_type2_error(struct ras_events *ras, + struct ras_ns_ev_decoder *ev_decoder, + struct trace_seq *s, +@@ -841,16 +858,6 @@ static int decode_hip08_oem_type2_error(struct ras_events *ras, + return -1; + } + +-#ifdef HAVE_SQLITE3 +- if (ras->record_events && !ev_decoder->stmt_dec_record) { +- if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, +- &hip08_oem_type2_event_tab) != SQLITE_OK) { +- trace_seq_printf(s, +- "create sql hip08_oem_type2_event_tab fail\n"); +- return -1; +- } +- } +-#endif + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HIP08_OEM_TYPE2_FIELD_TIMESTAMP, + 0, event->timestamp); +@@ -977,6 +984,20 @@ static void decode_pcie_local_err_regs(struct ras_ns_ev_decoder *ev_decoder, + step_vendor_data_tab(ev_decoder, "hip08_pcie_local_event_tab"); + } + ++static int add_hip08_pcie_local_table(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder) ++{ ++#ifdef HAVE_SQLITE3 ++ if (ras->record_events && !ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &hip08_pcie_local_event_tab) != SQLITE_OK) { ++ log(TERM, LOG_WARNING, "Failed to create sql hip08_pcie_local_event_tab\n"); ++ return -1; ++ } ++ } ++#endif ++ return 0; ++} ++ + static int decode_hip08_pcie_local_error(struct ras_events *ras, + struct ras_ns_ev_decoder *ev_decoder, + struct trace_seq *s, +@@ -991,16 +1012,6 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, + return -1; + } + +-#ifdef HAVE_SQLITE3 +- if (ras->record_events && !ev_decoder->stmt_dec_record) 
{ +- if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, +- &hip08_pcie_local_event_tab) != SQLITE_OK) { +- trace_seq_printf(s, +- "create sql hip08_pcie_local_event_tab fail\n"); +- return -1; +- } +- } +-#endif + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HIP08_PCIE_LOCAL_FIELD_TIMESTAMP, + 0, event->timestamp); +@@ -1015,14 +1026,17 @@ static int decode_hip08_pcie_local_error(struct ras_events *ras, + static struct ras_ns_ev_decoder hip08_ns_ev_decoder[] = { + { + .sec_type = "1f8161e1-55d6-41e6-bd10-7afd1dc5f7c5", ++ .add_table = add_hip08_oem_type1_table, + .decode = decode_hip08_oem_type1_error, + }, + { + .sec_type = "45534ea6-ce23-4115-8535-e07ab3aef91d", ++ .add_table = add_hip08_oem_type2_table, + .decode = decode_hip08_oem_type2_error, + }, + { + .sec_type = "b2889fc9-e7d7-4f9d-a867-af42e98be772", ++ .add_table = add_hip08_pcie_local_table, + .decode = decode_hip08_pcie_local_error, + }, + }; +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 721821e..7296d28 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -341,6 +341,23 @@ static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, + HISI_SNPRINTF(event->error_msg, "]"); + } + ++static int add_hisi_common_table(struct ras_events *ras, ++ struct ras_ns_ev_decoder *ev_decoder) ++{ ++#ifdef HAVE_SQLITE3 ++ if (ras->record_events && ++ !ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &hisi_common_section_tab) != SQLITE_OK) { ++ log(TERM, LOG_WARNING, "Failed to create sql hisi_common_section_tab\n"); ++ return -1; ++ } ++ } ++#endif ++ ++ return 0; ++} ++ + static int decode_hisi_common_section(struct ras_events *ras, + struct ras_ns_ev_decoder *ev_decoder, + struct trace_seq *s, +@@ -350,16 +367,6 @@ static int decode_hisi_common_section(struct ras_events *ras, + (struct hisi_common_error_section *)event->error; + struct hisi_event hevent; + +-#ifdef 
HAVE_SQLITE3 +- if (ras->record_events && !ev_decoder->stmt_dec_record) { +- if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, +- &hisi_common_section_tab) != SQLITE_OK) { +- trace_seq_printf(s, "create sql hisi_common_section_tab fail\n"); +- return -1; +- } +- } +-#endif +- + memset(&hevent, 0, sizeof(struct hisi_event)); + trace_seq_printf(s, "\nHisilicon Common Error Section:\n"); + decode_hisi_common_section_hdr(ev_decoder, err, &hevent); +@@ -392,6 +399,7 @@ static int decode_hisi_common_section(struct ras_events *ras, + static struct ras_ns_ev_decoder hisi_section_ns_ev_decoder[] = { + { + .sec_type = "c8b328a8-9917-4af6-9a13-2e08ab2e7586", ++ .add_table = add_hisi_common_table, + .decode = decode_hisi_common_section, + }, + }; +diff --git a/ras-events.c b/ras-events.c +index fc8faec..8d70e43 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -472,6 +472,10 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + if (pdata[0].ras->record_events) { + if (ras_mc_event_opendb(pdata[0].cpu, pdata[0].ras)) + goto error; ++#ifdef HAVE_NON_STANDARD ++ if (ras_ns_add_vendor_tables(pdata[0].ras)) ++ log(TERM, LOG_ERR, "Can't add vendor table\n"); ++#endif + } + + do { +@@ -556,8 +560,12 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + "Old kernel detected. 
Stop listening and fall back to pthread way.\n"); + + cleanup: +- if (pdata[0].ras->record_events) ++ if (pdata[0].ras->record_events) { ++#ifdef HAVE_NON_STANDARD ++ ras_ns_finalize_vendor_tables(); ++#endif + ras_mc_event_closedb(pdata[0].cpu, pdata[0].ras); ++ } + + error: + kbuffer_free(kbuf); +@@ -654,6 +662,10 @@ static void *handle_ras_events_cpu(void *priv) + free(page); + return 0; + } ++#ifdef HAVE_NON_STANDARD ++ if (ras_ns_add_vendor_tables(pdata->ras)) ++ log(TERM, LOG_ERR, "Can't add vendor table\n"); ++#endif + pthread_mutex_unlock(&pdata->ras->db_lock); + } + +@@ -661,6 +673,9 @@ static void *handle_ras_events_cpu(void *priv) + + if (pdata->ras->record_events) { + pthread_mutex_lock(&pdata->ras->db_lock); ++#ifdef HAVE_NON_STANDARD ++ ras_ns_finalize_vendor_tables(); ++#endif + ras_mc_event_closedb(pdata->cpu, pdata->ras); + pthread_mutex_unlock(&pdata->ras->db_lock); + } +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 8efb660..762993b 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -75,6 +75,32 @@ int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) + return 0; + } + ++int ras_ns_add_vendor_tables(struct ras_events *ras) ++{ ++ struct ras_ns_ev_decoder *ns_ev_decoder; ++ int error = 0; ++ ++#ifdef HAVE_SQLITE3 ++ if (!ras) ++ return -1; ++ ++ ns_ev_decoder = ras_ns_ev_dec_list; ++ while (ns_ev_decoder) { ++ if (ns_ev_decoder->add_table && !ns_ev_decoder->stmt_dec_record) { ++ error = ns_ev_decoder->add_table(ras, ns_ev_decoder); ++ if (error) ++ break; ++ } ++ ns_ev_decoder = ns_ev_decoder->next; ++ } ++ ++ if (error) ++ return -1; ++#endif ++ ++ return 0; ++} ++ + static int find_ns_ev_decoder(const char *sec_type, struct ras_ns_ev_decoder **p_ns_ev_dec) + { + struct ras_ns_ev_decoder *ns_ev_decoder; +@@ -96,7 +122,7 @@ static int find_ns_ev_decoder(const char *sec_type, struct ras_ns_ev_decoder **p + return 0; + } + +-static void unregister_ns_ev_decoder(void) ++void 
ras_ns_finalize_vendor_tables(void) + { + #ifdef HAVE_SQLITE3 + struct ras_ns_ev_decoder *ns_ev_decoder = ras_ns_ev_dec_list; +@@ -108,6 +134,13 @@ static void unregister_ns_ev_decoder(void) + } + ns_ev_decoder = ns_ev_decoder->next; + } ++#endif ++} ++ ++static void unregister_ns_ev_decoder(void) ++{ ++#ifdef HAVE_SQLITE3 ++ ras_ns_finalize_vendor_tables(); + #endif + ras_ns_ev_dec_list = NULL; + } +diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h +index 393b756..834f84a 100644 +--- a/ras-non-standard-handler.h ++++ b/ras-non-standard-handler.h +@@ -20,6 +20,7 @@ + struct ras_ns_ev_decoder { + struct ras_ns_ev_decoder *next; + const char *sec_type; ++ int (*add_table)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder); + int (*decode)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder, + struct trace_seq *s, struct ras_non_standard_event *event); + #ifdef HAVE_SQLITE3 +@@ -36,6 +37,8 @@ void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index); + + #ifdef HAVE_NON_STANDARD + int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder); ++int ras_ns_add_vendor_tables(struct ras_events *ras); ++void ras_ns_finalize_vendor_tables(void); + #else + static inline int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) { return 0; }; + #endif +-- +2.33.1 + diff --git a/1037-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch b/1037-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch new file mode 100644 index 0000000000000000000000000000000000000000..95d7ce5bad0b51039a3f71c4ab0f8997b757fc6b --- /dev/null +++ b/1037-rasdaemon-Fix-for-vendor-errors-are-not-recorded-in-.patch @@ -0,0 +1,102 @@ +From de5ee630f0009195e115e478e3bb79f6e9fc3a9a Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Wed, 20 Mar 2024 12:16:05 +0000 +Subject: [PATCH 37/85] rasdaemon: Fix for vendor errors are not recorded in + the SQLite database if some cpus are offline + +Fix for vendor errors are not recorded in the 
SQLite database if some cpus +are offline at the system start. + +Issue: + +This issue is reproducible by offline some cpus, run +./rasdaemon -f --record & and +inject vendor specific error supported in the rasdaemon. + +Reason: + +When the system starts with some of the cpus offline and then run +the rasdaemon, read_ras_event_all_cpus() exit with error and switch to +the multi thread way. However read() in read_ras_event() return error in +threads for each of the offline CPUs and does clean up including calling +ras_ns_finalize_vendor_tables(), which invokes sqlite3_finalize() on vendor +tables created. Thus the vendor error data does not stored in the SQLite +database when such error is reported next time. + +Solution: + +In ras_ns_add_vendor_tables() and ras_ns_finalize_vendor_tables() use +reference count and close vendor tables which created in ras_ns_add_vendor_tables() +based on the reference count. + +Reported-by: Junhao He +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + ras-non-standard-handler.c | 16 ++++++++++++++++ + ras-non-standard-handler.h | 1 + + 2 files changed, 17 insertions(+) + +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 762993b..3a4e300 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -65,6 +65,7 @@ int register_ns_ev_decoder(struct ras_ns_ev_decoder *ns_ev_decoder) + #endif + if (!ras_ns_ev_dec_list) { + ras_ns_ev_dec_list = ns_ev_decoder; ++ ras_ns_ev_dec_list->ref_count = 0; + } else { + list = ras_ns_ev_dec_list; + while (list->next) +@@ -85,6 +86,8 @@ int ras_ns_add_vendor_tables(struct ras_events *ras) + return -1; + + ns_ev_decoder = ras_ns_ev_dec_list; ++ if (ras_ns_ev_dec_list) ++ ras_ns_ev_dec_list->ref_count++; + while (ns_ev_decoder) { + if (ns_ev_decoder->add_table && !ns_ev_decoder->stmt_dec_record) { + error = ns_ev_decoder->add_table(ras, ns_ev_decoder); +@@ -127,6 +130,16 @@ void ras_ns_finalize_vendor_tables(void) + #ifdef HAVE_SQLITE3 + 
struct ras_ns_ev_decoder *ns_ev_decoder = ras_ns_ev_dec_list; + ++ if (!ras_ns_ev_dec_list) ++ return; ++ ++ if (ras_ns_ev_dec_list->ref_count > 0) ++ ras_ns_ev_dec_list->ref_count--; ++ else ++ return; ++ if (ras_ns_ev_dec_list->ref_count > 0) ++ return; ++ + while (ns_ev_decoder) { + if (ns_ev_decoder->stmt_dec_record) { + ras_mc_finalize_vendor_table(ns_ev_decoder->stmt_dec_record); +@@ -140,6 +153,9 @@ void ras_ns_finalize_vendor_tables(void) + static void unregister_ns_ev_decoder(void) + { + #ifdef HAVE_SQLITE3 ++ if (!ras_ns_ev_dec_list) ++ return; ++ ras_ns_ev_dec_list->ref_count = 1; + ras_ns_finalize_vendor_tables(); + #endif + ras_ns_ev_dec_list = NULL; +diff --git a/ras-non-standard-handler.h b/ras-non-standard-handler.h +index 834f84a..735ea76 100644 +--- a/ras-non-standard-handler.h ++++ b/ras-non-standard-handler.h +@@ -19,6 +19,7 @@ + + struct ras_ns_ev_decoder { + struct ras_ns_ev_decoder *next; ++ uint16_t ref_count; + const char *sec_type; + int (*add_table)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder); + int (*decode)(struct ras_events *ras, struct ras_ns_ev_decoder *ev_decoder, +-- +2.33.1 + diff --git a/1038-rasdaemon-Update-SMCA-bank-error-descriptions.patch b/1038-rasdaemon-Update-SMCA-bank-error-descriptions.patch new file mode 100644 index 0000000000000000000000000000000000000000..076231e65bffb69d58e86ee0ed442cca5b44d07c --- /dev/null +++ b/1038-rasdaemon-Update-SMCA-bank-error-descriptions.patch @@ -0,0 +1,139 @@ +From 70ce037486c1c166a253bc0015f6efd1e7f0955a Mon Sep 17 00:00:00 2001 +From: Avadhut Naik +Date: Fri, 10 May 2024 13:20:19 -0500 +Subject: [PATCH 38/85] rasdaemon: Update SMCA bank error descriptions + +Update error descriptions of SMCA bank types to support AMD's new Family +1Ah-based processors. +Also, modify some existing error descriptions to better reflect the error +received. 
+ +Signed-off-by: Avadhut Naik +Signed-off-by: Mauro Carvalho Chehab +--- + mce-amd-smca.c | 32 +++++++++++++++++++++++++++++--- + 1 file changed, 29 insertions(+), 3 deletions(-) + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 233fa0a..3fd97e0 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -108,7 +108,7 @@ static const char * const smca_ls_mce_desc[] = { + "Store queue parity", + "Miss address buffer payload parity", + "L1 TLB parity", +- "Reserved", ++ "DC Tag error type 5", + "DC tag error type 6", + "DC tag error type 1", + "Internal error type 1", +@@ -125,6 +125,12 @@ static const char * const smca_ls_mce_desc[] = { + "DC tag error type 3", + "DC tag error type 5", + "L2 fill data error", ++ "Error on SCB cacheline state or address field", ++ "Error on SCB data, commit pipe 0", ++ "Error on SCB data, commit pipe 1", ++ "Error on SCB data for non-cacheable DRAM or IO", ++ "System Read Data Error detected by write combine buffer", ++ "Hardware Asserts", + }; + + static const char * const smca_ls2_mce_desc[] = { +@@ -168,7 +174,7 @@ static const char * const smca_if_mce_desc[] = { + "BP L1-BTB Multi-Hit Error", + "BP L2-BTB Multi-Hit Error", + "L2 Cache Response Poison error", +- "L2 Cache Error Response", ++ "System Read Data error", + "Hardware Assertion Error", + "L1-TLB Multi-Hit", + "L2-TLB Multi-Hit", +@@ -182,6 +188,7 @@ static const char * const smca_l2_mce_desc[] = { + "L2M Data Array ECC Error", + "Hardware Assert Error", + "SDP Read Response Parity Error", ++ "Error initiated by programmable state machine", + }; + + static const char * const smca_de_mce_desc[] = { +@@ -193,7 +200,7 @@ static const char * const smca_de_mce_desc[] = { + "Fetch address FIFO parity error", + "Patch RAM data parity error", + "Patch RAM sequencer parity error", +- "Micro-op buffer parity error", ++ "Micro-op fetch queue parity error", + "Hardware Assertion MCA Error", + }; + +@@ -235,6 +242,7 @@ static const char * const smca_l3_mce_desc[] = { + "L3 victim 
queue Data Fabric error", + "L3 Hardware Assertion", + "XI WCB Parity Poison Creation event", ++ "Machine check error initiated by DSM action", + }; + + static const char * const smca_cs_mce_desc[] = { +@@ -268,6 +276,9 @@ static const char * const smca_cs2_mce_desc[] = { + "Address Violation on the no data channel", + "Security Violation on the no data channel", + "Hardware Assert Error", ++ "Shadow Tag Array Protocol Error", ++ "Shadow Tag ECC Error", ++ "Shadow Tag Transaction Error", + }; + + /* +@@ -303,6 +314,8 @@ static const char * const smca_pie_mce_desc[] = { + "A deferred error was detected in the DF", + "Watch Dog Timer", + "An SRAM ECC error was detected in the CNLI block", ++ "Register access during DF Cstate", ++ "DSM Error", + }; + + static const char * const smca_umc_mce_desc[] = { +@@ -318,6 +331,11 @@ static const char * const smca_umc_mce_desc[] = { + "ECS Error", + "UMC Throttling Error", + "Read CRC Error", ++ "Reserved", ++ "Reserved", ++ "Reserved", ++ "Reserved", ++ "RFM SRAM ECC error", + }; + + static const char * const smca_umc_quirk_mce_desc[] = { +@@ -391,6 +409,12 @@ static const char * const smca_psp2_mce_desc[] = { + "TLB Bank 0 parity error", + "TLB Bank 1 parity error", + "System Hub Read Buffer ECC or parity error", ++ "FUSE IP SRAM ECC or parity error", ++ "PCRU FUSE SRAM ECC or parity error", ++ "SIB SRAM parity error", ++ "mpASP SECEMC Error", ++ "mpASP A5 Hang", ++ "SIB WDT error", + }; + + static const char * const smca_smu_mce_desc[] = { +@@ -430,6 +454,7 @@ static const char * const smca_mp5_mce_desc[] = { + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", ++ "Fuse SRAM ECC or parity error", + }; + + static const char * const smca_mpdma_mce_desc[] = { +@@ -482,6 +507,7 @@ static const char * const smca_mpdma_mce_desc[] = { + "MPDMA PTE Internal Data FIFO ECC or parity error", + "MPDMA PTE Command Memory DMA ECC or 
parity error", + "MPDMA PTE Command Memory Internal ECC or parity error", ++ "MPDMA TVF SDP Master Memory 7 ECC or parity error", + }; + + static const char * const smca_nbio_mce_desc[] = { +-- +2.33.1 + diff --git a/1039-rasdaemon-Add-Corrected-Internal-Error-for-aer_cor_e.patch b/1039-rasdaemon-Add-Corrected-Internal-Error-for-aer_cor_e.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2bb30e577a6e8b8f72f015f3cb958980ee0feda --- /dev/null +++ b/1039-rasdaemon-Add-Corrected-Internal-Error-for-aer_cor_e.patch @@ -0,0 +1,30 @@ +From dd0f10dd1a7c128b7c20f4d9bb1e469e3cbabacc Mon Sep 17 00:00:00 2001 +From: Jesus Esquivel +Date: Mon, 3 Jun 2024 16:47:20 -0600 +Subject: [PATCH 39/85] rasdaemon: Add Corrected Internal Error for + aer_cor_errors + +Add "Corrected Internal Error" for aer_cor_errors to decode +the error reported in status register in bit 14. + +Signed-off-by: Jesus Esquivel +Signed-off-by: Mauro Carvalho Chehab +--- + ras-aer-handler.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/ras-aer-handler.c b/ras-aer-handler.c +index 6f4cb2b..d6898e0 100644 +--- a/ras-aer-handler.c ++++ b/ras-aer-handler.c +@@ -35,6 +35,7 @@ static const char *aer_cor_errors[32] = { + [8] = "RELAY_NUM Rollover", + [12] = "Replay Timer Timeout", + [13] = "Advisory Non-Fatal", ++ [14] = "Corrected Internal Error", + }; + + /* bit field meaning for uncorrectable error */ +-- +2.33.1 + diff --git a/1040-anolis-ras-arm-handler-be-compatible-with-upstream-K.patch b/1040-anolis-ras-arm-handler-be-compatible-with-upstream-K.patch new file mode 100644 index 0000000000000000000000000000000000000000..7970d2fcfc544b8d989a9b53baa9c6ba2a5e9bd7 --- /dev/null +++ b/1040-anolis-ras-arm-handler-be-compatible-with-upstream-K.patch @@ -0,0 +1,123 @@ +From 26fa7f2122282920f4d3963db5f664d5f1a381a3 Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Tue, 25 Jun 2024 10:05:45 +0200 +Subject: [PATCH 40/85] anolis: ras-arm-handler: be compatible with upstream + 
Kernel + +Changeset e37eb2f11a82 ("Add code to decode Ampere specific error") +broke ARM event record with upstream Kernel, as it requires a different +trace event than the one that it is on upstream Kernel, and it is +part of a pending pull request: + + https://lore.kernel.org/all/20240321-b4-arm-ras-error-vendor-info-v5-rc3-v5-0-850f9bfb97a8@os.amperecomputing.com/ + +Restore its behavior by making parsing the UEFI 2.6+ N.17 and N.16 +table extra fields to be optional. That should make it compatible +with current upstream Kernels again. + +Fixes: e37eb2f11a82 ("Add code to decode Ampere specific error") +Signed-off-by: Mauro Carvalho Chehab +[Ruidong: do not use libtraceevent] +--- + ras-arm-handler.c | 78 ++++++++++++++++++++++++----------------------- + 1 file changed, 40 insertions(+), 38 deletions(-) + +diff --git a/ras-arm-handler.c b/ras-arm-handler.c +index abd8c9b..731176d 100644 +--- a/ras-arm-handler.c ++++ b/ras-arm-handler.c +@@ -207,51 +207,53 @@ int ras_arm_event_handler(struct trace_seq *s, + ev.psci_state = val; + trace_seq_printf(s, " psci_state: %d", ev.psci_state); + +- if (pevent_get_field_val(s, event, "pei_len", record, &val, 1) < 0) +- return -1; +- ev.pei_len = val; +- trace_seq_printf(s, " ARM Processor Err Info data len: %d\n", +- ev.pei_len); +- +- ev.pei_error = pevent_get_field_raw(s, event, "buf", record, &len, 1); +- if (!ev.pei_error) +- return -1; +- display_raw_data(s, ev.pei_error, ev.pei_len); +- +- if (pevent_get_field_val(s, event, "ctx_len", record, &val, 1) < 0) +- return -1; +- ev.ctx_len = val; +- trace_seq_printf(s, " ARM Processor Err Context Info data len: %d\n", +- ev.ctx_len); +- +- ev.ctx_error = pevent_get_field_raw(s, event, "buf1", record, &len, 1); +- if (!ev.ctx_error) +- return -1; +- display_raw_data(s, ev.ctx_error, ev.ctx_len); +- +- if (pevent_get_field_val(s, event, "oem_len", record, &val, 1) < 0) +- return -1; +- ev.oem_len = val; +- trace_seq_printf(s, " Vendor Specific Err Info data len: %d\n", +- 
ev.oem_len); +- +- ev.vsei_error = pevent_get_field_raw(s, event, "buf2", record, &len, 1); +- if (!ev.vsei_error) +- return -1; ++ // Upstream kKernels up to version 6.10 don't decode UEFI 2.6+ N.17 table ++ if (pevent_get_field_val(s, event, "pei_len", record, &val, 1) >= 0) { ++ ++ ev.pei_len = val; ++ trace_seq_printf(s, " ARM Processor Err Info data len: %d\n", ++ ev.pei_len); ++ ++ ev.pei_error = pevent_get_field_raw(s, event, "buf", record, &len, 1); ++ if (!ev.pei_error) ++ return -1; ++ display_raw_data(s, ev.pei_error, ev.pei_len); ++ ++ if (pevent_get_field_val(s, event, "ctx_len", record, &val, 1) < 0) ++ return -1; ++ ev.ctx_len = val; ++ trace_seq_printf(s, " ARM Processor Err Context Info data len: %d\n", ++ ev.ctx_len); ++ ++ ev.ctx_error = pevent_get_field_raw(s, event, "buf1", record, &len, 1); ++ if (!ev.ctx_error) ++ return -1; ++ display_raw_data(s, ev.ctx_error, ev.ctx_len); ++ ++ if (pevent_get_field_val(s, event, "oem_len", record, &val, 1) < 0) ++ return -1; ++ ev.oem_len = val; ++ trace_seq_printf(s, " Vendor Specific Err Info data len: %d\n", ++ ev.oem_len); ++ ++ ev.vsei_error = pevent_get_field_raw(s, event, "buf2", record, &len, 1); ++ if (!ev.vsei_error) ++ return -1; + + #ifdef HAVE_AMP_NS_DECODE +- //decode ampere specific error +- decode_amp_payload0_err_regs(NULL, s, +- (struct amp_payload0_type_sec *)ev.vsei_error); ++ //decode ampere specific error ++ decode_amp_payload0_err_regs(NULL, s, ++ (struct amp_payload0_type_sec *)ev.vsei_error); + #else +- display_raw_data(s, ev.vsei_error, ev.oem_len); ++ display_raw_data(s, ev.vsei_error, ev.oem_len); + #endif +- + #ifdef HAVE_CPU_FAULT_ISOLATION +- if (ras_handle_cpu_error(s, record, event, &ev, now) < 0) +- return -1; ++ if (ras_handle_cpu_error(s, record, event, &ev, now) < 0) ++ printf("Can't do CPU fault isolation!\n"); + #endif + ++ } ++ + /* Insert data into the SGBD */ + #ifdef HAVE_SQLITE3 + ras_store_arm_record(ras, &ev); +-- +2.33.1 + diff --git 
a/1041-rasdaemon-add-mc_event-trigger.patch b/1041-rasdaemon-add-mc_event-trigger.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e3ae606e1b5868b11200ce785f71c9733ca03b2 --- /dev/null +++ b/1041-rasdaemon-add-mc_event-trigger.patch @@ -0,0 +1,382 @@ +From 2ec86db2ab6c31670ba038b175f53aba920b7fe3 Mon Sep 17 00:00:00 2001 +From: Ruidong Tian +Date: Thu, 23 Nov 2023 17:47:25 +0800 +Subject: [PATCH 41/85] rasdaemon: add mc_event trigger + +Allow users to run a trigger when RAS mc_event occurs. The mc_event +trigger is separated into CE trigger and UE trigger, this is because +CE is more frequent than UE, and the CE trigger will lead to more +performance hits. Users can choose different triggers for CE/UE to +reduce this effect. + +Users can config trigger in /etc/sysconfig/rasdaemon: + + TRIGGER_DIR: The trigger directory + MC_CE_TRIGGER: The script executed when corrected error occurs. + MC_UE_TRIGGER: The script executed when uncorrected error occurs. + +No script will be executed if MC_CE_TRIGGER/MC_UE_TRIGGER is null.
+ +Signed-off-by: Ruidong Tian +Signed-off-by: Mauro Carvalho Chehab +--- + Makefile.am | 8 ++-- + contrib/mc_event_trigger | 24 ++++++++++++ + misc/rasdaemon.env | 18 ++++++++- + ras-events.c | 17 +++++++++ + ras-mc-handler.c | 81 ++++++++++++++++++++++++++++++++++++++++ + ras-mc-handler.h | 2 + + trigger.c | 60 +++++++++++++++++++++++++++++ + trigger.h | 13 +++++++ + 8 files changed, 218 insertions(+), 5 deletions(-) + create mode 100755 contrib/mc_event_trigger + create mode 100644 trigger.c + create mode 100644 trigger.h + +diff --git a/Makefile.am b/Makefile.am +index b16cf34..735d5a7 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -17,7 +17,7 @@ all-local: $(SYSTEMD_SERVICES) + + sbin_PROGRAMS = rasdaemon + rasdaemon_SOURCES = rasdaemon.c ras-events.c ras-mc-handler.c \ +- bitfield.c ++ bitfield.c trigger.c + if WITH_SQLITE3 + rasdaemon_SOURCES += ras-record.c + endif +@@ -77,7 +77,7 @@ include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ + ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ + ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ + non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \ +- ras-cpu-isolation.h queue.h non-standard-yitian.h ++ ras-cpu-isolation.h queue.h non-standard-yitian.h trigger.h + + # This rule can't be called with more than one Makefile job (like make -j8) + # I can't figure out a way to fix that +@@ -104,6 +104,6 @@ upload: + # custom target + install-data-local: + $(install_sh) -d "$(DESTDIR)@sysconfdir@/ras/dimm_labels.d" +-if WITH_MEMORY_CE_PFA ++ $(install_sh) -d "$(DESTDIR)@sysconfdir@/ras/triggers" + $(install_sh) @abs_srcdir@/misc/rasdaemon.env "$(DESTDIR)@SYSCONFDEFDIR@/rasdaemon" +-endif ++ $(install_sh) @abs_srcdir@/contrib/mc_event_trigger "$(DESTDIR)@sysconfdir@/ras/triggers/mc_event_trigger" +diff --git a/contrib/mc_event_trigger b/contrib/mc_event_trigger +new file mode 100755 +index 0000000..5c6ccfa +--- /dev/null ++++ 
b/contrib/mc_event_trigger +@@ -0,0 +1,24 @@ ++#!/bin/sh ++# This shell script can be executed by rasdaemon in daemon mode when a ++# mc_event is occured, environment variables include all information ++# reported by tracepoint. ++# ++# environment: ++# TIMESTAMP Timestamp when error occurred ++# COUNT Number of errors of the same type ++# TYPE Error type from Corrected/Uncorrected ++# MESSAGE Error message ++# LABEL Label of the affected DIMM(s) ++# MC_INDEX DIMM identifier from DMI/SMBIOS if available ++# TOP_LAYER Top layer of the error ++# MIDDLE_LAYER Middle layer of the error ++# LOWER_LAYER Low layer of the error ++# ADDRESS Error address ++# GRAIN Minimum granularity for an error report, in bytes ++# SYNDROME Syndrome of the error (or 0 if unknown or if the syndrome is not applicable) ++# DRIVER_DETAIL Other driver-specific detail about the error ++# ++ ++[ -x ./mc_event_trigger.local ] && . ./mc_event_trigger.local ++ ++exit 0 +diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env +index 7cb18e8..3389a73 100644 +--- a/misc/rasdaemon.env ++++ b/misc/rasdaemon.env +@@ -43,4 +43,20 @@ CPU_CE_THRESHOLD="18" + CPU_ISOLATION_CYCLE="24h" + + # Prevent excessive isolation from causing an avalanche effect +-CPU_ISOLATION_LIMIT="10" +\ No newline at end of file ++CPU_ISOLATION_LIMIT="10" ++ ++# Event Trigger ++ ++# Event trigger will be executed when the specified event occurs. ++# ++# Execute triggers path ++# For example: TRIGGER_DIR=/etc/ras/triggers ++TRIGGER_DIR= ++ ++# Execute these triggers when the mc_event occured, the triggers will not ++# be executed if the trigger is not specified. 
++# For example: ++# MC_CE_TRIGGER=mc_event_trigger ++# MC_UE_TRIGGER=mc_event_trigger ++MC_CE_TRIGGER= ++MC_UE_TRIGGER= +diff --git a/ras-events.c b/ras-events.c +index 8d70e43..b071de9 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -42,6 +42,7 @@ + #include "ras-logger.h" + #include "ras-page-isolation.h" + #include "ras-cpu-isolation.h" ++#include "trigger.h" + + /* + * Polling time, if read() doesn't block. Currently, trace_pipe_raw never +@@ -59,6 +60,10 @@ + + extern char* choices_disable; + ++const static struct event_trigger event_triggers[] = { ++ { "mc_event", &mc_event_trigger_setup }, ++}; ++ + static int get_debugfs_dir(char *tracing_dir, size_t len) + { + FILE *fp; +@@ -257,6 +262,16 @@ free_ras: + return rc; + } + ++static void setup_event_trigger(char *event) ++{ ++ struct event_trigger trigger; ++ for (int i = 0; i < ARRAY_SIZE(event_triggers); i++) { ++ trigger = event_triggers[i]; ++ if (!strcmp(event, trigger.name)) ++ trigger.setup(); ++ } ++} ++ + /* + * Set kernel filter. libtrace doesn't provide an API for setting filters + * in kernel, we have to implement it here. 
+@@ -842,6 +857,8 @@ static int add_event_handler(struct ras_events *ras, struct pevent *pevent, + return EINVAL; + } + ++ setup_event_trigger(event); ++ + log(ALL, LOG_INFO, "Enabled event %s:%s\n", group, event); + + return 0; +diff --git a/ras-mc-handler.c b/ras-mc-handler.c +index 42b05cd..d857ca3 100644 +--- a/ras-mc-handler.c ++++ b/ras-mc-handler.c +@@ -15,16 +15,91 @@ + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ ++#define _GNU_SOURCE + #include + #include + #include + #include + #include "libtrace/kbuffer.h" ++#include + #include "ras-mc-handler.h" + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-page-isolation.h" + #include "ras-report.h" ++#include "trigger.h" ++ ++#define MAX_ENV 30 ++static char *mc_ce_trigger; ++static char *mc_ue_trigger; ++ ++void mc_event_trigger_setup(void) ++{ ++ mc_ce_trigger = getenv("MC_CE_TRIGGER"); ++ if (!mc_ce_trigger || !strcmp(mc_ce_trigger, "") ++ || trigger_check(mc_ce_trigger) < 0) { ++ log(SYSLOG, LOG_ERR, "Cannot access mc_event ce trigger `%s`\n", ++ mc_ce_trigger); ++ } else ++ log(SYSLOG, LOG_INFO, "Setup mc_event ce trigger `%s`\n", ++ mc_ce_trigger); ++ ++ mc_ue_trigger = getenv("MC_UE_TRIGGER"); ++ if (!mc_ue_trigger || !strcmp(mc_ue_trigger, "") ++ || trigger_check(mc_ue_trigger) < 0) { ++ log(SYSLOG, LOG_ERR, "Cannot access mc_event ue trigger `%s`\n", ++ mc_ue_trigger); ++ } else ++ log(SYSLOG, LOG_INFO, "Setup mc_event ue trigger `%s`\n", ++ mc_ue_trigger); ++} ++ ++static void run_mc_trigger(struct ras_mc_event *ev, const char *mc_trigger) ++{ ++ char *env[MAX_ENV]; ++ int ei = 0; ++ int i; ++ ++ if (!mc_trigger || !strcmp(mc_trigger, "")) ++ return; ++ ++ if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0) ++ goto free; ++ if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "COUNT=%d", 
ev->error_count) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "TYPE=%s", ev->error_type) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "MESSAGE=%s", ev->msg) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "LABEL=%s", ev->label) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "MC_INDEX=%d", ev->mc_index) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "TOP_LAYER=%d", ev->top_layer) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "MIDDLE_LAYER=%d", ev->middle_layer) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "LOWER_LAYER=%d", ev->lower_layer) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "ADDRESS=%llx", ev->address) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "GRAIN=%lld", ev->grain) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "SYNDROME=%llx", ev->syndrome) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "DRIVER_DETAIL=%s", ev->driver_detail) < 0) ++ goto free; ++ env[ei] = NULL; ++ assert(ei < MAX_ENV); ++ ++ run_trigger(mc_trigger, NULL, env, "mc_event"); ++ ++free: ++ for (i = 0; i < ei; i++) ++ free(env[i]); ++} + + int ras_mc_event_handler(struct trace_seq *s, + struct pevent_record *record, +@@ -195,6 +270,12 @@ int ras_mc_event_handler(struct trace_seq *s, + ras_report_mc_event(ras, &ev); + #endif + ++ if (!strcmp(ev.error_type, "Corrected")) ++ run_mc_trigger(&ev, mc_ce_trigger); ++ ++ if (!strcmp(ev.error_type, "Uncorrected")) ++ run_mc_trigger(&ev, mc_ue_trigger); ++ + return 0; + + parse_error: +diff --git a/ras-mc-handler.h b/ras-mc-handler.h +index 2e3dfc5..dc5c545 100644 +--- a/ras-mc-handler.h ++++ b/ras-mc-handler.h +@@ -22,6 +22,8 @@ + #include "ras-events.h" + #include "libtrace/event-parse.h" + ++void mc_event_trigger_setup(void); ++ + int ras_mc_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context); +diff --git a/trigger.c b/trigger.c +new file mode 100644 +index 0000000..95fb8ca +--- /dev/null ++++ b/trigger.c +@@ -0,0 +1,60 @@ ++#define _GNU_SOURCE ++#include ++#include 
++#include ++#include ++#include "ras-logger.h" ++#include "trigger.h" ++ ++void run_trigger(const char *trigger, char *argv[], char **env, const char* reporter) ++{ ++ pid_t child; ++ char *path; ++ int status; ++ char *trigger_dir = getenv("TRIGGER_DIR"); ++ ++ log(SYSLOG, LOG_INFO, "Running trigger `%s' (reporter: %s)\n", trigger, reporter); ++ ++ if (asprintf(&path, "%s/%s", trigger_dir, trigger) < 0) ++ return; ++ ++ child = fork(); ++ if (child < 0) { ++ log(SYSLOG, LOG_ERR, "Cannot create process for trigger"); ++ return; ++ } ++ ++ if (child == 0) { ++ execve(path, argv, env); ++ _exit(127); ++ } else { ++ waitpid(child, &status, 0); ++ if (WIFEXITED(status) && WEXITSTATUS(status)) { ++ log(SYSLOG, LOG_INFO, "Trigger %s exited with status %d", ++ trigger, WEXITSTATUS(status)); ++ } else if (WIFSIGNALED(status)) { ++ log(SYSLOG, LOG_INFO, "Trigger %s killed by signal %d", ++ trigger, WTERMSIG(status)); ++ } ++ } ++} ++ ++int trigger_check(char *s) ++{ ++ char *name; ++ int rc; ++ char *trigger_dir = getenv("TRIGGER_DIR"); ++ ++ if (trigger_dir) { ++ if (asprintf(&name, "%s/%s", trigger_dir, s) < 0) ++ return -1; ++ } else ++ name = s; ++ ++ rc = access(name, R_OK|X_OK); ++ ++ if (trigger_dir) ++ free(name); ++ ++ return rc; ++} +diff --git a/trigger.h b/trigger.h +new file mode 100644 +index 0000000..556a7f2 +--- /dev/null ++++ b/trigger.h +@@ -0,0 +1,13 @@ ++#ifndef __TRIGGER_H__ ++#define __TRIGGER_H__ ++ ++struct event_trigger { ++ const char *name; ++ void (*setup)(void); ++}; ++ ++int trigger_check(char *s); ++void run_trigger(const char *trigger, char *argv[], char **env, const char* reporter); ++ ++ ++#endif +-- +2.33.1 + diff --git a/1042-ras-mc-handler-cleanup-trigger-logic.patch b/1042-ras-mc-handler-cleanup-trigger-logic.patch new file mode 100644 index 0000000000000000000000000000000000000000..15f4cb985b91b44b2fdaf1301a436a99e2f41ea7 --- /dev/null +++ b/1042-ras-mc-handler-cleanup-trigger-logic.patch @@ -0,0 +1,190 @@ +From 
dcfa32bc1266fa0eaa52a9b42aeff62e5a947cdd Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Tue, 16 Jul 2024 07:38:13 +0200 +Subject: [PATCH 42/85] ras-mc-handler: cleanup trigger logic + +- Only setup mc_ce_trigger/mc_ue_trigger if the trigger is + valid; + +- Check if the trigger is there before doing strcmp, as + checking if a pointer is not null is faster than strcmp(); + +- Ensure that the trigger env vars will be const, as we don't + want to accidentally override those env vars; + +- Print trigger enabled messages when rasdaemon runs with -f; + +- ensure that trigger variables will initialize to NULL; + +- coding style cleanups. + +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 3 ++- + ras-mc-handler.c | 60 +++++++++++++++++++++++++++++------------------- + trigger.c | 4 ++-- + trigger.h | 3 +-- + 4 files changed, 41 insertions(+), 29 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index b071de9..c2eb8f0 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -60,7 +60,7 @@ + + extern char* choices_disable; + +-const static struct event_trigger event_triggers[] = { ++static const struct event_trigger event_triggers[] = { + { "mc_event", &mc_event_trigger_setup }, + }; + +@@ -265,6 +265,7 @@ free_ras: + static void setup_event_trigger(char *event) + { + struct event_trigger trigger; ++ + for (int i = 0; i < ARRAY_SIZE(event_triggers); i++) { + trigger = event_triggers[i]; + if (!strcmp(event, trigger.name)) +diff --git a/ras-mc-handler.c b/ras-mc-handler.c +index d857ca3..203c5af 100644 +--- a/ras-mc-handler.c ++++ b/ras-mc-handler.c +@@ -16,42 +16,54 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #define _GNU_SOURCE ++#include + #include + #include + #include + #include + #include "libtrace/kbuffer.h" +-#include + #include "ras-mc-handler.h" +-#include "ras-record.h" + #include "ras-logger.h" + #include "ras-page-isolation.h" ++#include "ras-record.h" + #include "ras-report.h" + #include 
"trigger.h" + + #define MAX_ENV 30 +-static char *mc_ce_trigger; +-static char *mc_ue_trigger; ++static const char *mc_ce_trigger = NULL; ++static const char *mc_ue_trigger = NULL; + + void mc_event_trigger_setup(void) + { +- mc_ce_trigger = getenv("MC_CE_TRIGGER"); +- if (!mc_ce_trigger || !strcmp(mc_ce_trigger, "") +- || trigger_check(mc_ce_trigger) < 0) { +- log(SYSLOG, LOG_ERR, "Cannot access mc_event ce trigger `%s`\n", +- mc_ce_trigger); +- } else +- log(SYSLOG, LOG_INFO, "Setup mc_event ce trigger `%s`\n", +- mc_ce_trigger); +- +- mc_ue_trigger = getenv("MC_UE_TRIGGER"); +- if (!mc_ue_trigger || !strcmp(mc_ue_trigger, "") +- || trigger_check(mc_ue_trigger) < 0) { +- log(SYSLOG, LOG_ERR, "Cannot access mc_event ue trigger `%s`\n", +- mc_ue_trigger); +- } else +- log(SYSLOG, LOG_INFO, "Setup mc_event ue trigger `%s`\n", +- mc_ue_trigger); ++ const char *trigger; ++ ++ trigger = getenv("MC_CE_TRIGGER"); ++ if (trigger && strcmp(trigger, "")) { ++ if (trigger_check(trigger) < 0) { ++ log(ALL, LOG_ERR, ++ "Cannot access mc_event ce trigger `%s`\n", ++ trigger); ++ } else { ++ log(ALL, LOG_INFO, ++ "Setup mc_event ce trigger `%s`\n", ++ trigger); ++ mc_ce_trigger = trigger; ++ } ++ } ++ ++ trigger = getenv("MC_UE_TRIGGER"); ++ if (trigger && strcmp(trigger, "")) { ++ if (trigger_check(trigger) < 0) { ++ log(ALL, LOG_ERR, ++ "Cannot access mc_event ue trigger `%s`\n", ++ trigger); ++ } else { ++ log(ALL, LOG_INFO, ++ "Setup mc_event ue trigger `%s`\n", ++ trigger); ++ mc_ue_trigger = trigger; ++ } ++ } + } + + static void run_mc_trigger(struct ras_mc_event *ev, const char *mc_trigger) +@@ -60,7 +72,7 @@ static void run_mc_trigger(struct ras_mc_event *ev, const char *mc_trigger) + int ei = 0; + int i; + +- if (!mc_trigger || !strcmp(mc_trigger, "")) ++ if (!strcmp(mc_trigger, "")) + return; + + if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0) +@@ -270,10 +282,10 @@ int ras_mc_event_handler(struct trace_seq *s, + 
ras_report_mc_event(ras, &ev); + #endif + +- if (!strcmp(ev.error_type, "Corrected")) ++ if (mc_ce_trigger && !strcmp(ev.error_type, "Corrected")) + run_mc_trigger(&ev, mc_ce_trigger); + +- if (!strcmp(ev.error_type, "Uncorrected")) ++ if (mc_ue_trigger && !strcmp(ev.error_type, "Uncorrected")) + run_mc_trigger(&ev, mc_ue_trigger); + + return 0; +diff --git a/trigger.c b/trigger.c +index 95fb8ca..0d91e05 100644 +--- a/trigger.c ++++ b/trigger.c +@@ -6,7 +6,7 @@ + #include "ras-logger.h" + #include "trigger.h" + +-void run_trigger(const char *trigger, char *argv[], char **env, const char* reporter) ++void run_trigger(const char *trigger, char *argv[], char **env, const char *reporter) + { + pid_t child; + char *path; +@@ -39,7 +39,7 @@ void run_trigger(const char *trigger, char *argv[], char **env, const char* repo + } + } + +-int trigger_check(char *s) ++int trigger_check(const char *s) + { + char *name; + int rc; +diff --git a/trigger.h b/trigger.h +index 556a7f2..ccd1a9b 100644 +--- a/trigger.h ++++ b/trigger.h +@@ -6,8 +6,7 @@ struct event_trigger { + void (*setup)(void); + }; + +-int trigger_check(char *s); ++int trigger_check(const char *s); + void run_trigger(const char *trigger, char *argv[], char **env, const char* reporter); + +- + #endif +-- +2.33.1 + diff --git a/1043-trigger-parse-only-once-TRIGGER_DIR-env-variable.patch b/1043-trigger-parse-only-once-TRIGGER_DIR-env-variable.patch new file mode 100644 index 0000000000000000000000000000000000000000..4f14bc27cd3523530694757600cae0ed8fcb9eac --- /dev/null +++ b/1043-trigger-parse-only-once-TRIGGER_DIR-env-variable.patch @@ -0,0 +1,142 @@ +From 092e251ac3a880284f72c45a42990734e33b4df1 Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Mon, 15 Jul 2024 13:40:37 +0200 +Subject: [PATCH 43/85] trigger: parse only once TRIGGER_DIR env variable + +Instead of parsing TRIGGER_DIR every time a new event happens, +store the trigger full path, simplifying the logic and avoiding +memory leaks. 
+ +Signed-off-by: Mauro Carvalho Chehab +--- + ras-mc-handler.c | 13 ++++++------- + trigger.c | 23 +++++++++-------------- + trigger.h | 2 +- + 3 files changed, 16 insertions(+), 22 deletions(-) + +diff --git a/ras-mc-handler.c b/ras-mc-handler.c +index 203c5af..b62dfb6 100644 +--- a/ras-mc-handler.c ++++ b/ras-mc-handler.c +@@ -39,7 +39,9 @@ void mc_event_trigger_setup(void) + + trigger = getenv("MC_CE_TRIGGER"); + if (trigger && strcmp(trigger, "")) { +- if (trigger_check(trigger) < 0) { ++ mc_ce_trigger = trigger_check(trigger); ++ ++ if (!mc_ce_trigger) { + log(ALL, LOG_ERR, + "Cannot access mc_event ce trigger `%s`\n", + trigger); +@@ -47,13 +49,14 @@ void mc_event_trigger_setup(void) + log(ALL, LOG_INFO, + "Setup mc_event ce trigger `%s`\n", + trigger); +- mc_ce_trigger = trigger; + } + } + + trigger = getenv("MC_UE_TRIGGER"); + if (trigger && strcmp(trigger, "")) { +- if (trigger_check(trigger) < 0) { ++ mc_ue_trigger = trigger_check(trigger); ++ ++ if (!mc_ue_trigger) { + log(ALL, LOG_ERR, + "Cannot access mc_event ue trigger `%s`\n", + trigger); +@@ -61,7 +64,6 @@ void mc_event_trigger_setup(void) + log(ALL, LOG_INFO, + "Setup mc_event ue trigger `%s`\n", + trigger); +- mc_ue_trigger = trigger; + } + } + } +@@ -72,9 +74,6 @@ static void run_mc_trigger(struct ras_mc_event *ev, const char *mc_trigger) + int ei = 0; + int i; + +- if (!strcmp(mc_trigger, "")) +- return; +- + if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0) + goto free; + if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0) +diff --git a/trigger.c b/trigger.c +index 0d91e05..0ee1a5e 100644 +--- a/trigger.c ++++ b/trigger.c +@@ -9,15 +9,10 @@ + void run_trigger(const char *trigger, char *argv[], char **env, const char *reporter) + { + pid_t child; +- char *path; + int status; +- char *trigger_dir = getenv("TRIGGER_DIR"); + + log(SYSLOG, LOG_INFO, "Running trigger `%s' (reporter: %s)\n", trigger, reporter); + +- if (asprintf(&path, "%s/%s", 
trigger_dir, trigger) < 0) +- return; +- + child = fork(); + if (child < 0) { + log(SYSLOG, LOG_ERR, "Cannot create process for trigger"); +@@ -25,7 +20,7 @@ void run_trigger(const char *trigger, char *argv[], char **env, const char *repo + } + + if (child == 0) { +- execve(path, argv, env); ++ execve(trigger, argv, env); + _exit(127); + } else { + waitpid(child, &status, 0); +@@ -39,7 +34,7 @@ void run_trigger(const char *trigger, char *argv[], char **env, const char *repo + } + } + +-int trigger_check(const char *s) ++const char *trigger_check(const char *s) + { + char *name; + int rc; +@@ -47,14 +42,14 @@ int trigger_check(const char *s) + + if (trigger_dir) { + if (asprintf(&name, "%s/%s", trigger_dir, s) < 0) +- return -1; +- } else +- name = s; ++ return NULL; ++ s = name; ++ } + +- rc = access(name, R_OK|X_OK); ++ rc = access(s, R_OK|X_OK); + +- if (trigger_dir) +- free(name); ++ if (!rc) ++ return(s); + +- return rc; ++ return NULL; + } +diff --git a/trigger.h b/trigger.h +index ccd1a9b..aea29b5 100644 +--- a/trigger.h ++++ b/trigger.h +@@ -6,7 +6,7 @@ struct event_trigger { + void (*setup)(void); + }; + +-int trigger_check(const char *s); ++const char *trigger_check(const char *s); + void run_trigger(const char *trigger, char *argv[], char **env, const char* reporter); + + #endif +-- +2.33.1 + diff --git a/1044-rasdaemon-add-mem_fail_event-trigger.patch b/1044-rasdaemon-add-mem_fail_event-trigger.patch new file mode 100644 index 0000000000000000000000000000000000000000..554dc5522b8dff7adab0ddd22baab77dc96c5d02 --- /dev/null +++ b/1044-rasdaemon-add-mem_fail_event-trigger.patch @@ -0,0 +1,150 @@ +From aeeaf6b0275bfbe7e7fa2686bed8c2ae2b95bb8f Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Tue, 16 Jul 2024 05:05:32 +0000 +Subject: [PATCH 44/85] rasdaemon: add mem_fail_event trigger + +This event is somewhat similar to mc_event, except that this one +occurs on ARM platforms and the fields are different. 
+ +Signed-off-by: Mauro Carvalho Chehab +--- + contrib/mem_fail_trigger.sh | 12 ++++++++ + ras-events.c | 1 + + ras-memory-failure-handler.c | 56 ++++++++++++++++++++++++++++++++++++ + ras-memory-failure-handler.h | 1 + + 4 files changed, 70 insertions(+) + create mode 100755 contrib/mem_fail_trigger.sh + +diff --git a/contrib/mem_fail_trigger.sh b/contrib/mem_fail_trigger.sh +new file mode 100755 +index 0000000..a3ac362 +--- /dev/null ++++ b/contrib/mem_fail_trigger.sh +@@ -0,0 +1,12 @@ ++#!/bin/sh ++# This shell script can be executed by rasdaemon in daemon mode when a ++# memory_failure_event is occured, environment variables include all ++# information reported by tracepoint. ++# ++ ++echo TIMESTAMP: $TIMESTAMP ++echo PFN: $PFN ++echo PAGE_TYPE: $PAGE_TYPE ++echo ACTION_RESULT: $ACTION_RESULT ++ ++exit 0 +diff --git a/ras-events.c b/ras-events.c +index c2eb8f0..e1326f3 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -62,6 +62,7 @@ extern char* choices_disable; + + static const struct event_trigger event_triggers[] = { + { "mc_event", &mc_event_trigger_setup }, ++ { "memory_failure_event", &mem_fail_event_trigger_setup }, + }; + + static int get_debugfs_dir(char *tracing_dir, size_t len) +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index 4798ead..d6e83a9 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -12,6 +12,7 @@ + * GNU General Public License for more details. 
+ */ + ++#include + #include + #include + #include +@@ -19,6 +20,7 @@ + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-report.h" ++#include "trigger.h" + + /* Memory failure - various types of pages */ + enum mf_action_page_type { +@@ -90,6 +92,59 @@ static const struct { + { MF_RECOVERED, "Recovered" }, + }; + ++#define MAX_ENV 6 ++static const char *mf_trigger = NULL; ++ ++void mem_fail_event_trigger_setup(void) ++{ ++ const char *trigger; ++ ++ trigger = getenv("MEM_FAIL_TRIGGER"); ++ if (trigger && strcmp(trigger, "")) { ++ mf_trigger = trigger_check(trigger); ++ ++ if (!mf_trigger) { ++ log(ALL, LOG_ERR, ++ "Cannot access memory_fail_event trigger `%s`\n", ++ trigger); ++ } else { ++ log(ALL, LOG_INFO, ++ "Setup memory_fail_event trigger `%s`\n", ++ trigger); ++ } ++ } ++} ++ ++static void run_mf_trigger(struct ras_mf_event *ev) ++{ ++ char *env[MAX_ENV]; ++ int ei = 0; ++ int i; ++ ++ if (!mf_trigger) ++ return; ++ ++ if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0) ++ goto free; ++ if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "PFN=%s", ev->pfn) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "PAGE_TYPE=%s", ev->page_type) < 0) ++ goto free; ++ if (asprintf(&env[ei++], "ACTION_RESULT=%s", ev->action_result) < 0) ++ goto free; ++ ++ env[ei] = NULL; ++ assert(ei < MAX_ENV); ++ ++ run_trigger(mf_trigger, NULL, env, "memory_fail_event"); ++ ++free: ++ for (i = 0; i < ei; i++) ++ free(env[i]); ++} ++ + static const char *get_page_type(int page_type) + { + unsigned int i; +@@ -169,6 +224,7 @@ int ras_memory_failure_event_handler(struct trace_seq *s, + /* Report event to ABRT */ + ras_report_mf_event(ras, &ev); + #endif ++ run_mf_trigger(&ev); + + return 0; + } +diff --git a/ras-memory-failure-handler.h b/ras-memory-failure-handler.h +index b9e9971..30d8e9d 100644 +--- a/ras-memory-failure-handler.h ++++ b/ras-memory-failure-handler.h +@@ -18,6 
+18,7 @@ + #include "ras-events.h" + #include "libtrace/event-parse.h" + ++void mem_fail_event_trigger_setup(void); + int ras_memory_failure_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context); +-- +2.33.1 + diff --git a/1045-rasdaemon-ras-report-fix-possible-but-unlikely-file-.patch b/1045-rasdaemon-ras-report-fix-possible-but-unlikely-file-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9ed013f5cecc94e8a84ccf083c124032323db0c --- /dev/null +++ b/1045-rasdaemon-ras-report-fix-possible-but-unlikely-file-.patch @@ -0,0 +1,93 @@ +From 1c31e9948a2c19ecb0f39d5f14910c80316ac625 Mon Sep 17 00:00:00 2001 +From: Aristeu Rozanski +Date: Thu, 19 Jan 2023 08:45:57 -0500 +Subject: [PATCH 45/85] rasdaemon: ras-report: fix possible but unlikely file + descriptor leak + +Found with covscan. + +Signed-off-by: Aristeu Rozanski +Signed-off-by: Mauro Carvalho Chehab +--- + ras-report.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/ras-report.c b/ras-report.c +index ea3a9b6..62d5eb7 100644 +--- a/ras-report.c ++++ b/ras-report.c +@@ -434,7 +434,7 @@ int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){ + + mc_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -484,7 +484,7 @@ int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){ + + aer_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -533,7 +533,7 @@ int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standar + + non_standard_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -578,7 +578,7 @@ int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev){ + + arm_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -624,7 +624,7 @@ int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ + + mce_fail: + +- if(sockfd > 0){ ++ 
if(sockfd >= 0){ + close(sockfd); + } + +@@ -674,7 +674,7 @@ int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev){ + + devlink_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -723,7 +723,7 @@ int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *e + done = 1; + + diskerror_fail: +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -768,7 +768,7 @@ int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) + done = 1; + + mf_fail: +- if (sockfd > 0) ++ if (sockfd >= 0) + close(sockfd); + + if (done) +-- +2.33.1 + diff --git a/1046-ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch b/1046-ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch new file mode 100644 index 0000000000000000000000000000000000000000..9988e4eafb74702cc7557eb78a20092f640cf111 --- /dev/null +++ b/1046-ras-events-quit-loop-in-read_ras_event-when-kbuf-dat.patch @@ -0,0 +1,41 @@ +From 838234ed7e9cf87b740556c5e15e3e236b723fa4 Mon Sep 17 00:00:00 2001 +From: hubin +Date: Thu, 18 May 2023 16:14:41 +0800 +Subject: [PATCH 46/85] ras-events: quit loop in read_ras_event when kbuf data + is broken + +when kbuf data is broken, kbuffer_next_event() may move kbuf->index back to +the current kbuf->index position, causing dead loop. + +In this situation, rasdaemon will repeatedly parse an invalid event, and +print warning like "ug! negative record size -8!", pushing cpu utilization +rate to 100%. + +when kbuf data is broken, discard current page and continue reading next page +kbuf. 
+ +Signed-off-by: hubin +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/ras-events.c b/ras-events.c +index e1326f3..2cc54b3 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -546,6 +546,11 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + kbuffer_load_subbuffer(kbuf, page); + + while ((data = kbuffer_read_event(kbuf, &time_stamp))) { ++ if (kbuffer_curr_size(kbuf) < 0) { ++ log(TERM, LOG_ERR, "invalid kbuf data, discard\n"); ++ break; ++ } ++ + parse_ras_data(&pdata[i], + kbuf, data, time_stamp); + +-- +2.33.1 + diff --git a/1047-C-files-cleanup-coding-style.patch b/1047-C-files-cleanup-coding-style.patch new file mode 100644 index 0000000000000000000000000000000000000000..913e9ad0a9b9ca99055da7407d637b79662344c4 --- /dev/null +++ b/1047-C-files-cleanup-coding-style.patch @@ -0,0 +1,3343 @@ +From 531a16aa949221932f76108bf9c74fe164fec7df Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Mon, 22 Jan 2024 08:36:47 +0100 +Subject: [PATCH 47/85] C files: cleanup coding style + +The rasdaemon conding style follows Linux Kernel where it makes sense. + +Yet, changes made overtime ended with some coding style non-compliances. + +Adjust rasdaemon coding style by using: + + scripts/checkpatch.pl --fix-inplace --strict *.c --ignore PREFER_KERNEL_TYPES + +And doing some manual fixups where the script didn't work. +As a bonus, some typos were also fixed on some rasdaemon messages. 
+ +Signed-off-by: Mauro Carvalho Chehab +--- + bitfield.c | 13 ++- + mce-amd-k8.c | 28 +++-- + mce-amd-smca.c | 17 ++- + mce-amd.c | 6 +- + mce-intel-broadwell-de.c | 6 +- + mce-intel-broadwell-epex.c | 8 +- + mce-intel-dunnington.c | 2 + + mce-intel-haswell.c | 9 +- + mce-intel-ivb.c | 8 +- + mce-intel-knl.c | 2 +- + mce-intel-nehalem.c | 5 +- + mce-intel-p4-p6.c | 10 +- + mce-intel-sb.c | 8 +- + mce-intel-skylake-xeon.c | 6 +- + mce-intel-tulsa.c | 1 + + mce-intel.c | 47 ++++---- + non-standard-ampere.c | 134 +++++++++++----------- + non-standard-hisi_hip08.c | 4 +- + non-standard-hisilicon.c | 18 +-- + non-standard-yitian.c | 28 ++--- + queue.c | 14 +-- + ras-aer-handler.c | 4 +- + ras-arm-handler.c | 6 +- + ras-cpu-isolation.c | 28 ++--- + ras-devlink-handler.c | 5 +- + ras-diskerror-handler.c | 3 +- + ras-events.c | 36 +++--- + ras-extlog-handler.c | 11 +- + ras-mc-handler.c | 10 +- + ras-mce-handler.c | 26 ++--- + ras-memory-failure-handler.c | 3 +- + ras-non-standard-handler.c | 11 +- + ras-page-isolation.c | 21 ++-- + ras-record.c | 196 ++++++++++++++++----------------- + ras-report.c | 208 +++++++++++++++++++---------------- + rasdaemon.c | 7 +- + rbtree.c | 32 +++--- + 37 files changed, 504 insertions(+), 477 deletions(-) + +diff --git a/bitfield.c b/bitfield.c +index f004755..2076683 100644 +--- a/bitfield.c ++++ b/bitfield.c +@@ -25,10 +25,10 @@ + #include "ras-mce-handler.h" + #include "bitfield.h" + +-unsigned bitfield_msg(char *buf, size_t len, const char **bitarray, +- unsigned array_len, +- unsigned bit_offset, unsigned ignore_bits, +- uint64_t status) ++unsigned int bitfield_msg(char *buf, size_t len, const char **bitarray, ++ unsigned int array_len, ++ unsigned int bit_offset, unsigned int ignore_bits, ++ uint64_t status) + { + int i, n; + char *p = buf; +@@ -64,6 +64,7 @@ unsigned bitfield_msg(char *buf, size_t len, const char **bitarray, + static uint64_t bitmask(uint64_t i) + { + uint64_t mask = 1; ++ + while (mask < i) + mask = (mask << 1) | 
1; + return mask; +@@ -77,6 +78,7 @@ void decode_bitfield(struct mce_event *e, uint64_t status, + for (f = fields; f->str; f++) { + uint64_t v = (status >> f->start_bit) & bitmask(f->stringlen - 1); + char *s = NULL; ++ + if (v < f->stringlen) + s = f->str[v]; + if (!s) { +@@ -93,11 +95,14 @@ void decode_numfield(struct mce_event *e, uint64_t status, + struct numfield *fields) + { + struct numfield *f; ++ + for (f = fields; f->name; f++) { + uint64_t mask = (1ULL << (f->end - f->start + 1)) - 1; + uint64_t v = (status >> f->start) & mask; ++ + if (v > 0 || f->force) { + char fmt[32] = {0}; ++ + snprintf(fmt, 32, "%%s: %s\n", f->fmt ? f->fmt : "%Lu"); + mce_snprintf(e->error_msg, fmt, f->name, v); + } +diff --git a/mce-amd-k8.c b/mce-amd-k8.c +index dc772c2..f27b823 100644 +--- a/mce-amd-k8.c ++++ b/mce-amd-k8.c +@@ -43,7 +43,7 @@ static const char *k8bank[] = { + }; + + static const char *k8threshold[] = { +- [0 ... K8_MCELOG_THRESHOLD_DRAM_ECC - 1] = "Unknow threshold counter", ++ [0 ... K8_MCELOG_THRESHOLD_DRAM_ECC - 1] = "Unknown threshold counter", + [K8_MCELOG_THRESHOLD_DRAM_ECC] = "MC4_MISC0 DRAM threshold", + [K8_MCELOG_THRESHOLD_LINK] = "MC4_MISC1 Link threshold", + [K8_MCELOG_THRESHOLD_L3_CACHE] = "MC4_MISC2 L3 Cache threshold", +@@ -56,25 +56,31 @@ static const char *k8threshold[] = { + static const char *transaction[] = { + "instruction", "data", "generic", "reserved" + }; ++ + static const char *cachelevel[] = { + "0", "1", "2", "generic" + }; ++ + static const char *memtrans[] = { + "generic error", "generic read", "generic write", "data read", + "data write", "instruction fetch", "prefetch", "evict", "snoop", + "?", "?", "?", "?", "?", "?", "?" 
+ }; ++ + static const char *partproc[] = { + "local node origin", "local node response", + "local node observed", "generic participation" + }; ++ + static const char *timeout[] = { + "request didn't time out", + "request timed out" + }; ++ + static const char *memoryio[] = { + "memory", "res.", "i/o", "generic" + }; ++ + static const char *nbextendederr[] = { + "RAM ECC error", + "CRC error", +@@ -96,6 +102,7 @@ static const char *nbextendederr[] = { + "L3 Cache Tag Error", + "L3 Cache LRU Error" + }; ++ + static const char *highbits[32] = { + [31] = "valid", + [30] = "error overflow (multiple errors)", +@@ -164,7 +171,7 @@ static void decode_k8_dc_mc(struct mce_event *e) + if (e->status & (3ULL << 45)) { + mce_snprintf(e->error_msg, + "Data cache ECC error (syndrome %x)", +- (uint32_t) (e->status >> 47) & 0xff); ++ (uint32_t)(e->status >> 47) & 0xff); + if (e->status & (1ULL << 40)) + mce_snprintf(e->error_msg, "found by scrubber"); + } +@@ -185,7 +192,7 @@ static void decode_k8_ic_mc(struct mce_event *e) + + if ((errcode & 0xfff0) == 0x0010) + mce_snprintf(e->error_msg, "TLB parity error in %s array", +- (exterrcode == 0) ? "physical" : "virtual"); ++ (exterrcode == 0) ? "physical" : "virtual"); + } + + static void decode_k8_bu_mc(struct mce_event *e) +@@ -196,10 +203,10 @@ static void decode_k8_bu_mc(struct mce_event *e) + mce_snprintf(e->error_msg, "L2 cache ECC error"); + + mce_snprintf(e->error_msg, "%s array error", +- !exterrcode ? "Bus or cache" : "Cache tag"); ++ !exterrcode ? 
"Bus or cache" : "Cache tag"); + } + +-static void decode_k8_nb_mc(struct mce_event *e, unsigned *memerr) ++static void decode_k8_nb_mc(struct mce_event *e, unsigned int *memerr) + { + unsigned short exterrcode = (e->status >> 16) & 0x0f; + +@@ -209,13 +216,13 @@ static void decode_k8_nb_mc(struct mce_event *e, unsigned *memerr) + case 0: + *memerr = 1; + mce_snprintf(e->error_msg, "ECC syndrome = %x", +- (uint32_t) (e->status >> 47) & 0xff); ++ (uint32_t)(e->status >> 47) & 0xff); + break; + case 8: + *memerr = 1; + mce_snprintf(e->error_msg, "Chipkill ECC syndrome = %x", +- (uint32_t) ((((e->status >> 24) & 0xff) << 8) +- | ((e->status >> 47) & 0xff))); ++ (uint32_t)((((e->status >> 24) & 0xff) << 8) ++ | ((e->status >> 47) & 0xff))); + break; + case 1: + case 2: +@@ -223,7 +230,7 @@ static void decode_k8_nb_mc(struct mce_event *e, unsigned *memerr) + case 4: + case 6: + mce_snprintf(e->error_msg, "link number = %x", +- (uint32_t) (e->status >> 36) & 0xf); ++ (uint32_t)(e->status >> 36) & 0xf); + break; + } + } +@@ -251,11 +258,12 @@ static void bank_name(struct mce_event *e) + + int parse_amd_k8_event(struct ras_events *ras, struct mce_event *e) + { +- unsigned ismemerr = 0; ++ unsigned int ismemerr = 0; + + /* Don't handle GART errors */ + if (e->bank == 4) { + unsigned short exterrcode = (e->status >> 16) & 0x0f; ++ + if (exterrcode == 5 && (e->status & (1ULL << 61))) { + return -1; + } +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 3fd97e0..c66a5f7 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -74,10 +74,10 @@ enum smca_bank_types { + SMCA_PCIE, /* PCI Express Unit */ + SMCA_PCIE_V2, + SMCA_XGMI_PCS, /* xGMI PCS Unit */ +- SMCA_NBIF, /*NBIF Unit */ +- SMCA_SHUB, /* System Hub Unit */ +- SMCA_SATA, /* SATA Unit */ +- SMCA_USB, /* USB Unit */ ++ SMCA_NBIF, /* NBIF Unit */ ++ SMCA_SHUB, /* System Hub Unit */ ++ SMCA_SATA, /* SATA Unit */ ++ SMCA_USB, /* USB Unit */ + SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */ + SMCA_USR_CP, /* 
Ultra Short Reach Control Plane Controller */ + SMCA_GMI_PCS, /* GMI PCS Unit */ +@@ -833,7 +833,7 @@ static struct smca_bank_name smca_names[] = { + [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, + [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, + [SMCA_MP5] = { "Microprocessor 5 Unit" }, +- [SMCA_MPDMA] = { "MPDMA Unit" }, ++ [SMCA_MPDMA] = { "MPDMA Unit" }, + [SMCA_NBIO] = { "Northbridge IO Unit" }, + [SMCA_PCIE ... SMCA_PCIE_V2] = { "PCI Express Unit" }, + [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" }, +@@ -851,7 +851,6 @@ static struct smca_bank_name smca_names[] = { + + void amd_decode_errcode(struct mce_event *e) + { +- + decode_amd_errcode(e); + + if (e->status & MCI_STATUS_POISON) +@@ -859,8 +858,8 @@ void amd_decode_errcode(struct mce_event *e) + + if (e->status & MCI_STATUS_TCC) + mce_snprintf(e->mcistatus_msg, "Task_context_corrupt"); +- + } ++ + /* + * To find the UMC channel represented by this bank we need to match on its + * instance_id. The instance_id of a bank is held in the lower 32 bits of its +@@ -890,7 +889,7 @@ static int find_hbm_channel(struct mce_event *e) + return (umc % 2) ? 
tmp + 4 : tmp; + } + +-static inline void fixup_hwid(struct mce_priv* m, uint32_t *hwid_mcatype) ++static inline void fixup_hwid(struct mce_priv *m, uint32_t *hwid_mcatype) + { + if (m->family == 0x19) { + switch (m->model) { +@@ -1006,7 +1005,7 @@ int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) + uint64_t mcgstatus = e->mcgstatus; + + mce_snprintf(e->mcgstatus_msg, "mcgstatus=%lld", +- (long long)e->mcgstatus); ++ (long long)e->mcgstatus); + + if (mcgstatus & MCG_STATUS_RIPV) + mce_snprintf(e->mcgstatus_msg, "RIPV"); +diff --git a/mce-amd.c b/mce-amd.c +index 116df11..ac0dbac 100644 +--- a/mce-amd.c ++++ b/mce-amd.c +@@ -26,26 +26,31 @@ + static char *transaction[] = { + "instruction", "data", "generic", "reserved" + }; ++ + /* Error codes: cache level (LL) */ + static char *cachelevel[] = { + "reserved", "L1", "L2", "L3/generic" + }; ++ + /* Error codes: memory transaction type (RRRR) */ + static char *memtrans[] = { + "generic", "generic read", "generic write", "data read", + "data write", "instruction fetch", "prefetch", "evict", "snoop", + "?", "?", "?", "?", "?", "?", "?" 
+ }; ++ + /* Participation Processor */ + static char *partproc[] = { + "local node origin", "local node response", + "local node observed", "generic participation" + }; ++ + /* Timeout */ + static char *timeout[] = { + "request didn't time out", + "request timed out" + }; ++ + /* internal unclassified error code */ + static char *internal[] = { "reserved", + "reserved", +@@ -118,5 +123,4 @@ void decode_amd_errcode(struct mce_event *e) + PP_MSG(ec), TO_MSG(ec), + R4_MSG(ec), LL_MSG(ec)); + return; +- + } +diff --git a/mce-intel-broadwell-de.c b/mce-intel-broadwell-de.c +index d52c82e..8210782 100644 +--- a/mce-intel-broadwell-de.c ++++ b/mce-intel-broadwell-de.c +@@ -78,7 +78,7 @@ void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e) + { + uint64_t status = e->status; + uint32_t mca = status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan; ++ unsigned int rank0 = -1, rank1 = -1, chan; + + switch (e->bank) { + case 4: +@@ -115,7 +115,7 @@ void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e) + + /* Ignore unless this is an corrected extended error from an iMC bank */ + if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || +- !test_prefix(7, status & 0xefff)) ++ !test_prefix(7, status & 0xefff)) + return; + + /* +@@ -140,7 +140,7 @@ void broadwell_de_decode_model(struct ras_events *ras, struct mce_event *e) + */ + if (rank0 != -1 && rank1 != -1) + mce_snprintf(e->mc_location, "ranks=%d and %d", +- rank0, rank1); ++ rank0, rank1); + else if (rank0 != -1) + mce_snprintf(e->mc_location, "rank=%d", rank0); + } +diff --git a/mce-intel-broadwell-epex.c b/mce-intel-broadwell-epex.c +index f7cd3b6..9c863d9 100644 +--- a/mce-intel-broadwell-epex.c ++++ b/mce-intel-broadwell-epex.c +@@ -91,7 +91,7 @@ static char *qpi[] = { + [0x22] = "Phy detected in-band reset (no width change)", + [0x23] = "Link failover clock failover", + [0x30] = "Rx detected CRC error - successful LLR after Phy re-init", +- [0x31] = "Rx detected CRC 
error - successful LLR wihout Phy re-init", ++ [0x31] = "Rx detected CRC error - successful LLR without Phy re-init", + }; + + static struct field qpi_mc[] = { +@@ -118,7 +118,7 @@ void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e) + { + uint64_t status = e->status; + uint32_t mca = status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan; ++ unsigned int rank0 = -1, rank1 = -1, chan; + + switch (e->bank) { + case 4: +@@ -160,7 +160,7 @@ void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e) + + /* Ignore unless this is an corrected extended error from an iMC bank */ + if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || +- !test_prefix(7, status & 0xefff)) ++ !test_prefix(7, status & 0xefff)) + return; + + /* +@@ -185,7 +185,7 @@ void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e) + */ + if (rank0 != -1 && rank1 != -1) + mce_snprintf(e->mc_location, "ranks=%d and %d", +- rank0, rank1); ++ rank0, rank1); + else if (rank0 != -1) + mce_snprintf(e->mc_location, "rank=%d", rank0); + } +diff --git a/mce-intel-dunnington.c b/mce-intel-dunnington.c +index c695c62..71c3fb4 100644 +--- a/mce-intel-dunnington.c ++++ b/mce-intel-dunnington.c +@@ -91,6 +91,7 @@ static void dunnington_decode_bus(struct mce_event *e, uint64_t status) + static void dunnington_decode_internal(struct mce_event *e, uint64_t status) + { + uint32_t mca = (status >> 16) & 0xffff; ++ + if ((mca & 0xfff0) == 0) + decode_bitfield(e, mca, dnt_front_status); + else if ((mca & 0xf0ff) == 0) +@@ -104,6 +105,7 @@ static void dunnington_decode_internal(struct mce_event *e, uint64_t status) + void dunnington_decode_model(struct mce_event *e) + { + uint64_t status = e->status; ++ + if ((status & 0xffff) == 0xe0f) + dunnington_decode_bus(e, status); + else if ((status & 0xffff) == (1 << 10)) +diff --git a/mce-intel-haswell.c b/mce-intel-haswell.c +index 1791a36..195f6ed 100644 +--- a/mce-intel-haswell.c ++++ 
b/mce-intel-haswell.c +@@ -23,7 +23,6 @@ + #include "ras-mce-handler.h" + #include "bitfield.h" + +- + /* See IA32 SDM Vol3B Table 16-20 */ + + static char *pcu_1[] = { +@@ -92,7 +91,7 @@ static char *qpi[] = { + [0x22] = "Phy detected in-band reset (no width change)", + [0x23] = "Link failover clock failover", + [0x30] = "Rx detected CRC error - successful LLR after Phy re-init", +- [0x31] = "Rx detected CRC error - successful LLR wihout Phy re-init", ++ [0x31] = "Rx detected CRC error - successful LLR without Phy re-init", + }; + + static struct field qpi_mc[] = { +@@ -120,7 +119,7 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e) + { + uint64_t status = e->status; + uint32_t mca = status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan; ++ unsigned int rank0 = -1, rank1 = -1, chan; + + switch (e->bank) { + case 4: +@@ -160,7 +159,7 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e) + + /* Ignore unless this is an corrected extended error from an iMC bank */ + if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || +- !test_prefix(7, status & 0xefff)) ++ !test_prefix(7, status & 0xefff)) + return; + + /* +@@ -185,7 +184,7 @@ void hsw_decode_model(struct ras_events *ras, struct mce_event *e) + */ + if (rank0 != -1 && rank1 != -1) + mce_snprintf(e->mc_location, "ranks=%d and %d", +- rank0, rank1); ++ rank0, rank1); + else if (rank0 != -1) + mce_snprintf(e->mc_location, "rank=%d", rank0); + } +diff --git a/mce-intel-ivb.c b/mce-intel-ivb.c +index 0c5bebc..e5e6a7a 100644 +--- a/mce-intel-ivb.c ++++ b/mce-intel-ivb.c +@@ -90,7 +90,7 @@ void ivb_decode_model(struct ras_events *ras, struct mce_event *e) + struct mce_priv *mce = ras->mce_priv; + uint64_t status = e->status; + uint32_t mca = status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan; ++ unsigned int rank0 = -1, rank1 = -1, chan; + + switch (e->bank) { + case 4: +@@ -121,7 +121,7 @@ void ivb_decode_model(struct ras_events *ras, struct mce_event *e) + + /* 
Ignore unless this is an corrected extended error from an iMC bank */ + if (e->bank < 9 || e->bank > 16 || (status & MCI_STATUS_UC) || +- !test_prefix(7, status & 0xefff)) ++ !test_prefix(7, status & 0xefff)) + return; + + /* +@@ -146,7 +146,7 @@ void ivb_decode_model(struct ras_events *ras, struct mce_event *e) + */ + if (rank0 >= 0 && rank1 >= 0) + mce_snprintf(e->mc_location, "ranks=%d and %d", +- rank0, rank1); ++ rank0, rank1); + else if (rank0 >= 0) + mce_snprintf(e->mc_location, "rank=%d", rank0); + else +@@ -162,7 +162,7 @@ void ivb_decode_model(struct ras_events *ras, struct mce_event *e) + * faling rank to a DIMM slot. + */ + #if 0 +-static int failrank2dimm(unsigned failrank, int socket, int channel) ++static int failrank2dimm(unsigned int failrank, int socket, int channel) + { + switch (failrank) { + case 0: case 1: case 2: case 3: +diff --git a/mce-intel-knl.c b/mce-intel-knl.c +index 7062fbb..0e2ea80 100644 +--- a/mce-intel-knl.c ++++ b/mce-intel-knl.c +@@ -36,7 +36,7 @@ void knl_decode_model(struct ras_events *ras, struct mce_event *e) + { + uint64_t status = e->status; + uint32_t mca = status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan = 0; ++ unsigned int rank0 = -1, rank1 = -1, chan = 0; + + switch (e->bank) { + case 5: +diff --git a/mce-intel-nehalem.c b/mce-intel-nehalem.c +index ad4ce69..18992d1 100644 +--- a/mce-intel-nehalem.c ++++ b/mce-intel-nehalem.c +@@ -112,9 +112,9 @@ void nehalem_decode_model(struct mce_event *e) + uint64_t status = e->status; + uint32_t mca = status & 0xffff; + uint64_t misc = e->misc; +- unsigned channel, dimm; ++ unsigned int channel, dimm; + +- if ((mca >> 11) == 1) { /* bus and interconnect QPI */ ++ if ((mca >> 11) == 1) { /* bus and interconnect QPI */ + decode_bitfield(e, status, qpi_status); + if (status & MCI_STATUS_MISCV) { + decode_numfield(e, misc, qpi_numbers); +@@ -143,6 +143,7 @@ void xeon75xx_decode_model(struct mce_event *e) + { + uint64_t status = e->status; + uint32_t mca = status & 0xffff; 
++ + if (mca == 0x0001) { /* internal unspecified */ + decode_bitfield(e, status, internal_error_status); + decode_numfield(e, status, internal_error_numbers); +diff --git a/mce-intel-p4-p6.c b/mce-intel-p4-p6.c +index 5c6c3ff..2751d34 100644 +--- a/mce-intel-p4-p6.c ++++ b/mce-intel-p4-p6.c +@@ -66,8 +66,8 @@ static struct field p6_shared_status[] = { + FIELD(25, bus_queue_error_type), + SBITFIELD(30, "internal BINIT"), + SBITFIELD(36, "received parity error on response transaction"), +- SBITFIELD(38, "timeout BINIT (ROB timeout)." +- " No micro-instruction retired for some time"), ++ SBITFIELD(38, ++ "timeout BINIT (ROB timeout). No micro-instruction retired for some time"), + FIELD_NULL(39), + SBITFIELD(42, "bus transaction received hard error response"), + SBITFIELD(43, "failure that caused IERR"), +@@ -86,7 +86,7 @@ static struct field p6old_status[] = { + FIELD_NULL(31), + FIELD_NULL(32), + SBITFIELD(35, "BINIT received from external bus"), +- SBITFIELD(37, "Received hard error reponse on split transaction (Bus BINIT)"), ++ SBITFIELD(37, "Received hard error response on split transaction (Bus BINIT)"), + {} + }; + +@@ -109,7 +109,7 @@ static struct numfield p6old_status_numbers[] = { + static struct { + int value; + char *str; +-} p4_model []= { ++} p4_model[] = { + {16, "FSB address parity"}, + {17, "Response hard fail"}, + {18, "Response parity"}, +@@ -123,7 +123,7 @@ static struct { + void p4_decode_model(struct mce_event *e) + { + uint32_t model = e->status & 0xffff0000L; +- unsigned i; ++ unsigned int i; + + for (i = 0; i < ARRAY_SIZE(p4_model); i++) { + if (model & (1 << p4_model[i].value)) +diff --git a/mce-intel-sb.c b/mce-intel-sb.c +index e754496..385b125 100644 +--- a/mce-intel-sb.c ++++ b/mce-intel-sb.c +@@ -82,7 +82,7 @@ void snb_decode_model(struct ras_events *ras, struct mce_event *e) + { + struct mce_priv *mce = ras->mce_priv; + uint32_t mca = e->status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan; ++ unsigned int rank0 = -1, rank1 = -1, 
chan; + + switch (e->bank) { + case 4: +@@ -113,7 +113,7 @@ void snb_decode_model(struct ras_events *ras, struct mce_event *e) + + /* Ignore unless this is an corrected extended error from an iMC bank */ + if (e->bank < 8 || e->bank > 11 || (e->status & MCI_STATUS_UC) || +- !test_prefix(7, e->status & 0xefff)) ++ !test_prefix(7, e->status & 0xefff)) + return; + + /* +@@ -138,7 +138,7 @@ void snb_decode_model(struct ras_events *ras, struct mce_event *e) + */ + if (rank0 >= 0 && rank1 >= 0) + mce_snprintf(e->mc_location, "ranks=%d and %d", +- rank0, rank1); ++ rank0, rank1); + else if (rank0 >= 0) + mce_snprintf(e->mc_location, "rank=%d", rank0); + else +@@ -162,7 +162,7 @@ void snb_decode_model(struct ras_events *ras, struct mce_event *e) + * can be converted to a DIMM number within a channel for systems with either + * two or three DIMMs per channel. + */ +-static int failrank2dimm(unsigned failrank, int socket, int channel) ++static int failrank2dimm(unsigned int failrank, int socket, int channel) + { + switch (failrank) { + case 0: case 1: case 2: case 3: +diff --git a/mce-intel-skylake-xeon.c b/mce-intel-skylake-xeon.c +index 680578a..37e5e11 100644 +--- a/mce-intel-skylake-xeon.c ++++ b/mce-intel-skylake-xeon.c +@@ -170,7 +170,7 @@ void skylake_s_decode_model(struct ras_events *ras, struct mce_event *e) + { + uint64_t status = e->status; + uint32_t mca = status & 0xffff; +- unsigned rank0 = -1, rank1 = -1, chan; ++ unsigned int rank0 = -1, rank1 = -1, chan; + + switch (e->bank) { + case 4: +@@ -221,7 +221,7 @@ void skylake_s_decode_model(struct ras_events *ras, struct mce_event *e) + + /* Ignore unless this is an corrected extended error from an iMC bank */ + if (e->bank < 13 || e->bank > 18 || (status & MCI_STATUS_UC) || +- !test_prefix(7, status & 0xefff)) ++ !test_prefix(7, status & 0xefff)) + return; + + /* +@@ -246,7 +246,7 @@ void skylake_s_decode_model(struct ras_events *ras, struct mce_event *e) + */ + if (rank0 != -1 && rank1 != -1) + 
mce_snprintf(e->mc_location, "ranks=%d and %d", +- rank0, rank1); ++ rank0, rank1); + else if (rank0 != -1) + mce_snprintf(e->mc_location, "rank=%d", rank0); + } +diff --git a/mce-intel-tulsa.c b/mce-intel-tulsa.c +index e59bf06..f38d638 100644 +--- a/mce-intel-tulsa.c ++++ b/mce-intel-tulsa.c +@@ -105,6 +105,7 @@ static void tulsa_decode_bus(struct mce_event *e, uint64_t status) + static void tulsa_decode_internal(struct mce_event *e, uint64_t status) + { + uint32_t mca = (status >> 16) & 0xffff; ++ + if ((mca & 0xfff0) == 0) + decode_bitfield(e, mca, tls_front_status); + else if ((mca & 0xf0ff) == 0) +diff --git a/mce-intel.c b/mce-intel.c +index 18a9072..7f48cc4 100644 +--- a/mce-intel.c ++++ b/mce-intel.c +@@ -57,8 +57,7 @@ + #define BUS_PP_MASK 0x600 /*bit 9, bit 10*/ + #define BUS_PP_SHIFT 0x9 + +-#define MCG_TES_P (1ULL<<11) /* Yellow bit cache threshold supported */ +- ++#define MCG_TES_P BIT_ULL(11) /* Yellow bit cache threshold supported */ + + static char *TT[] = { + "Instruction", +@@ -76,8 +75,8 @@ static char *LL[] = { + + static struct { + uint8_t value; +- char* str; +-} RRRR [] = { ++ char *str; ++} RRRR[] = { + {0, "Generic"}, + {1, "Read"}, + {2, "Write" }, +@@ -121,7 +120,7 @@ static char *mca_msg[] = { + static char *tracking_msg[] = { + [1] = "green", + [2] = "yellow", +- [3] ="res3" ++ [3] = "res3" + }; + + static const char *arstate[4] = { +@@ -157,9 +156,9 @@ static void decode_memory_controller(struct mce_event *e, uint32_t status) + sprintf(channel, "%u", status & 0xf); + + mce_snprintf(e->error_msg, "MEMORY CONTROLLER %s_CHANNEL%s_ERR", +- mmm_mnemonic[(status >> 4) & 7], channel); ++ mmm_mnemonic[(status >> 4) & 7], channel); + mce_snprintf(e->error_msg, "Transaction: %s", +- mmm_desc[(status >> 4) & 7]); ++ mmm_desc[(status >> 4) & 7]); + } + + static void decode_termal_bank(struct mce_event *e) +@@ -207,7 +206,7 @@ static void bank_name(struct mce_event *e) + + static char *get_RRRR_str(uint8_t rrrr) + { +- unsigned i; ++ unsigned int 
i; + + for (i = 0; i < ARRAY_SIZE(RRRR); i++) { + if (RRRR[i].value == rrrr) { +@@ -220,7 +219,7 @@ static char *get_RRRR_str(uint8_t rrrr) + + #define decode_attr(arr, val) ({ \ + char *__str; \ +- if ((unsigned)(val) >= ARRAY_SIZE(arr)) \ ++ if ((unsigned int)(val) >= ARRAY_SIZE(arr)) \ + __str = "UNKNOWN"; \ + else \ + __str = (arr)[val]; \ +@@ -248,17 +247,17 @@ static void decode_mca(struct mce_event *e, uint64_t track, int *ismemerr) + decode_attr(LL, mca & 3)); + } else if (test_prefix(4, mca)) { + mce_snprintf(e->mcastatus_msg, "%s TLB %s Error", +- decode_attr(TT, (mca & TLB_TT_MASK) >> TLB_TT_SHIFT), +- decode_attr(LL, (mca & TLB_LL_MASK) >> TLB_LL_SHIFT)); ++ decode_attr(TT, (mca & TLB_TT_MASK) >> TLB_TT_SHIFT), ++ decode_attr(LL, (mca & TLB_LL_MASK) >> TLB_LL_SHIFT)); + } else if (test_prefix(8, mca)) { +- unsigned typenum = (mca & CACHE_TT_MASK) >> CACHE_TT_SHIFT; +- unsigned levelnum = (mca & CACHE_LL_MASK) >> CACHE_LL_SHIFT; ++ unsigned int typenum = (mca & CACHE_TT_MASK) >> CACHE_TT_SHIFT; ++ unsigned int levelnum = (mca & CACHE_LL_MASK) >> CACHE_LL_SHIFT; + char *type = decode_attr(TT, typenum); + char *level = decode_attr(LL, levelnum); ++ + mce_snprintf(e->mcastatus_msg, + "%s CACHE %s %s Error", type, level, +- get_RRRR_str((mca & CACHE_RRRR_MASK) >> +- CACHE_RRRR_SHIFT)); ++ get_RRRR_str((mca & CACHE_RRRR_MASK) >> CACHE_RRRR_SHIFT)); + #if 0 + /* FIXME: We shouldn't mix parsing with actions */ + if (track == 2) +@@ -313,15 +312,13 @@ static void decode_mci(struct mce_event *e, int *ismemerr) + else + mce_snprintf(e->mcistatus_msg, "Corrected_error"); + +- + if (e->status & MCI_STATUS_EN) + mce_snprintf(e->mcistatus_msg, "Error_enabled"); + +- + if (e->status & MCI_STATUS_PCC) + mce_snprintf(e->mcistatus_msg, "Processor_context_corrupt"); + +- if (e->status & (MCI_STATUS_S|MCI_STATUS_AR)) ++ if (e->status & (MCI_STATUS_S | MCI_STATUS_AR)) + mce_snprintf(e->mcistatus_msg, "%s", + arstate[(e->status >> 55) & 3]); + +@@ -350,14 +347,14 @@ int 
parse_intel_event(struct ras_events *ras, struct mce_event *e) + + /* Check if the error is at the memory controller */ + if (((e->status & 0xffff) >> 7) == 1) { +- unsigned corr_err_cnt; ++ unsigned int corr_err_cnt; + + corr_err_cnt = EXTRACT(e->status, 38, 52); + mce_snprintf(e->mc_location, "n_errors=%d", corr_err_cnt); + } + + if (test_prefix(11, (e->status & 0xffffL))) { +- switch(mce->cputype) { ++ switch (mce->cputype) { + case CPU_P6OLD: + p6old_decode_model(e); + break; +@@ -375,7 +372,7 @@ int parse_intel_event(struct ras_events *ras, struct mce_event *e) + break; + } + } +- switch(mce->cputype) { ++ switch (mce->cputype) { + case CPU_NEHALEM: + nehalem_decode_model(e); + break; +@@ -447,18 +444,18 @@ static int domsr(int cpu, int msr, int bit) + return -EINVAL; + } + } +- if (pread(fd, &data, sizeof data, msr) != sizeof data) { ++ if (pread(fd, &data, sizeof(data), msr) != sizeof(data)) { + log(ALL, LOG_ERR, + "Cannot read MSR_ERROR_CONTROL from %s\n", fpath); + return -EINVAL; + } + data |= bit; +- if (pwrite(fd, &data, sizeof data, msr) != sizeof data) { ++ if (pwrite(fd, &data, sizeof(data), msr) != sizeof(data)) { + log(ALL, LOG_ERR, + "Cannot write MSR_ERROR_CONTROL to %s\n", fpath); + return -EINVAL; + } +- if (pread(fd, &data, sizeof data, msr) != sizeof data) { ++ if (pread(fd, &data, sizeof(data), msr) != sizeof(data)) { + log(ALL, LOG_ERR, + "Cannot re-read MSR_ERROR_CONTROL from %s\n", fpath); + return -EINVAL; +@@ -472,7 +469,7 @@ static int domsr(int cpu, int msr, int bit) + return 0; + } + +-int set_intel_imc_log(enum cputype cputype, unsigned ncpus) ++int set_intel_imc_log(enum cputype cputype, unsigned int ncpus) + { + int cpu, msr, bit, rc; + +diff --git a/non-standard-ampere.c b/non-standard-ampere.c +index 05b5252..79e09a0 100644 +--- a/non-standard-ampere.c ++++ b/non-standard-ampere.c +@@ -31,6 +31,7 @@ static const char * const disp_payload0_err_reg_name[] = { + "MISC2:", + "MISC3:", + }; ++ + /*PCIe AER Error Payload Type 1*/ + 
static const char * const disp_payload1_err_reg_name[] = { + "Error Type:", +@@ -86,7 +87,6 @@ static const char * const err_cpm_sub_type[] = { + "ARMv8 Core 1", + }; + +- + static const char * const err_mcu_sub_type[] = { + "ERR0", + "ERR1", +@@ -155,8 +155,6 @@ static char *err_smmu_sub_type(int etype) + return "unknown error"; + } + +- +- + static const char * const err_pcie_aer_sub_type[] = { + "Root Port", + "Device", +@@ -173,7 +171,6 @@ static char *err_peci_rasdp_sub_type(int etype) + return "unknown error"; + } + +- + static const char * const err_ocm_sub_type[] = { + "ERR0", + "ERR1", +@@ -327,7 +324,7 @@ static const struct amp_ras_type_info amp_payload_error_type[] = { + + /*get the error type name*/ + static const char *oem_type_name(const struct amp_ras_type_info *info, +- uint8_t type_id) ++ uint8_t type_id) + { + const struct amp_ras_type_info *type = &info[0]; + +@@ -350,7 +347,7 @@ static const char *oem_subtype_name(const struct amp_ras_type_info *info, + + if (type->id != type_id) + continue; +- if (type->sub == NULL) ++ if (!type->sub) + return type->name; + if (sub_type_id >= type->sub_num) + return "unknown"; +@@ -477,7 +474,7 @@ static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder, + } + + static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, +- const char *name) ++ const char *name) + { + int rc; + +@@ -502,37 +499,37 @@ static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, + + /*save all Ampere Specific Error Payload type 0 to sqlite3 database*/ + static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, ++ const char *type_str, const char *subtype_str, + const struct amp_payload0_type_sec *err) + { +- if (ev_decoder != NULL) { ++ if (ev_decoder) { + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +- AMP_PAYLOAD0_FIELD_TYPE, 0, type_str); ++ AMP_PAYLOAD0_FIELD_TYPE, 0, type_str); + record_amp_data(ev_decoder, 
AMP_OEM_DATA_TYPE_TEXT, +- AMP_PAYLOAD0_FIELD_SUB_TYPE, 0, subtype_str); ++ AMP_PAYLOAD0_FIELD_SUB_TYPE, 0, subtype_str); + + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD0_FIELD_INS, INSTANCE(err->instance), NULL); ++ AMP_PAYLOAD0_FIELD_INS, INSTANCE(err->instance), NULL); + + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD0_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD0_FIELD_SOCKET_NUM, + SOCKET_NUM(err->instance), NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD0_FIELD_STATUS_REG, err->err_status, NULL); ++ AMP_PAYLOAD0_FIELD_STATUS_REG, err->err_status, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD0_FIELD_ADDR_REG, ++ AMP_PAYLOAD0_FIELD_ADDR_REG, + err->err_addr, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD0_FIELD_MISC0, ++ AMP_PAYLOAD0_FIELD_MISC0, + err->err_misc_0, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD0_FIELD_MISC1, ++ AMP_PAYLOAD0_FIELD_MISC1, + err->err_misc_1, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD0_FIELD_MISC2, ++ AMP_PAYLOAD0_FIELD_MISC2, + err->err_misc_2, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD0_FIELD_MISC3, ++ AMP_PAYLOAD0_FIELD_MISC3, + err->err_misc_3, NULL); + store_amp_err_data(ev_decoder, "amp_payload0_event_tab"); + } +@@ -540,10 +537,10 @@ static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, + + /*save all Ampere Specific Error Payload type 1 to sqlite3 database*/ + static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, ++ const char *type_str, const char *subtype_str, + const struct amp_payload1_type_sec *err) + { +- if (ev_decoder != NULL) { ++ if (ev_decoder) { + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, + AMP_PAYLOAD1_FIELD_TYPE, 0, type_str); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +@@ -590,45 +587,45 
@@ static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, + + /*save all Ampere Specific Error Payload type 2 to sqlite3 database*/ + static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, +- const struct amp_payload2_type_sec *err) ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload2_type_sec *err) + { +- if (ev_decoder != NULL) { ++ if (ev_decoder) { + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +- AMP_PAYLOAD2_FIELD_TYPE, 0, type_str); ++ AMP_PAYLOAD2_FIELD_TYPE, 0, type_str); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +- AMP_PAYLOAD2_FIELD_SUB_TYPE, 0, subtype_str); ++ AMP_PAYLOAD2_FIELD_SUB_TYPE, 0, subtype_str); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_INS, INSTANCE(err->instance), NULL); ++ AMP_PAYLOAD2_FIELD_INS, INSTANCE(err->instance), NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD2_FIELD_SOCKET_NUM, + SOCKET_NUM(err->instance), NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_CE_REPORT_REG, ++ AMP_PAYLOAD2_FIELD_CE_REPORT_REG, + err->ce_register, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_CE_LOACATION, ++ AMP_PAYLOAD2_FIELD_CE_LOACATION, + err->ce_location, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_CE_ADDR, ++ AMP_PAYLOAD2_FIELD_CE_ADDR, + err->ce_addr, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_UE_REPORT_REG, ++ AMP_PAYLOAD2_FIELD_UE_REPORT_REG, + err->ue_register, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_UE_LOCATION, ++ AMP_PAYLOAD2_FIELD_UE_LOCATION, + err->ue_location, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_UE_ADDR, ++ AMP_PAYLOAD2_FIELD_UE_ADDR, + err->ue_addr, NULL); + 
record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD2_FIELD_RESERVED1, ++ AMP_PAYLOAD2_FIELD_RESERVED1, + err->reserved1, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD2_FIELD_RESERVED2, ++ AMP_PAYLOAD2_FIELD_RESERVED2, + err->reserved2, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD2_FIELD_RESERVED3, ++ AMP_PAYLOAD2_FIELD_RESERVED3, + err->reserved3, NULL); + store_amp_err_data(ev_decoder, "amp_payload2_event_tab"); + } +@@ -636,36 +633,36 @@ static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder, + + /*save all Ampere Specific Error Payload type 3 to sqlite3 database*/ + static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, ++ const char *type_str, const char *subtype_str, + const struct amp_payload3_type_sec *err) + { +- if (ev_decoder != NULL) { ++ if (ev_decoder) { + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +- AMP_PAYLOAD3_FIELD_TYPE, 0, type_str); ++ AMP_PAYLOAD3_FIELD_TYPE, 0, type_str); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +- AMP_PAYLOAD3_FIELD_SUB_TYPE, 0, subtype_str); ++ AMP_PAYLOAD3_FIELD_SUB_TYPE, 0, subtype_str); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD3_FIELD_INS, INSTANCE(err->instance), NULL); ++ AMP_PAYLOAD3_FIELD_INS, INSTANCE(err->instance), NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD3_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD3_FIELD_SOCKET_NUM, + SOCKET_NUM(err->instance), NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, +- AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0, + err->fw_speci_data0, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1, + err->fw_speci_data1, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2, ++ 
AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2, + err->fw_speci_data2, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3, + err->fw_speci_data3, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4, + err->fw_speci_data4, NULL); + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, +- AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5, + err->fw_speci_data5, NULL); + store_amp_err_data(ev_decoder, "amp_payload3_event_tab"); + } +@@ -680,29 +677,29 @@ static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder, + } + + static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, +- const struct amp_payload0_type_sec *err) ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload0_type_sec *err) + { + return 0; + } + + static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, +- const struct amp_payload1_type_sec *err) ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload1_type_sec *err) + { + return 0; + } + + static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, +- const struct amp_payload2_type_sec *err) ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload2_type_sec *err) + { + return 0; + } + + static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder, +- const char *type_str, const char *subtype_str, +- const struct amp_payload3_type_sec *err) ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload3_type_sec *err) + { + return 0; + } +@@ -716,7 +713,7 @@ static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, char *name) + /*decode ampere specific error payload type 0, the 
CPU's data is save*/ + /*to sqlite by ras-arm-handler, others are saved by this function.*/ + void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder, +- struct trace_seq *s, ++ struct trace_seq *s, + const struct amp_payload0_type_sec *err) + { + char buf[AMP_PAYLOAD0_BUF_LEN]; +@@ -803,8 +800,8 @@ void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder, + + /*decode ampere specific error payload type 1 and save to sqlite db*/ + static void decode_amp_payload1_err_regs(struct ras_ns_ev_decoder *ev_decoder, +- struct trace_seq *s, +- const struct amp_payload1_type_sec *err) ++ struct trace_seq *s, ++ const struct amp_payload1_type_sec *err) + { + char buf[AMP_PAYLOAD0_BUF_LEN]; + char *p = buf; +@@ -887,8 +884,8 @@ static void decode_amp_payload1_err_regs(struct ras_ns_ev_decoder *ev_decoder, + + /*decode ampere specific error payload type 2 and save to sqlite db*/ + static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder, +- struct trace_seq *s, +- const struct amp_payload2_type_sec *err) ++ struct trace_seq *s, ++ const struct amp_payload2_type_sec *err) + { + char buf[AMP_PAYLOAD0_BUF_LEN]; + char *p = buf; +@@ -903,7 +900,7 @@ static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder, + subtype_str = err_peci_rasdp_sub_type(err->subtype); + else + subtype_str = oem_subtype_name(amp_payload_error_type, +- TYPE(err->type), err->subtype); ++ TYPE(err->type), err->subtype); + //display error type + p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]); + p += snprintf(p, end - p, " %s\n", type_str); +@@ -972,8 +969,8 @@ static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder, + + /*decode ampere specific error payload type 3 and save to sqlite db*/ + static void decode_amp_payload3_err_regs(struct ras_ns_ev_decoder *ev_decoder, +- struct trace_seq *s, +- const struct amp_payload3_type_sec *err) ++ struct trace_seq *s, ++ const struct amp_payload3_type_sec *err) 
+ { + char buf[AMP_PAYLOAD0_BUF_LEN]; + char *p = buf; +@@ -989,7 +986,6 @@ static void decode_amp_payload3_err_regs(struct ras_ns_ev_decoder *ev_decoder, + p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]); + p += snprintf(p, end - p, " %s\n", type_str); + +- + //display error subtype + p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]); + p += snprintf(p, end - p, " %s\n", subtype_str); +@@ -1074,13 +1070,13 @@ static int decode_amp_oem_type_error(struct ras_events *ras, + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, + &db_tab) != SQLITE_OK) { + trace_seq_printf(s, +- "create sql %s fail\n", +- sqlite3_table_list[payload_type]); ++ "create sql %s fail\n", ++ sqlite3_table_list[payload_type]); + return -1; + } + } + record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, +- id, 0, event->timestamp); ++ id, 0, event->timestamp); + #endif + + if (payload_type == PAYLOAD_TYPE_0) { +diff --git a/non-standard-hisi_hip08.c b/non-standard-hisi_hip08.c +index 0899812..e482a7a 100644 +--- a/non-standard-hisi_hip08.c ++++ b/non-standard-hisi_hip08.c +@@ -453,7 +453,7 @@ static const char *oem_submodule_name(const struct hisi_module_info *info, + if (module->id != module_id) + continue; + +- if (module->sub == NULL) ++ if (!module->sub) + return module->name; + + if (sub_module_id >= module->sub_num) +@@ -675,7 +675,7 @@ static int decode_hip08_oem_type1_error(struct ras_events *ras, + struct ras_non_standard_event *event) + { + const struct hisi_oem_type1_err_sec *err = +- (struct hisi_oem_type1_err_sec*)event->error; ++ (struct hisi_oem_type1_err_sec *)event->error; + + if (err->val_bits == 0) { + trace_seq_printf(s, "%s: no valid error information\n", +diff --git a/non-standard-hisilicon.c b/non-standard-hisilicon.c +index 7296d28..e9ea5df 100644 +--- a/non-standard-hisilicon.c ++++ b/non-standard-hisilicon.c +@@ -88,10 +88,10 @@ struct hisi_event { + + #ifdef HAVE_SQLITE3 + void record_vendor_data(struct ras_ns_ev_decoder 
*ev_decoder, +- enum hisi_oem_data_type data_type, ++ enum hisi_oem_data_type data_type, + int id, int64_t data, const char *text) + { +- if (ev_decoder->stmt_dec_record == NULL) ++ if (!ev_decoder->stmt_dec_record) + return; + + switch (data_type) { +@@ -111,7 +111,7 @@ int step_vendor_data_tab(struct ras_ns_ev_decoder *ev_decoder, const char *name) + { + int rc; + +- if (ev_decoder->stmt_dec_record == NULL) ++ if (!ev_decoder->stmt_dec_record) + return 0; + + rc = sqlite3_step(ev_decoder->stmt_dec_record); +@@ -171,13 +171,13 @@ static const struct db_table_descriptor hisi_common_section_tab = { + }; + #endif + +-static const char* soc_desc[] = { ++static const char *soc_desc[] = { + "Kunpeng916", + "Kunpeng920", + "Kunpeng930", + }; + +-static const char* module_name[] = { ++static const char *module_name[] = { + "MN", + "PLL", + "SLLC", +@@ -221,9 +221,9 @@ static const char* module_name[] = { + "HBMC", + }; + +-static const char* get_soc_desc(uint8_t soc_id) ++static const char *get_soc_desc(uint8_t soc_id) + { +- if (soc_id >= sizeof(soc_desc)/sizeof(char *)) ++ if (soc_id >= sizeof(soc_desc) / sizeof(char *)) + return "unknown"; + + return soc_desc[soc_id]; +@@ -232,7 +232,7 @@ static const char* get_soc_desc(uint8_t soc_id) + static void decode_module(struct ras_ns_ev_decoder *ev_decoder, + struct hisi_event *event, uint8_t module_id) + { +- if (module_id >= sizeof(module_name)/sizeof(char *)) { ++ if (module_id >= sizeof(module_name) / sizeof(char *)) { + HISI_SNPRINTF(event->error_msg, "module=unknown(id=%hhu) ", module_id); + record_vendor_data(ev_decoder, HISI_OEM_DATA_TYPE_TEXT, + HISI_COMMON_FIELD_MODULE_ID, +@@ -246,7 +246,7 @@ static void decode_module(struct ras_ns_ev_decoder *ev_decoder, + } + + static void decode_hisi_common_section_hdr(struct ras_ns_ev_decoder *ev_decoder, +- const struct hisi_common_error_section *err, ++ const struct hisi_common_error_section *err, + struct hisi_event *event) + { + HISI_SNPRINTF(event->error_msg, "[ 
table_version=%hhu", err->version); +diff --git a/non-standard-yitian.c b/non-standard-yitian.c +index 4c30514..daadb95 100644 +--- a/non-standard-yitian.c ++++ b/non-standard-yitian.c +@@ -86,25 +86,25 @@ static const struct db_table_descriptor yitian_ddr_payload_section_tab = { + }; + + int record_yitian_ddr_reg_dump_event(struct ras_ns_ev_decoder *ev_decoder, +- struct ras_yitian_ddr_payload_event *ev) ++ struct ras_yitian_ddr_payload_event *ev) + { + int rc; + struct sqlite3_stmt *stmt = ev_decoder->stmt_dec_record; + + log(TERM, LOG_INFO, "yitian_ddr_reg_dump_event store: %p\n", stmt); + +- sqlite3_bind_text (stmt, 1, ev->timestamp, -1, NULL); +- sqlite3_bind_int64 (stmt, 2, ev->address); +- sqlite3_bind_text (stmt, 3, ev->reg_msg, -1, NULL); ++ sqlite3_bind_text(stmt, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_int64(stmt, 2, ev->address); ++ sqlite3_bind_text(stmt, 3, ev->reg_msg, -1, NULL); + + rc = sqlite3_step(stmt); + if (rc != SQLITE_OK && rc != SQLITE_DONE) + log(TERM, LOG_ERR, +- "Failed to do yitian_ddr_reg_dump_event step on sqlite: error = %d\n", rc); ++ "Failed to do yitian_ddr_reg_dump_event step on sqlite: error = %d\n", rc); + rc = sqlite3_reset(stmt); + if (rc != SQLITE_OK && rc != SQLITE_DONE) + log(TERM, LOG_ERR, +- "Failed reset yitian_ddr_reg_dump_event on sqlite: error = %d\n", rc); ++ "Failed reset yitian_ddr_reg_dump_event on sqlite: error = %d\n", rc); + log(TERM, LOG_INFO, "register inserted at db\n"); + + return rc; +@@ -112,7 +112,7 @@ int record_yitian_ddr_reg_dump_event(struct ras_ns_ev_decoder *ev_decoder, + #endif + + static const char *oem_type_name(const struct yitian_ras_type_info *info, +- uint8_t type_id) ++ uint8_t type_id) + { + const struct yitian_ras_type_info *type = &info[0]; + +@@ -134,7 +134,7 @@ static const char *oem_subtype_name(const struct yitian_ras_type_info *info, + + if (type->id != type_id) + continue; +- if (type->sub == NULL) ++ if (!type->sub) + return type->name; + if (sub_type_id >= type->sub_num) + 
return "unknown"; +@@ -144,7 +144,7 @@ static const char *oem_subtype_name(const struct yitian_ras_type_info *info, + } + + void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder, +- struct trace_seq *s, ++ struct trace_seq *s, + const struct yitian_ddr_payload_type_sec *err, + struct ras_events *ras) + { +@@ -168,7 +168,7 @@ void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder, + tm = localtime(&now); + if (tm) + strftime(ev.timestamp, sizeof(ev.timestamp), +- "%Y-%m-%d %H:%M:%S %z", tm); ++ "%Y-%m-%d %H:%M:%S %z", tm); + //display error type + p += snprintf(p, end - p, " %s", yitian_ddr_payload_err_reg_name[i++]); + p += snprintf(p, end - p, " %s,", type_str); +@@ -204,18 +204,17 @@ void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder, + #ifdef HAVE_SQLITE3 + record_yitian_ddr_reg_dump_event(ev_decoder, &ev); + #endif +- + } + + static int add_yitian_common_table(struct ras_events *ras, +- struct ras_ns_ev_decoder *ev_decoder) ++ struct ras_ns_ev_decoder *ev_decoder) + { + #ifdef HAVE_SQLITE3 + if (ras->record_events && !ev_decoder->stmt_dec_record) { + if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, +- &yitian_ddr_payload_section_tab) != SQLITE_OK) { ++ &yitian_ddr_payload_section_tab) != SQLITE_OK) { + log(TERM, LOG_WARNING, +- "Failed to create sql yitian_ddr_payload_section_tab\n"); ++ "Failed to create sql yitian_ddr_payload_section_tab\n"); + return -1; + } + } +@@ -253,6 +252,7 @@ struct ras_ns_ev_decoder yitian_ns_oem_decoder[] = { + static void __attribute__((constructor)) yitian_ns_init(void) + { + int i; ++ + for (i = 0; i < ARRAY_SIZE(yitian_ns_oem_decoder); i++) + register_ns_ev_decoder(&yitian_ns_oem_decoder[i]); + } +diff --git a/queue.c b/queue.c +index 65b6fb8..a90ed6a 100644 +--- a/queue.c ++++ b/queue.c +@@ -29,7 +29,7 @@ struct link_queue *init_queue(void) + struct link_queue *queue = NULL; + + queue = (struct link_queue *)malloc(sizeof(struct link_queue)); +- 
if (queue == NULL) { ++ if (!queue) { + log(TERM, LOG_ERR, "Failed to allocate memory for queue.\n"); + return NULL; + } +@@ -43,13 +43,13 @@ struct link_queue *init_queue(void) + + void clear_queue(struct link_queue *queue) + { +- if (queue == NULL) ++ if (!queue) + return; + + struct queue_node *node = queue->head; + struct queue_node *tmp = NULL; + +- while (node != NULL) { ++ while (node) { + tmp = node; + node = node->next; + free(tmp); +@@ -72,7 +72,7 @@ void free_queue(struct link_queue *queue) + void push(struct link_queue *queue, struct queue_node *node) + { + /* there is no element in the queue */ +- if (queue->head == NULL) ++ if (!queue->head) + queue->head = node; + else + queue->tail->next = node; +@@ -85,7 +85,7 @@ int pop(struct link_queue *queue) + { + struct queue_node *tmp = NULL; + +- if (queue == NULL || is_empty(queue)) ++ if (!queue || is_empty(queue)) + return -1; + + tmp = queue->head; +@@ -98,7 +98,7 @@ int pop(struct link_queue *queue) + + struct queue_node *front(struct link_queue *queue) + { +- if (queue == NULL) ++ if (!queue) + return NULL; + + return queue->head; +@@ -109,7 +109,7 @@ struct queue_node *node_create(time_t time, unsigned int value) + struct queue_node *node = NULL; + + node = (struct queue_node *)malloc(sizeof(struct queue_node)); +- if (node != NULL) { ++ if (node) { + node->time = time; + node->value = value; + node->next = NULL; +diff --git a/ras-aer-handler.c b/ras-aer-handler.c +index d6898e0..a867ae4 100644 +--- a/ras-aer-handler.c ++++ b/ras-aer-handler.c +@@ -82,7 +82,7 @@ int ras_aer_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +@@ -177,7 +177,7 @@ int ras_aer_event_handler(struct trace_seq *s, + sel_data[4] = (((dev & 0x1f) << 3) | (fn & 0x7)); + + sprintf(ipmi_add_sel, +- "ipmitool raw 0x0a 0x44 0x00 0x00 0xc0 0x00 0x00 0x00 0x00 0x3a 0xcd 0x00 0xc0 0x%02x 0x%02x 
0x%02x 0x%02x 0x%02x", ++ "ipmitool raw 0x0a 0x44 0x00 0x00 0xc0 0x00 0x00 0x00 0x00 0x3a 0xcd 0x00 0xc0 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x", + sel_data[0], sel_data[1], sel_data[2], sel_data[3], sel_data[4]); + + system(ipmi_add_sel); +diff --git a/ras-arm-handler.c b/ras-arm-handler.c +index 731176d..c7e9fd5 100644 +--- a/ras-arm-handler.c ++++ b/ras-arm-handler.c +@@ -29,7 +29,7 @@ + #define BIT2 2 + + void display_raw_data(struct trace_seq *s, +- const uint8_t *buf, ++ const uint8_t *buf, + uint32_t datalen) + { + int i = 0, line_count = 0; +@@ -72,7 +72,7 @@ static int count_errors(struct ras_arm_event *ev, int sev) + + if (ev->pei_len % err_info_size != 0) { + log(TERM, LOG_ERR, +- "The event data does not match to the ARM Processor Error Information Structure\n"); ++ "The event data does not match to the ARM Processor Error Information Structure\n"); + return num; + } + num_pei = ev->pei_len / err_info_size; +@@ -172,7 +172,7 @@ int ras_arm_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +diff --git a/ras-cpu-isolation.c b/ras-cpu-isolation.c +index 90633fd..8af31e9 100644 +--- a/ras-cpu-isolation.c ++++ b/ras-cpu-isolation.c +@@ -120,7 +120,7 @@ static int init_cpu_info(unsigned int cpus) + cpu_infos = (struct cpu_info *)malloc(sizeof(*cpu_infos) * cpus); + if (!cpu_infos) { + log(TERM, LOG_ERR, +- "Failed to allocate memory for cpu infos in %s.\n", __func__); ++ "Failed to allocate memory for cpu infos in %s.\n", __func__); + return -1; + } + +@@ -130,9 +130,9 @@ static int init_cpu_info(unsigned int cpus) + cpu_infos[i].state = get_cpu_status(i); + cpu_infos[i].ce_queue = init_queue(); + +- if (cpu_infos[i].ce_queue == NULL) { ++ if (!cpu_infos[i].ce_queue) { + log(TERM, LOG_ERR, +- "Failed to allocate memory for cpu ce queue in %s.\n", __func__); ++ "Failed to allocate memory for cpu ce queue in %s.\n", 
__func__); + return -1; + } + } +@@ -147,7 +147,7 @@ static void check_config(struct isolation_param *config) + { + if (config->value > config->limit) { + log(TERM, LOG_WARNING, "Value: %lu exceed limit: %lu, set to limit\n", +- config->value, config->limit); ++ config->value, config->limit); + config->value = config->limit; + } + } +@@ -173,7 +173,7 @@ static int parse_ul_config(struct isolation_param *config, char *env, unsigned l + for (int i = 0; i < env_size; ++i) { + if (isdigit(env[i])) { + if (*value > ULONG_MAX / DEC_CHECK || +- (*value == ULONG_MAX / DEC_CHECK && env[i] - '0' > LAST_BIT_OF_UL)) { ++ (*value == ULONG_MAX / DEC_CHECK && env[i] - '0' > LAST_BIT_OF_UL)) { + log(TERM, LOG_ERR, "%s is out of range: %lu\n", env, ULONG_MAX); + return -1; + } +@@ -208,7 +208,7 @@ static void init_config(struct isolation_param *config) + + if (parse_ul_config(config, env, &value) < 0) { + log(TERM, LOG_ERR, "Invalid %s: %s! Use default value %lu.\n", +- config->name, env, config->value); ++ config->name, env, config->value); + return; + } + +@@ -220,7 +220,7 @@ static int check_config_status(void) + { + char *env = getenv("CPU_ISOLATION_ENABLE"); + +- if (env == NULL || strcasecmp(env, "yes")) ++ if (!env || strcasecmp(env, "yes")) + return -1; + + return 0; +@@ -295,12 +295,12 @@ static int do_ce_handler(unsigned int cpu) + cpu_infos[cpu].ce_nums -= tmp; + } + log(TERM, LOG_INFO, +- "Current number of Corrected Errors in cpu%d in the cycle is %lu\n", ++ "Current number of Corrected Errors in cpu%d in the cycle is %lu\n", + cpu, cpu_infos[cpu].ce_nums); + + if (cpu_infos[cpu].ce_nums >= threshold.value) { + log(TERM, LOG_INFO, +- "Corrected Errors exceeded threshold %lu, try to offline cpu%u\n", ++ "Corrected Errors exceeded threshold %lu, try to offline cpu%u\n", + threshold.value, cpu); + return do_cpu_offline(cpu); + } +@@ -341,7 +341,7 @@ static void record_error_info(unsigned int cpu, struct error_info *err_info) + { + struct queue_node *node = 
node_create(err_info->time, err_info->nums); + +- if (node == NULL) { ++ if (!node) { + log(TERM, LOG_ERR, "Fail to allocate memory for queue node\n"); + return; + } +@@ -366,7 +366,7 @@ void ras_record_cpu_error(struct error_info *err_info, int cpu) + + if (cpu >= ncores || cpu < 0) { + log(TERM, LOG_ERR, +- "The current cpu %d has exceed the total number of cpu:%u\n", cpu, ncores); ++ "The current cpu %d has exceed the total number of cpu:%u\n", cpu, ncores); + return; + } + +@@ -385,7 +385,7 @@ void ras_record_cpu_error(struct error_info *err_info, int cpu) + */ + if (ncores - sysconf(_SC_NPROCESSORS_ONLN) >= cpu_limit.value) { + log(TERM, LOG_WARNING, +- "Offlined cpus have exceeded limit: %lu, choose to do nothing\n", ++ "Offlined cpus have exceeded limit: %lu, choose to do nothing\n", + cpu_limit.value); + return; + } +@@ -395,11 +395,11 @@ void ras_record_cpu_error(struct error_info *err_info, int cpu) + log(TERM, LOG_WARNING, "Doing nothing in the cpu%d\n", cpu); + else if (ret == HANDLE_SUCCEED) { + log(TERM, LOG_INFO, "Offline cpu%d succeed, the state is %s\n", +- cpu, cpu_state[cpu_infos[cpu].state]); ++ cpu, cpu_state[cpu_infos[cpu].state]); + clear_queue(cpu_infos[cpu].ce_queue); + cpu_infos[cpu].ce_nums = 0; + cpu_infos[cpu].uce_nums = 0; + } else + log(TERM, LOG_WARNING, "Offline cpu%d fail, the state is %s\n", +- cpu, cpu_state[cpu_infos[cpu].state]); ++ cpu, cpu_state[cpu_infos[cpu].state]); + } +diff --git a/ras-devlink-handler.c b/ras-devlink-handler.c +index e52d66e..b19ccaa 100644 +--- a/ras-devlink-handler.c ++++ b/ras-devlink-handler.c +@@ -38,7 +38,7 @@ int ras_net_xmit_timeout_handler(struct trace_seq *s, + struct devlink_event ev; + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +@@ -78,7 +78,6 @@ int ras_net_xmit_timeout_handler(struct trace_seq *s, + + free(ev.msg); + return 0; +- + } + + int ras_devlink_event_handler(struct trace_seq 
*s, +@@ -104,7 +103,7 @@ int ras_devlink_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +diff --git a/ras-diskerror-handler.c b/ras-diskerror-handler.c +index b46f859..618afdb 100644 +--- a/ras-diskerror-handler.c ++++ b/ras-diskerror-handler.c +@@ -30,7 +30,6 @@ + #include "ras-logger.h" + #include "ras-report.h" + +- + static const struct { + int error; + const char *name; +@@ -82,7 +81,7 @@ int ras_diskerror_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +diff --git a/ras-events.c b/ras-events.c +index 2cc54b3..c83b8de 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -58,7 +58,7 @@ + #define ENDIAN KBUFFER_ENDIAN_BIG + #endif + +-extern char* choices_disable; ++extern char *choices_disable; + + static const struct event_trigger event_triggers[] = { + { "mc_event", &mc_event_trigger_setup }, +@@ -71,7 +71,7 @@ static int get_debugfs_dir(char *tracing_dir, size_t len) + char line[MAX_PATH + 1 + 256]; + char *p, *type, *dir; + +- fp = fopen("/proc/mounts","r"); ++ fp = fopen("/proc/mounts", "r"); + if (!fp) { + log(ALL, LOG_INFO, "Can't open /proc/mounts"); + return errno; +@@ -99,7 +99,7 @@ static int get_debugfs_dir(char *tracing_dir, size_t len) + tracing_dir[len - 1] = '\0'; + return 0; + } +- } while(1); ++ } while (1); + + fclose(fp); + log(ALL, LOG_INFO, "Can't find debugfs\n"); +@@ -144,7 +144,7 @@ static int get_tracing_dir(struct ras_events *ras) + strcat(ras->tracing, "/tracing"); + if (has_instances) { + strcat(ras->tracing, "/instances/" TOOL_NAME); +- rc = mkdir(ras->tracing, S_IRWXU); ++ rc = mkdir(ras->tracing, 0700); + if (rc < 0 && errno != EEXIST) { + log(ALL, LOG_INFO, + "Unable to create " TOOL_NAME " instance at %s\n", +@@ -155,13 +155,14 @@ static int 
get_tracing_dir(struct ras_events *ras) + return 0; + } + +-static int is_disabled_event(char *group, char *event) { ++static int is_disabled_event(char *group, char *event) ++{ + char ras_event_name[MAX_PATH + 1]; + + snprintf(ras_event_name, sizeof(ras_event_name), "%s:%s", +- group, event); ++ group, event); + +- if (choices_disable != NULL && strlen(choices_disable) != 0 && strstr(choices_disable, ras_event_name)) { ++ if (choices_disable && strlen(choices_disable) != 0 && strstr(choices_disable, ras_event_name)) { + return 1; + } + return 0; +@@ -175,6 +176,7 @@ static int __toggle_ras_mc_event(struct ras_events *ras, + { + int fd, rc; + char fname[MAX_PATH + 1]; ++ + enable = is_disabled_event(group, event) ? 0 : 1; + + snprintf(fname, sizeof(fname), "%s%s:%s\n", +@@ -188,7 +190,7 @@ static int __toggle_ras_mc_event(struct ras_events *ras, + return errno; + } + +- rc = write(fd, fname,strlen(fname)); ++ rc = write(fd, fname, strlen(fname)); + if (rc < 0) { + log(ALL, LOG_WARNING, "Can't write to set_event\n"); + close(fd); +@@ -330,7 +332,6 @@ static int get_pagesize(struct ras_events *ras, struct pevent *pevent) + error: + close(fd); + return page_size; +- + } + + static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf, +@@ -411,7 +412,7 @@ static int set_buffer_percent(struct ras_events *ras, int percent) + } + + static int read_ras_event_all_cpus(struct pthread_data *pdata, +- unsigned n_cpus) ++ unsigned int n_cpus) + { + ssize_t size; + unsigned long long time_stamp; +@@ -462,7 +463,7 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + + /* FIXME: use select to open for all CPUs */ + snprintf(pipe_raw, sizeof(pipe_raw), +- "per_cpu/cpu%d/trace_pipe_raw", i); ++ "per_cpu/cpu%d/trace_pipe_raw", i); + + fds[i].fd = open_trace(pdata[0].ras, pipe_raw, O_RDONLY); + if (fds[i].fd < 0) { +@@ -512,7 +513,7 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + fdsiginfo.ssi_signo == SIGTERM || + fdsiginfo.ssi_signo 
== SIGHUP || + fdsiginfo.ssi_signo == SIGQUIT) { +- log(TERM, LOG_INFO, "Recevied signal=%d\n", ++ log(TERM, LOG_INFO, "Received signal=%d\n", + fdsiginfo.ssi_signo); + goto cleanup; + } else { +@@ -717,7 +718,7 @@ static int select_tracing_timestamp(struct ras_events *ras) + int fd, rc; + time_t uptime, now; + size_t size; +- unsigned j1; ++ unsigned int j1; + char buf[4096]; + + /* Check if uptime is supported (kernel 3.10-rc1 or upper) */ +@@ -850,7 +851,7 @@ static int add_event_handler(struct ras_events *ras, struct pevent *pevent, + + if (is_disabled_event(group, event)) { + log(ALL, LOG_INFO, "Disabled %s:%s tracing from config\n", +- group, event); ++ group, event); + return -EINVAL; + } + +@@ -1043,7 +1044,6 @@ int handle_ras_events(int record_events) + if (!data) + goto err; + +- + for (i = 0; i < cpus; i++) { + data[i].ras = ras; + data[i].cpu = i; +@@ -1058,14 +1058,14 @@ int handle_ras_events(int record_events) + } + + log(SYSLOG, LOG_INFO, +- "Opening one thread per cpu (%d threads)\n", cpus); ++ "Opening one thread per cpu (%d threads)\n", cpus); + for (i = 0; i < cpus; i++) { + rc = pthread_create(&data[i].thread, NULL, +- handle_ras_events_cpu, ++ handle_ras_events_cpu, + (void *)&data[i]); + if (rc) { + log(SYSLOG, LOG_INFO, +- "Failed to create thread for cpu %d. Aborting.\n", ++ "Failed to create thread for cpu %d. 
Aborting.\n", + i); + while (--i) + pthread_cancel(data[i].thread); +diff --git a/ras-extlog-handler.c b/ras-extlog-handler.c +index 1834687..b40160a 100644 +--- a/ras-extlog-handler.c ++++ b/ras-extlog-handler.c +@@ -139,7 +139,7 @@ static char *err_cper_data(const char *c) + p += sprintf(p, "card_handle: %d ", cpd->mem_array_handle); + if (cpd->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) + p += sprintf(p, "module_handle: %d ", cpd->mem_dev_handle); +- p += sprintf(p-1, ")"); ++ p += sprintf(p - 1, ")"); + + return buf; + } +@@ -149,10 +149,10 @@ static char *uuid_le(const char *uu) + static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; + char *p = uuid; + int i; +- static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; ++ static const unsigned char le[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; + + for (i = 0; i < 16; i++) { +- p += sprintf(p, "%.2x", (unsigned char) uu[le[i]]); ++ p += sprintf(p, "%.2x", (unsigned char)uu[le[i]]); + switch (i) { + case 3: + case 5: +@@ -168,14 +168,13 @@ static char *uuid_le(const char *uu) + return uuid; + } + +- + static void report_extlog_mem_event(struct ras_events *ras, + struct pevent_record *record, + struct trace_seq *s, + struct ras_extlog_event *ev) + { + trace_seq_printf(s, "%d %s error: %s physical addr: 0x%llx mask: 0x%llx%s %s %s", +- ev->error_seq, err_severity(ev->severity), ++ ev->error_seq, err_severity(ev->severity), + err_type(ev->etype), ev->address, + err_mask(ev->pa_mask_lsb), + err_cper_data(ev->cper_data), +@@ -204,7 +203,7 @@ int ras_extlog_mem_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +diff --git a/ras-mc-handler.c b/ras-mc-handler.c +index b62dfb6..7a4fa3c 100644 +--- a/ras-mc-handler.c ++++ b/ras-mc-handler.c +@@ -134,7 +134,7 @@ int ras_mc_event_handler(struct trace_seq *s, + */ + + if 
(ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +@@ -207,22 +207,22 @@ int ras_mc_event_handler(struct trace_seq *s, + if (pevent_get_field_val(s, event, "top_layer", record, &val, 1) < 0) + goto parse_error; + parsed_fields++; +- ev.top_layer = (signed char) val; ++ ev.top_layer = (signed char)val; + + if (pevent_get_field_val(s, event, "middle_layer", record, &val, 1) < 0) + goto parse_error; + parsed_fields++; +- ev.middle_layer = (signed char) val; ++ ev.middle_layer = (signed char)val; + + if (pevent_get_field_val(s, event, "lower_layer", record, &val, 1) < 0) + goto parse_error; + parsed_fields++; +- ev.lower_layer = (signed char) val; ++ ev.lower_layer = (signed char)val; + + if (ev.top_layer >= 0 || ev.middle_layer >= 0 || ev.lower_layer >= 0) { + if (ev.lower_layer >= 0) + trace_seq_printf(s, " location: %d:%d:%d", +- ev.top_layer, ev.middle_layer, ev.lower_layer); ++ ev.top_layer, ev.middle_layer, ev.lower_layer); + else if (ev.middle_layer >= 0) + trace_seq_printf(s, " location: %d:%d", + ev.top_layer, ev.middle_layer); +diff --git a/ras-mce-handler.c b/ras-mce-handler.c +index 370e68a..f55c732 100644 +--- a/ras-mce-handler.c ++++ b/ras-mce-handler.c +@@ -114,11 +114,11 @@ static enum cputype select_intel_cputype(struct mce_priv *mce) + else if (mce->model == 0x6a) + return CPU_ICELAKE_XEON; + else if (mce->model == 0x6c) +- return CPU_ICELAKE_DE; ++ return CPU_ICELAKE_DE; + else if (mce->model == 0x86) +- return CPU_TREMONT_D; ++ return CPU_TREMONT_D; + else if (mce->model == 0x8f) +- return CPU_SAPPHIRERAPIDS; ++ return CPU_SAPPHIRERAPIDS; + else if (mce->model == 0xcf) + return CPU_EMERALDRAPIDS; + +@@ -161,7 +161,7 @@ static int detect_cpu(struct mce_priv *mce) + mce->mhz = 0; + mce->vendor[0] = '\0'; + +- f = fopen("/proc/cpuinfo","r"); ++ f = fopen("/proc/cpuinfo", "r"); + if (!f) { + log(ALL, LOG_INFO, "Can't open /proc/cpuinfo\n"); + return errno; +@@ 
-169,7 +169,7 @@ static int detect_cpu(struct mce_priv *mce) + + while (seen != CPU_ALL && getdelim(&line, &linelen, '\n', f) > 0) { + if (sscanf(line, "vendor_id : %63[^\n]", +- (char *)&mce->vendor) == 1) ++ (char *)&mce->vendor) == 1) + seen |= CPU_VENDOR; + else if (sscanf(line, "cpu family : %d", &mce->family) == 1) + seen |= CPU_FAMILY; +@@ -189,7 +189,7 @@ static int detect_cpu(struct mce_priv *mce) + + if (seen != CPU_ALL) { + log(ALL, LOG_INFO, "Can't parse /proc/cpuinfo: missing%s%s%s%s%s\n", +- (seen & CPU_VENDOR) ? "" : " [vendor_id]", ++ (seen & CPU_VENDOR) ? "" : " [vendor_id]", + (seen & CPU_FAMILY) ? "" : " [cpu family]", + (seen & CPU_MODEL) ? "" : " [model]", + (seen & CPU_MHZ) ? "" : " [cpu MHz]", +@@ -215,12 +215,12 @@ static int detect_cpu(struct mce_priv *mce) + ret = EINVAL; + } + goto ret; +- } else if (!strcmp(mce->vendor,"HygonGenuine")) { ++ } else if (!strcmp(mce->vendor, "HygonGenuine")) { + if (mce->family == 24) { + mce->cputype = CPU_DHYANA; + } + goto ret; +- } else if (!strcmp(mce->vendor,"GenuineIntel")) { ++ } else if (!strcmp(mce->vendor, "GenuineIntel")) { + mce->cputype = select_intel_cputype(mce); + } else { + ret = EINVAL; +@@ -233,7 +233,7 @@ ret: + return ret; + } + +-int register_mce_handler(struct ras_events *ras, unsigned ncpus) ++int register_mce_handler(struct ras_events *ras, unsigned int ncpus) + { + int rc; + struct mce_priv *mce; +@@ -249,8 +249,8 @@ int register_mce_handler(struct ras_events *ras, unsigned ncpus) + rc = detect_cpu(mce); + if (rc) { + if (mce->processor_flags) +- free (mce->processor_flags); +- free (ras->mce_priv); ++ free(mce->processor_flags); ++ free(ras->mce_priv); + ras->mce_priv = NULL; + return (rc); + } +@@ -290,7 +290,7 @@ static void report_mce_event(struct ras_events *ras, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +@@ -459,7 +459,7 @@ int ras_offline_mce_event(struct 
ras_mc_offline_event *event) + mce->ipid = event->ipid; + if (!mce->ipid || !mce->status) { + log(TERM, LOG_ERR, "%s MSR required.\n", +- mce->ipid ? "Status" : "Ipid"); ++ mce->ipid ? "Status" : "Ipid"); + rc = -EINVAL; + goto free_mce; + } +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index d6e83a9..855c08c 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -167,7 +167,6 @@ static const char *get_action_result(int result) + return "unknown"; + } + +- + int ras_memory_failure_event_handler(struct trace_seq *s, + struct pevent_record *record, + struct event_format *event, void *context) +@@ -188,7 +187,7 @@ int ras_memory_failure_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c +index 3a4e300..968bd56 100644 +--- a/ras-non-standard-handler.c ++++ b/ras-non-standard-handler.c +@@ -24,8 +24,9 @@ + + static struct ras_ns_ev_decoder *ras_ns_ev_dec_list; + +-void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) { +- trace_seq_printf(s, "%02x%02x%02x%02x", buf[index+3], buf[index+2], buf[index+1], buf[index]); ++void print_le_hex(struct trace_seq *s, const uint8_t *buf, int index) ++{ ++ trace_seq_printf(s, "%02x%02x%02x%02x", buf[index + 3], buf[index + 2], buf[index + 1], buf[index]); + } + + static char *uuid_le(const char *uu) +@@ -33,10 +34,10 @@ static char *uuid_le(const char *uu) + static char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; + char *p = uuid; + int i; +- static const unsigned char le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; ++ static const unsigned char le[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15}; + + for (i = 0; i < 16; i++) { +- p += sprintf(p, "%.2x", (unsigned char) uu[le[i]]); ++ p += sprintf(p, "%.2x", (unsigned 
char)uu[le[i]]); + switch (i) { + case 3: + case 5: +@@ -183,7 +184,7 @@ int ras_non_standard_event_handler(struct trace_seq *s, + */ + + if (ras->use_uptime) +- now = record->ts/user_hz + ras->uptime_diff; ++ now = record->ts / user_hz + ras->uptime_diff; + else + now = time(NULL); + +diff --git a/ras-page-isolation.c b/ras-page-isolation.c +index caa8c31..89f8c15 100644 +--- a/ras-page-isolation.c ++++ b/ras-page-isolation.c +@@ -116,7 +116,7 @@ static void parse_isolation_env(struct isolation *config) + int unit_matched = 0; + unsigned long value, tmp; + +- /* check if env is vaild */ ++ /* check if env is valid */ + if (env && strlen(env)) { + /* All the character before unit must be digit */ + for (i = 0; i < strlen(env) - 1; i++) { +@@ -125,7 +125,7 @@ static void parse_isolation_env(struct isolation *config) + } + if (sscanf(env, "%lu", &value) < 1 || !value) + goto parse; +- /* check if the unit is vaild */ ++ /* check if the unit is valid */ + unit = env + strlen(env) - 1; + /* no unit, all the character are value character */ + if (isdigit(*unit)) { +@@ -151,7 +151,7 @@ parse: + config->unit = unit; + } else { + log(TERM, LOG_INFO, "Improper %s, set to default %s.\n", +- config->name, config->env); ++ config->name, config->env); + } + + /* if env value string is greater than ulong_max, truncate the last digit */ +@@ -177,10 +177,11 @@ static void parse_env_string(struct isolation *config, char *str, unsigned int s + + if (config->overflow) { + /* when overflow, use basic unit */ +- for (i = 0; config->units[i].name; i++) ; +- snprintf(str, size, "%lu%s", config->val, config->units[i-1].name); ++ for (i = 0; config->units[i].name; i++) ++ ; ++ snprintf(str, size, "%lu%s", config->val, config->units[i - 1].name); + log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n", +- config->name, config->env); ++ config->name, config->env); + } else { + snprintf(str, size, "%s%s", config->env, config->unit); + } +@@ -202,7 +203,7 @@ static void 
page_isolation_init(void) + parse_env_string(&threshold, threshold_string, sizeof(threshold_string)); + parse_env_string(&cycle, cycle_string, sizeof(cycle_string)); + log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n", +- threshold_string, cycle_string); ++ threshold_string, cycle_string); + } + + void ras_page_account_init(void) +@@ -239,7 +240,7 @@ static void page_offline(struct page_record *pr) + /* Offlining page is not required */ + if (offline <= OFFLINE_ACCOUNT) { + log(TERM, LOG_INFO, "PAGE_CE_ACTION=%s, ignore to offline page at %#llx\n", +- offline_choice[offline].name, addr); ++ offline_choice[offline].name, addr); + return; + } + +@@ -264,7 +265,7 @@ static void page_offline(struct page_record *pr) + addr, page_state[pr->offlined]); + } + +-static void page_record(struct page_record *pr, unsigned count, time_t time) ++static void page_record(struct page_record *pr, unsigned int count, time_t time) + { + unsigned long period = time - pr->start; + unsigned long tolerate; +@@ -328,7 +329,7 @@ static struct page_record *page_lookup_insert(unsigned long long addr) + return find; + } + +-void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) ++void ras_record_page_error(unsigned long long addr, unsigned int count, time_t time) + { + struct page_record *pr = NULL; + +diff --git a/ras-record.c b/ras-record.c +index adb00ca..0409099 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -42,20 +42,20 @@ + */ + + static const struct db_fields mc_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="err_count", .type="INTEGER" }, +- { .name="err_type", .type="TEXT" }, +- { .name="err_msg", .type="TEXT" }, +- { .name="label", .type="TEXT" }, +- { .name="mc", .type="INTEGER" }, +- { .name="top_layer", .type="INTEGER" }, +- { .name="middle_layer", .type="INTEGER" }, +- { .name="lower_layer", .type="INTEGER" }, +- { .name="address", .type="INTEGER" }, +- { 
.name="grain", .type="INTEGER" }, +- { .name="syndrome", .type="INTEGER" }, +- { .name="driver_detail", .type="TEXT" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "err_count", .type = "INTEGER" }, ++ { .name = "err_type", .type = "TEXT" }, ++ { .name = "err_msg", .type = "TEXT" }, ++ { .name = "label", .type = "TEXT" }, ++ { .name = "mc", .type = "INTEGER" }, ++ { .name = "top_layer", .type = "INTEGER" }, ++ { .name = "middle_layer", .type = "INTEGER" }, ++ { .name = "lower_layer", .type = "INTEGER" }, ++ { .name = "address", .type = "INTEGER" }, ++ { .name = "grain", .type = "INTEGER" }, ++ { .name = "syndrome", .type = "INTEGER" }, ++ { .name = "driver_detail", .type = "TEXT" }, + }; + + static const struct db_table_descriptor mc_event_tab = { +@@ -82,9 +82,9 @@ int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) + sqlite3_bind_int (priv->stmt_mc_event, 7, ev->top_layer); + sqlite3_bind_int (priv->stmt_mc_event, 8, ev->middle_layer); + sqlite3_bind_int (priv->stmt_mc_event, 9, ev->lower_layer); +- sqlite3_bind_int64 (priv->stmt_mc_event, 10, ev->address); +- sqlite3_bind_int64 (priv->stmt_mc_event, 11, ev->grain); +- sqlite3_bind_int64 (priv->stmt_mc_event, 12, ev->syndrome); ++ sqlite3_bind_int64(priv->stmt_mc_event, 10, ev->address); ++ sqlite3_bind_int64(priv->stmt_mc_event, 11, ev->grain); ++ sqlite3_bind_int64(priv->stmt_mc_event, 12, ev->syndrome); + sqlite3_bind_text(priv->stmt_mc_event, 13, ev->driver_detail, -1, NULL); + rc = sqlite3_step(priv->stmt_mc_event); + if (rc != SQLITE_OK && rc != SQLITE_DONE) +@@ -106,11 +106,11 @@ int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) + + #ifdef HAVE_AER + static const struct db_fields aer_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="dev_name", .type="TEXT" }, +- { .name="err_type", .type="TEXT" }, +- { .name="err_msg", .type="TEXT" }, 
++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "dev_name", .type = "TEXT" }, ++ { .name = "err_type", .type = "TEXT" }, ++ { .name = "err_msg", .type = "TEXT" }, + }; + + static const struct db_table_descriptor aer_event_tab = { +@@ -154,13 +154,13 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) + + #ifdef HAVE_NON_STANDARD + static const struct db_fields non_standard_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="sec_type", .type="BLOB" }, +- { .name="fru_id", .type="BLOB" }, +- { .name="fru_text", .type="TEXT" }, +- { .name="severity", .type="TEXT" }, +- { .name="error", .type="BLOB" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "sec_type", .type = "BLOB" }, ++ { .name = "fru_id", .type = "BLOB" }, ++ { .name = "fru_text", .type = "TEXT" }, ++ { .name = "severity", .type = "TEXT" }, ++ { .name = "error", .type = "BLOB" }, + }; + + static const struct db_table_descriptor non_standard_event_tab = { +@@ -178,12 +178,12 @@ int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standar + return 0; + log(TERM, LOG_INFO, "non_standard_event store: %p\n", priv->stmt_non_standard_record); + +- sqlite3_bind_text (priv->stmt_non_standard_record, 1, ev->timestamp, -1, NULL); +- sqlite3_bind_blob (priv->stmt_non_standard_record, 2, ev->sec_type, -1, NULL); +- sqlite3_bind_blob (priv->stmt_non_standard_record, 3, ev->fru_id, 16, NULL); +- sqlite3_bind_text (priv->stmt_non_standard_record, 4, ev->fru_text, -1, NULL); +- sqlite3_bind_text (priv->stmt_non_standard_record, 5, ev->severity, -1, NULL); +- sqlite3_bind_blob (priv->stmt_non_standard_record, 6, ev->error, ev->length, NULL); ++ sqlite3_bind_text(priv->stmt_non_standard_record, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_blob(priv->stmt_non_standard_record, 2, ev->sec_type, -1, 
NULL); ++ sqlite3_bind_blob(priv->stmt_non_standard_record, 3, ev->fru_id, 16, NULL); ++ sqlite3_bind_text(priv->stmt_non_standard_record, 4, ev->fru_text, -1, NULL); ++ sqlite3_bind_text(priv->stmt_non_standard_record, 5, ev->severity, -1, NULL); ++ sqlite3_bind_blob(priv->stmt_non_standard_record, 6, ev->error, ev->length, NULL); + + rc = sqlite3_step(priv->stmt_non_standard_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) +@@ -205,16 +205,16 @@ int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standar + + #ifdef HAVE_ARM + static const struct db_fields arm_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="error_count", .type="INTEGER" }, +- { .name="affinity", .type="INTEGER" }, +- { .name="mpidr", .type="INTEGER" }, +- { .name="running_state", .type="INTEGER" }, +- { .name="psci_state", .type="INTEGER" }, +- { .name="err_info", .type="BLOB" }, +- { .name="context_info", .type="BLOB" }, +- { .name="vendor_info", .type="BLOB" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "error_count", .type = "INTEGER" }, ++ { .name = "affinity", .type = "INTEGER" }, ++ { .name = "mpidr", .type = "INTEGER" }, ++ { .name = "running_state", .type = "INTEGER" }, ++ { .name = "psci_state", .type = "INTEGER" }, ++ { .name = "err_info", .type = "BLOB" }, ++ { .name = "context_info", .type = "BLOB" }, ++ { .name = "vendor_info", .type = "BLOB" }, + }; + + static const struct db_table_descriptor arm_event_tab = { +@@ -232,18 +232,18 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) + return 0; + log(TERM, LOG_INFO, "arm_event store: %p\n", priv->stmt_arm_record); + +- sqlite3_bind_text (priv->stmt_arm_record, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_text(priv->stmt_arm_record, 1, ev->timestamp, -1, NULL); + sqlite3_bind_int (priv->stmt_arm_record, 2, ev->error_count); + sqlite3_bind_int 
(priv->stmt_arm_record, 3, ev->affinity); +- sqlite3_bind_int64 (priv->stmt_arm_record, 4, ev->mpidr); ++ sqlite3_bind_int64(priv->stmt_arm_record, 4, ev->mpidr); + sqlite3_bind_int (priv->stmt_arm_record, 5, ev->running_state); + sqlite3_bind_int (priv->stmt_arm_record, 6, ev->psci_state); +- sqlite3_bind_blob (priv->stmt_arm_record, 7, +- ev->pei_error, ev->pei_len, NULL); +- sqlite3_bind_blob (priv->stmt_arm_record, 8, +- ev->ctx_error, ev->ctx_len, NULL); +- sqlite3_bind_blob (priv->stmt_arm_record, 9, +- ev->vsei_error, ev->oem_len, NULL); ++ sqlite3_bind_blob(priv->stmt_arm_record, 7, ++ ev->pei_error, ev->pei_len, NULL); ++ sqlite3_bind_blob(priv->stmt_arm_record, 8, ++ ev->ctx_error, ev->ctx_len, NULL); ++ sqlite3_bind_blob(priv->stmt_arm_record, 9, ++ ev->vsei_error, ev->oem_len, NULL); + + rc = sqlite3_step(priv->stmt_arm_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) +@@ -262,15 +262,15 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) + + #ifdef HAVE_EXTLOG + static const struct db_fields extlog_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="etype", .type="INTEGER" }, +- { .name="error_count", .type="INTEGER" }, +- { .name="severity", .type="INTEGER" }, +- { .name="address", .type="INTEGER" }, +- { .name="fru_id", .type="BLOB" }, +- { .name="fru_text", .type="TEXT" }, +- { .name="cper_data", .type="BLOB" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "etype", .type = "INTEGER" }, ++ { .name = "error_count", .type = "INTEGER" }, ++ { .name = "severity", .type = "INTEGER" }, ++ { .name = "address", .type = "INTEGER" }, ++ { .name = "fru_id", .type = "BLOB" }, ++ { .name = "fru_text", .type = "TEXT" }, ++ { .name = "cper_data", .type = "BLOB" }, + }; + + static const struct db_table_descriptor extlog_event_tab = { +@@ -288,14 +288,14 @@ int ras_store_extlog_mem_record(struct ras_events 
*ras, struct ras_extlog_event + return 0; + log(TERM, LOG_INFO, "extlog_record store: %p\n", priv->stmt_extlog_record); + +- sqlite3_bind_text (priv->stmt_extlog_record, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_text(priv->stmt_extlog_record, 1, ev->timestamp, -1, NULL); + sqlite3_bind_int (priv->stmt_extlog_record, 2, ev->etype); + sqlite3_bind_int (priv->stmt_extlog_record, 3, ev->error_seq); + sqlite3_bind_int (priv->stmt_extlog_record, 4, ev->severity); +- sqlite3_bind_int64 (priv->stmt_extlog_record, 5, ev->address); +- sqlite3_bind_blob (priv->stmt_extlog_record, 6, ev->fru_id, 16, NULL); +- sqlite3_bind_text (priv->stmt_extlog_record, 7, ev->fru_text, -1, NULL); +- sqlite3_bind_blob (priv->stmt_extlog_record, 8, ev->cper_data, ev->cper_data_length, NULL); ++ sqlite3_bind_int64(priv->stmt_extlog_record, 5, ev->address); ++ sqlite3_bind_blob(priv->stmt_extlog_record, 6, ev->fru_id, 16, NULL); ++ sqlite3_bind_text(priv->stmt_extlog_record, 7, ev->fru_text, -1, NULL); ++ sqlite3_bind_blob(priv->stmt_extlog_record, 8, ev->cper_data, ev->cper_data_length, NULL); + + rc = sqlite3_step(priv->stmt_extlog_record); + if (rc != SQLITE_OK && rc != SQLITE_DONE) +@@ -318,8 +318,8 @@ int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event + + #ifdef HAVE_MCE + static const struct db_fields mce_record_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, + + /* MCE registers */ + { .name="mcgcap", .type="INTEGER" }, +@@ -363,7 +363,7 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) + return 0; + log(TERM, LOG_INFO, "mce_record store: %p\n", priv->stmt_mce_record); + +- sqlite3_bind_text (priv->stmt_mce_record, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mce_record, 1, ev->timestamp, -1, NULL); + sqlite3_bind_int (priv->stmt_mce_record, 2, ev->mcgcap); + 
sqlite3_bind_int (priv->stmt_mce_record, 3, ev->mcgstatus); + sqlite3_bind_int64 (priv->stmt_mce_record, 4, ev->status); +@@ -409,13 +409,13 @@ int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) + + #ifdef HAVE_DEVLINK + static const struct db_fields devlink_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="bus_name", .type="TEXT" }, +- { .name="dev_name", .type="TEXT" }, +- { .name="driver_name", .type="TEXT" }, +- { .name="reporter_name", .type="TEXT" }, +- { .name="msg", .type="TEXT" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "bus_name", .type = "TEXT" }, ++ { .name = "dev_name", .type = "TEXT" }, ++ { .name = "driver_name", .type = "TEXT" }, ++ { .name = "reporter_name", .type = "TEXT" }, ++ { .name = "msg", .type = "TEXT" }, + }; + + static const struct db_table_descriptor devlink_event_tab = { +@@ -461,14 +461,14 @@ int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) + + #ifdef HAVE_DISKERROR + static const struct db_fields diskerror_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="dev", .type="TEXT" }, +- { .name="sector", .type="INTEGER" }, +- { .name="nr_sector", .type="INTEGER" }, +- { .name="error", .type="TEXT" }, +- { .name="rwbs", .type="TEXT" }, +- { .name="cmd", .type="TEXT" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "dev", .type = "TEXT" }, ++ { .name = "sector", .type = "INTEGER" }, ++ { .name = "nr_sector", .type = "INTEGER" }, ++ { .name = "error", .type = "TEXT" }, ++ { .name = "rwbs", .type = "TEXT" }, ++ { .name = "cmd", .type = "TEXT" }, + }; + + static const struct db_table_descriptor diskerror_event_tab = { +@@ -515,11 +515,11 @@ int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev + + #ifdef 
HAVE_MEMORY_FAILURE + static const struct db_fields mf_event_fields[] = { +- { .name="id", .type="INTEGER PRIMARY KEY" }, +- { .name="timestamp", .type="TEXT" }, +- { .name="pfn", .type="TEXT" }, +- { .name="page_type", .type="TEXT" }, +- { .name="action_result", .type="TEXT" }, ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "pfn", .type = "TEXT" }, ++ { .name = "page_type", .type = "TEXT" }, ++ { .name = "action_result", .type = "TEXT" }, + }; + + static const struct db_table_descriptor mf_event_tab = { +@@ -664,7 +664,7 @@ static int ras_mc_alter_table(struct sqlite3_priv *priv, + found = 0; + for (j = 0; j < col_count; j++) { + if (!strcmp(field->name, +- sqlite3_column_name(*stmt, j))) { ++ sqlite3_column_name(*stmt, j))) { + found = 1; + break; + } +@@ -755,13 +755,13 @@ int ras_mc_finalize_vendor_table(sqlite3_stmt *stmt) + return rc; + } + +-int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) ++int ras_mc_event_opendb(unsigned int cpu, struct ras_events *ras) + { + int rc; + sqlite3 *db; + struct sqlite3_priv *priv; + +- printf("Calling %s()\n", __FUNCTION__); ++ printf("Calling %s()\n", __func__); + + ras->db_ref_count++; + if (ras->db_ref_count > 1) +@@ -774,6 +774,7 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + return -1; + + struct stat st = {0}; ++ + if (stat(RASSTATEDIR, &st) == -1) { + if (errno != ENOENT) { + log(TERM, LOG_ERR, +@@ -855,7 +856,7 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + rc = ras_mc_create_table(priv, &non_standard_event_tab); + if (rc == SQLITE_OK) { + rc = ras_mc_prepare_stmt(priv, &priv->stmt_non_standard_record, +- &non_standard_event_tab); ++ &non_standard_event_tab); + if (rc != SQLITE_OK) + goto error; + } +@@ -865,7 +866,7 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + rc = ras_mc_create_table(priv, &arm_event_tab); + if (rc == SQLITE_OK) { + rc = ras_mc_prepare_stmt(priv, 
&priv->stmt_arm_record, +- &arm_event_tab); ++ &arm_event_tab); + if (rc != SQLITE_OK) + goto error; + } +@@ -874,7 +875,7 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + rc = ras_mc_create_table(priv, &devlink_event_tab); + if (rc == SQLITE_OK) { + rc = ras_mc_prepare_stmt(priv, &priv->stmt_devlink_event, +- &devlink_event_tab); ++ &devlink_event_tab); + if (rc != SQLITE_OK) + goto error; + } +@@ -884,7 +885,7 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + rc = ras_mc_create_table(priv, &diskerror_event_tab); + if (rc == SQLITE_OK) { + rc = ras_mc_prepare_stmt(priv, &priv->stmt_diskerror_event, +- &diskerror_event_tab); ++ &diskerror_event_tab); + if (rc != SQLITE_OK) + goto error; + } +@@ -958,7 +959,6 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + } + #endif + +- + #ifdef HAVE_MCE + if (priv->stmt_mce_record) { + rc = sqlite3_finalize(priv->stmt_mce_record); +diff --git a/ras-report.c b/ras-report.c +index 62d5eb7..6e3b351 100644 +--- a/ras-report.c ++++ b/ras-report.c +@@ -21,13 +21,14 @@ + + #include "ras-report.h" + +-static int setup_report_socket(void){ ++static int setup_report_socket(void) ++{ + int sockfd = -1; + int rc = -1; + struct sockaddr_un addr; + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); +- if (sockfd < 0){ ++ if (sockfd < 0) { + return -1; + } + +@@ -45,12 +46,13 @@ static int setup_report_socket(void){ + return sockfd; + } + +-static int commit_report_basic(int sockfd){ ++static int commit_report_basic(int sockfd) ++{ + char buf[INPUT_BUFFER_SIZE]; + struct utsname un; + int rc = -1; + +- if(sockfd < 0){ ++ if (sockfd < 0) { + return rc; + } + +@@ -58,7 +60,7 @@ static int commit_report_basic(int sockfd){ + memset(&un, 0, sizeof(struct utsname)); + + rc = uname(&un); +- if(rc < 0){ ++ if (rc < 0) { + return rc; + } + +@@ -67,35 +69,36 @@ static int commit_report_basic(int sockfd){ + */ + sprintf(buf, "PUT / HTTP/1.1\r\n\r\n"); + rc = write(sockfd, buf, strlen(buf)); +- if(rc < 
strlen(buf)){ ++ if (rc < strlen(buf)) { + return -1; + } + + sprintf(buf, "PID=%d", (int)getpid()); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + return -1; + } + + sprintf(buf, "EXECUTABLE=/boot/vmlinuz-%s", un.release); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + return -1; + } + + sprintf(buf, "TYPE=%s", "ras"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + return -1; + } + + return 0; + } + +-static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev){ ++static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -131,10 +134,11 @@ static int set_mc_event_backtrace(char *buf, struct ras_mc_event *ev){ + return 0; + } + +-static int set_mce_event_backtrace(char *buf, struct mce_event *ev){ ++static int set_mce_event_backtrace(char *buf, struct mce_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -190,10 +194,11 @@ static int set_mce_event_backtrace(char *buf, struct mce_event *ev){ + return 0; + } + +-static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev){ ++static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -211,10 +216,11 @@ static int set_aer_event_backtrace(char *buf, struct ras_aer_event *ev){ + return 0; + } + +-static int set_non_standard_event_backtrace(char *buf, struct ras_non_standard_event *ev){ ++static int set_non_standard_event_backtrace(char *buf, struct ras_non_standard_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || 
!ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -230,10 +236,11 @@ static int set_non_standard_event_backtrace(char *buf, struct ras_non_standard_e + return 0; + } + +-static int set_arm_event_backtrace(char *buf, struct ras_arm_event *ev){ ++static int set_arm_event_backtrace(char *buf, struct ras_arm_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -257,10 +264,11 @@ static int set_arm_event_backtrace(char *buf, struct ras_arm_event *ev){ + return 0; + } + +-static int set_devlink_event_backtrace(char *buf, struct devlink_event *ev){ ++static int set_devlink_event_backtrace(char *buf, struct devlink_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -282,10 +290,11 @@ static int set_devlink_event_backtrace(char *buf, struct devlink_event *ev){ + return 0; + } + +-static int set_diskerror_event_backtrace(char *buf, struct diskerror_event *ev) { ++static int set_diskerror_event_backtrace(char *buf, struct diskerror_event *ev) ++{ + char bt_buf[MAX_BACKTRACE_SIZE]; + +- if(!buf || !ev) ++ if (!buf || !ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +@@ -317,14 +326,14 @@ static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev) + return -1; + + sprintf(bt_buf, "BACKTRACE=" \ +- "timestamp=%s\n" \ +- "pfn=%s\n" \ +- "page_type=%s\n" \ +- "action_result=%s\n", \ +- ev->timestamp, \ +- ev->pfn, \ +- ev->page_type, \ +- ev->action_result); ++ "timestamp=%s\n" \ ++ "pfn=%s\n" \ ++ "page_type=%s\n" \ ++ "action_result=%s\n", \ ++ ev->timestamp, \ ++ ev->pfn, \ ++ ev->page_type, \ ++ ev->action_result); + + strcat(buf, bt_buf); + +@@ -337,13 +346,13 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + int rc = -1; + int buf_len = 0; + +- if(sockfd < 0 || !ev){ ++ if (sockfd < 0 || !ev) { + return -1; + } + + memset(buf, 0, MAX_BACKTRACE_SIZE); + 
+- switch(type){ ++ switch (type) { + case MC_EVENT: + rc = set_mc_event_backtrace(buf, (struct ras_mc_event *)ev); + break; +@@ -372,15 +381,15 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + return -1; + } + +- if(rc < 0){ ++ if (rc < 0) { + return -1; + } + + buf_len = strlen(buf); + +- for(;buf_len > INPUT_BUFFER_SIZE - 1; buf_len -= (INPUT_BUFFER_SIZE - 1)){ ++ for (; buf_len > INPUT_BUFFER_SIZE - 1; buf_len -= (INPUT_BUFFER_SIZE - 1)) { + rc = write(sockfd, pbuf, INPUT_BUFFER_SIZE - 1); +- if(rc < INPUT_BUFFER_SIZE - 1){ ++ if (rc < INPUT_BUFFER_SIZE - 1) { + return -1; + } + +@@ -388,14 +397,15 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + } + + rc = write(sockfd, pbuf, buf_len + 1); +- if(rc < buf_len){ ++ if (rc < buf_len) { + return -1; + } + + return 0; + } + +-int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){ ++int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = -1; + int done = 0; +@@ -404,29 +414,29 @@ int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){ + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return -1; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto mc_fail; + } + + rc = commit_report_backtrace(sockfd, MC_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto mc_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-mc"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto mc_fail; + } + + sprintf(buf, "REASON=%s", "EDAC driver report problem"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto mc_fail; + } + +@@ -434,18 +444,19 @@ int ras_report_mc_event(struct ras_events *ras, struct ras_mc_event *ev){ + + mc_fail: + +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + 
close(sockfd); + } + +- if(done){ ++ if (done) { + return 0; +- }else{ ++ } else { + return -1; + } + } + +-int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){ ++int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int done = 0; +@@ -454,29 +465,29 @@ int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){ + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return -1; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto aer_fail; + } + + rc = commit_report_backtrace(sockfd, AER_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto aer_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-aer"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto aer_fail; + } + + sprintf(buf, "REASON=%s", "PCIe AER driver report problem"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto aer_fail; + } + +@@ -484,18 +495,19 @@ int ras_report_aer_event(struct ras_events *ras, struct ras_aer_event *ev){ + + aer_fail: + +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + close(sockfd); + } + +- if(done){ ++ if (done) { + return 0; +- }else{ ++ } else { + return -1; + } + } + +-int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev){ ++int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int rc = -1; +@@ -503,29 +515,29 @@ int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standar + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return rc; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto non_standard_fail; + } + + rc = 
commit_report_backtrace(sockfd, NON_STANDARD_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto non_standard_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-non-standard"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto non_standard_fail; + } + + sprintf(buf, "REASON=%s", "Unknown CPER section problem"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto non_standard_fail; + } + +@@ -533,14 +545,15 @@ int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standar + + non_standard_fail: + +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + close(sockfd); + } + + return rc; + } + +-int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev){ ++int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int rc = -1; +@@ -548,29 +561,29 @@ int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev){ + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return rc; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto arm_fail; + } + + rc = commit_report_backtrace(sockfd, ARM_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto arm_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-arm"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto arm_fail; + } + + sprintf(buf, "REASON=%s", "ARM CPU report problem"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto arm_fail; + } + +@@ -578,14 +591,15 @@ int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev){ + + arm_fail: + +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + close(sockfd); + } + + return rc; + } + +-int ras_report_mce_event(struct ras_events *ras, struct mce_event 
*ev){ ++int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int done = 0; +@@ -594,29 +608,29 @@ int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return -1; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto mce_fail; + } + + rc = commit_report_backtrace(sockfd, MCE_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto mce_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-mce"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto mce_fail; + } + + sprintf(buf, "REASON=%s", "Machine Check driver report problem"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto mce_fail; + } + +@@ -624,18 +638,19 @@ int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev){ + + mce_fail: + +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + close(sockfd); + } + +- if(done){ ++ if (done) { + return 0; +- }else{ ++ } else { + return -1; + } + } + +-int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev){ ++int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int done = 0; +@@ -644,29 +659,29 @@ int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev){ + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return -1; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto devlink_fail; + } + + rc = commit_report_backtrace(sockfd, DEVLINK_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto devlink_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-devlink"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ 
++ if (rc < strlen(buf) + 1) { + goto devlink_fail; + } + + sprintf(buf, "REASON=%s", "devlink health report problem"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto devlink_fail; + } + +@@ -674,18 +689,19 @@ int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev){ + + devlink_fail: + +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + close(sockfd); + } + +- if(done){ ++ if (done) { + return 0; +- }else{ ++ } else { + return -1; + } + } + +-int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev){ ++int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) ++{ + char buf[MAX_MESSAGE_SIZE]; + int sockfd = 0; + int done = 0; +@@ -694,42 +710,42 @@ int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *e + memset(buf, 0, sizeof(buf)); + + sockfd = setup_report_socket(); +- if(sockfd < 0){ ++ if (sockfd < 0) { + return -1; + } + + rc = commit_report_basic(sockfd); +- if(rc < 0){ ++ if (rc < 0) { + goto diskerror_fail; + } + + rc = commit_report_backtrace(sockfd, DISKERROR_EVENT, ev); +- if(rc < 0){ ++ if (rc < 0) { + goto diskerror_fail; + } + + sprintf(buf, "ANALYZER=%s", "rasdaemon-diskerror"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto diskerror_fail; + } + + sprintf(buf, "REASON=%s", "disk I/O error"); + rc = write(sockfd, buf, strlen(buf) + 1); +- if(rc < strlen(buf) + 1){ ++ if (rc < strlen(buf) + 1) { + goto diskerror_fail; + } + + done = 1; + + diskerror_fail: +- if(sockfd >= 0){ ++ if (sockfd >= 0) { + close(sockfd); + } + +- if(done){ ++ if (done) { + return 0; +- }else{ ++ } else { + return -1; + } + } +diff --git a/rasdaemon.c b/rasdaemon.c +index 0db51c9..7a3f964 100644 +--- a/rasdaemon.c ++++ b/rasdaemon.c +@@ -34,7 +34,7 @@ + #define TOOL_DESCRIPTION "RAS daemon to log the RAS events." 
+ #define ARGS_DOC "" + #define DISABLE "DISABLE" +-char *choices_disable = NULL; ++char *choices_disable; + + const char *argp_program_version = TOOL_NAME " " VERSION; + const char *argp_program_bug_address = "Mauro Carvalho Chehab "; +@@ -129,6 +129,7 @@ int main(int argc, char *argv[]) + { + struct arguments args; + int idx = -1; ++ + choices_disable = getenv(DISABLE); + + #ifdef HAVE_MCE +@@ -179,7 +180,7 @@ int main(int argc, char *argv[]) + .children = offline_parser, + #endif + }; +- memset (&args, 0, sizeof(args)); ++ memset(&args, 0, sizeof(args)); + + user_hz = sysconf(_SC_CLK_TCK); + +@@ -208,7 +209,7 @@ int main(int argc, char *argv[]) + + openlog(TOOL_NAME, 0, LOG_DAEMON); + if (!args.foreground) +- if (daemon(0,0)) ++ if (daemon(0, 0)) + exit(EXIT_FAILURE); + + handle_ras_events(args.record_events); +diff --git a/rbtree.c b/rbtree.c +index d9b1bd4..43da434 100644 +--- a/rbtree.c ++++ b/rbtree.c +@@ -28,7 +28,8 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) + struct rb_node *right = node->rb_right; + struct rb_node *parent = rb_parent(node); + +- if ((node->rb_right = right->rb_left)) ++ node->rb_right = right->rb_left; ++ if (node->rb_right) + rb_set_parent(right->rb_left, node); + right->rb_left = node; + +@@ -40,8 +41,7 @@ static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) + parent->rb_left = right; + else + parent->rb_right = right; +- } +- else ++ } else + root->rb_node = right; + rb_set_parent(node, right); + } +@@ -51,7 +51,8 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) + struct rb_node *left = node->rb_left; + struct rb_node *parent = rb_parent(node); + +- if ((node->rb_left = left->rb_right)) ++ node->rb_left = left->rb_right; ++ if (node->rb_left) + rb_set_parent(left->rb_right, node); + left->rb_right = node; + +@@ -63,8 +64,7 @@ static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) + parent->rb_right = left; + else + parent->rb_left = 
left; +- } +- else ++ } else + root->rb_node = left; + rb_set_parent(node, left); + } +@@ -81,6 +81,7 @@ void rb_insert_color(struct rb_node *node, struct rb_root *root) + { + { + register struct rb_node *uncle = gparent->rb_right; ++ + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); +@@ -94,6 +95,7 @@ void rb_insert_color(struct rb_node *node, struct rb_root *root) + if (parent->rb_right == node) + { + struct rb_node *tmp; ++ + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; +@@ -106,6 +108,7 @@ void rb_insert_color(struct rb_node *node, struct rb_root *root) + } else { + { + struct rb_node *uncle = gparent->rb_left; ++ + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); +@@ -119,6 +122,7 @@ void rb_insert_color(struct rb_node *node, struct rb_root *root) + if (parent->rb_left == node) + { + struct rb_node *tmp; ++ + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; +@@ -157,8 +161,7 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + rb_set_red(other); + node = parent; + parent = rb_parent(node); +- } +- else ++ } else + { + if (!other->rb_right || rb_is_black(other->rb_right)) + { +@@ -174,8 +177,7 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + node = root->rb_node; + break; + } +- } +- else ++ } else + { + other = parent->rb_left; + if (rb_is_red(other)) +@@ -191,8 +193,7 @@ static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + rb_set_red(other); + node = parent; + parent = rb_parent(node); +- } +- else ++ } else + { + if (!other->rb_left || rb_is_black(other->rb_left)) + { +@@ -272,8 +273,7 @@ void rb_erase(struct rb_node *node, struct rb_root *root) + parent->rb_left = child; + else + parent->rb_right = child; +- } +- else ++ } else + root->rb_node = child; + + color: +@@ -320,7 +320,7 @@ struct rb_node *rb_next(const struct rb_node *node) + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) +- 
node=node->rb_left; ++ node = node->rb_left; + return (struct rb_node *)node; + } + +@@ -348,7 +348,7 @@ struct rb_node *rb_prev(const struct rb_node *node) + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) +- node=node->rb_right; ++ node = node->rb_right; + return (struct rb_node *)node; + } + +-- +2.33.1 + diff --git a/1048-Do-a-coding-style-cleanup-with-regards-to-tabs-and-w.patch b/1048-Do-a-coding-style-cleanup-with-regards-to-tabs-and-w.patch new file mode 100644 index 0000000000000000000000000000000000000000..0069232432fce6e71405c2692d1ad515f1286677 --- /dev/null +++ b/1048-Do-a-coding-style-cleanup-with-regards-to-tabs-and-w.patch @@ -0,0 +1,2343 @@ +From 964756be1024a526dbe343bea7a161535051065b Mon Sep 17 00:00:00 2001 +From: Mauro Carvalho Chehab +Date: Tue, 11 Jun 2024 12:01:40 +0200 +Subject: [PATCH 48/85] Do a coding style cleanup with regards to tabs and + white spaces + +Use tabs instead of spaces and remove blank ending whitespaces. + +No functional changes. 
+ +Signed-off-by: Mauro Carvalho Chehab +--- + labels/supermicro | 16 +- + util/ras-mc-ctl.in | 1765 ++++++++++++++++++++++---------------------- + 2 files changed, 890 insertions(+), 891 deletions(-) + +diff --git a/labels/supermicro b/labels/supermicro +index aea7c3c..d358bcd 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -64,7 +64,7 @@ Vendor: Supermicro + P1_DIMM4B: 1.1.1; + P2_DIMM4B: 2.0.1; + P2_DIMM4B: 2.1.1; +- ++ + Model: X11DPH-i, X11DPH-T, X11DPH-TQ + P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; + P1-DIMMB1: 0.1.0; +@@ -78,7 +78,7 @@ Vendor: Supermicro + P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1; + P2-DIMME1: 3.1.0; + P2-DIMMF1: 3.2.0; +- ++ + Model: X10DRI, X10DRI-T + P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; + P1-DIMMB1: 0.1.0; P1-DIMMB2: 0.1.1; +@@ -98,7 +98,7 @@ Vendor: Supermicro + P2-DIMMF1: 1.1.0; + P2-DIMMG1: 1.2.0; + P2-DIMMH1: 1.3.0; +- ++ + Model: X11DDW-NT, X11DDW-L + P1-DIMMA1: 0.0.0; + P1-DIMMB1: 0.1.0; +@@ -112,7 +112,7 @@ Vendor: Supermicro + P2-DIMMD1: 3.0.0; + P2-DIMME1: 3.1.0; + P2-DIMMF1: 3.2.0; +- ++ + Model: X11SPM-F, X11SPM-TF, X11SPM-TPF + DIMMA1: 0.0.0; + DIMMB1: 0.1.0; +@@ -120,7 +120,7 @@ Vendor: Supermicro + DIMMD1: 1.0.0; + DIMME1: 1.1.0; + DIMMF1: 1.2.0; +- ++ + Model: B1DRi + P1_DIMMA1: 0.0.0; + P1_DIMMB1: 0.1.0; +@@ -130,13 +130,13 @@ Vendor: Supermicro + P2_DIMMF1: 1.1.0; + P2_DIMMG1: 1.2.0; + P2_DIMMH1: 1.3.0; +- ++ + Model: X11SCA, X11SCA-F + DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0; + DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; +- ++ + Model: X11SCW-F + DIMMA1: 0.1.0; + DIMMA2: 0.0.0; + DIMMB1: 0.1.1; +- DIMMB2: 0.0.1; +\ No newline at end of file ++ DIMMB2: 0.0.1; +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 725d215..b971ddd 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -112,25 +112,25 @@ if ( $conf{opt}{mainboard} || $conf{opt}{print_labels} + get_mainboard_info(); + + if ($conf{opt}{mainboard} eq "report") { +- print "$prog: mainboard: ", +- "$conf{mainboard}{vendor} model 
$conf{mainboard}{model}\n"; ++ print "$prog: mainboard: ", ++ "$conf{mainboard}{vendor} model $conf{mainboard}{model}\n"; + } + + if ($conf{opt}{print_labels}) { +- print_dimm_labels (); ++ print_dimm_labels (); + + } + if ($conf{opt}{register_labels}) { +- register_dimm_labels (); ++ register_dimm_labels (); + } + if ($conf{opt}{display_memory_layout}) { +- display_memory_layout (); ++ display_memory_layout (); + } + if ($conf{opt}{guess_dimm_label}) { +- guess_dimm_label (); ++ guess_dimm_label (); + } + if ($conf{opt}{error_count}) { +- display_error_count (); ++ display_error_count (); + } + } + +@@ -184,39 +184,39 @@ sub parse_cmdline + + Getopt::Long::Configure ("bundling"); + my $rc = GetOptions ("mainboard:s" => sub { $$mref = $_[1]||"report" }, +- "help" => sub {usage (0)}, +- "quiet" => \$conf{opt}{quiet}, +- "print-labels" => \$conf{opt}{print_labels}, +- "guess-labels" => \$conf{opt}{guess_dimm_label}, +- "register-labels" => \$conf{opt}{register_labels}, +- "delay:s" => \$conf{opt}{delay}, +- "labeldb=s" => \$conf{labeldb}, +- "status" => \$conf{opt}{status}, +- "layout" => \$conf{opt}{display_memory_layout}, +- "summary" => \$conf{opt}{summary}, +- "errors" => \$conf{opt}{errors}, +- "error-count" => \$conf{opt}{error_count}, +- "vendor-errors-summary" => \$conf{opt}{vendor_errors_summary}, +- "vendor-errors" => \$conf{opt}{vendor_errors}, +- "since=s" => \$conf{opt}{since}, +- "vendor-platforms" => \$conf{opt}{vendor_platforms}, +- ); ++ "help" => sub {usage (0)}, ++ "quiet" => \$conf{opt}{quiet}, ++ "print-labels" => \$conf{opt}{print_labels}, ++ "guess-labels" => \$conf{opt}{guess_dimm_label}, ++ "register-labels" => \$conf{opt}{register_labels}, ++ "delay:s" => \$conf{opt}{delay}, ++ "labeldb=s" => \$conf{labeldb}, ++ "status" => \$conf{opt}{status}, ++ "layout" => \$conf{opt}{display_memory_layout}, ++ "summary" => \$conf{opt}{summary}, ++ "errors" => \$conf{opt}{errors}, ++ "error-count" => \$conf{opt}{error_count}, ++ "vendor-errors-summary" => 
\$conf{opt}{vendor_errors_summary}, ++ "vendor-errors" => \$conf{opt}{vendor_errors}, ++ "since=s" => \$conf{opt}{since}, ++ "vendor-platforms" => \$conf{opt}{vendor_platforms}, ++ ); + + usage(1) if !$rc; + + usage (0) if !grep $conf{opt}{$_}, keys %{$conf{opt}}; + + if ($conf{opt}{delay} && !$conf{opt}{register_labels}) { +- log_error ("Only use --delay with --register-labels\n"); +- exit (1); ++ log_error ("Only use --delay with --register-labels\n"); ++ exit (1); + } + + if ($conf{opt}{since}) { +- if ($conf{opt}{since} !~ /^20\d\d-[01]\d-[0-3]\d/) { +- log_error ("--since requires a date like yyyy-mm-dd where yyyy is the year, mm the month, and dd the day\n"); +- exit (1); +- } +- $conf{opt}{since} = " where timestamp>='$conf{opt}{since}'"; ++ if ($conf{opt}{since} !~ /^20\d\d-[01]\d-[0-3]\d/) { ++ log_error ("--since requires a date like yyyy-mm-dd where yyyy is the year, mm the month, and dd the day\n"); ++ exit (1); ++ } ++ $conf{opt}{since} = " where timestamp>='$conf{opt}{since}'"; + } + } + +@@ -239,14 +239,14 @@ sub print_status + { + my $status = 0; + open (MODULES, "/proc/modules") +- or die "Unable to open /proc/modules: $!\n"; ++ or die "Unable to open /proc/modules: $!\n"; + + while () { + $status = 1 if /_edac/; + } + + print "$prog: drivers ", ($status ? 
"are" : "not"), " loaded.\n" +- unless $conf{opt}{quiet}; ++ unless $conf{opt}{quiet}; + + return ($status); + } +@@ -256,118 +256,118 @@ sub parse_dimm_nodes + my $file = $File::Find::name; + + if (($file =~ /max_location$/)) { +- open IN, $file; +- my $location = ; +- $location =~ s/\s+$//; +- close IN; +- my @temp = split(/ /, $location); +- +- $layers[0] = "mc"; +- +- if (m,/mc/mc(\d+),) { +- $max_pos[0] = $1 if (!exists($max_pos[0]) || $1 > $max_pos[0]); +- } else { +- $max_pos[0] = 0 if (!exists($max_pos[0])); +- } +- for (my $i = 0; $i < scalar(@temp); $i += 2) { +- $layers[$i / 2 + 1] = $temp[$i]; +- $max_pos[$i / 2 + 1] = $temp[$i + 1]; +- } +- +- return; ++ open IN, $file; ++ my $location = ; ++ $location =~ s/\s+$//; ++ close IN; ++ my @temp = split(/ /, $location); ++ ++ $layers[0] = "mc"; ++ ++ if (m,/mc/mc(\d+),) { ++ $max_pos[0] = $1 if (!exists($max_pos[0]) || $1 > $max_pos[0]); ++ } else { ++ $max_pos[0] = 0 if (!exists($max_pos[0])); ++ } ++ for (my $i = 0; $i < scalar(@temp); $i += 2) { ++ $layers[$i / 2 + 1] = $temp[$i]; ++ $max_pos[$i / 2 + 1] = $temp[$i + 1]; ++ } ++ ++ return; + } + if ($file =~ /size_mb$/) { +- my $mc = $file; +- $mc =~ s,.*mc(\d+).*,$1,; ++ my $mc = $file; ++ $mc =~ s,.*mc(\d+).*,$1,; + +- my $csrow = $file; +- $csrow =~ s,.*csrow(\d+).*,$1,; ++ my $csrow = $file; ++ $csrow =~ s,.*csrow(\d+).*,$1,; + +- open IN, $file; +- my $size = ; +- close IN; ++ open IN, $file; ++ my $size = ; ++ close IN; + +- my $str_loc = join(':', $mc, $csrow); +- $csrow_size{$str_loc} = $size; ++ my $str_loc = join(':', $mc, $csrow); ++ $csrow_size{$str_loc} = $size; + +- return; ++ return; + } + if ($file =~ /location$/) { +- my $mc = $file; +- $mc =~ s,.*mc(\d+).*,$1,; +- +- my $dimm = $file; +- $dimm =~ s,.*(rank|dimm)(\d+).*,$2,; +- +- open IN, $file; +- my $location = ; +- $location =~ s/\s+$//; +- close IN; +- +- my @pos; +- +- # Get the name of the hierarchy labels +- if (!@layers) { +- my @temp = split(/ /, $location); +- $max_pos[0] = 0; 
+- $layers[0] = "mc"; +- for (my $i = 0; $i < scalar(@temp); $i += 2) { +- $layers[$i / 2 + 1] = $temp[$i]; +- $max_pos[$i / 2 + 1] = 0; +- } +- } +- +- my @temp = split(/ /, $location); +- for (my $i = 1; $i < scalar(@temp); $i += 2) { +- $pos[$i / 2] = $temp[$i]; +- +- if ($pos[$i / 2] > $max_pos[$i / 2 + 1]) { +- $max_pos[$i / 2 + 1] = $pos[$i / 2]; +- } +- } +- if ($mc > $max_pos[0]) { +- $max_pos[0] = $mc; +- } +- +- # Get DIMM size +- +- $file =~ s/dimm_location/size/; +- open IN, $file; +- my $size = ; +- close IN; +- +- my $str_loc = join(':', $mc, @pos); +- $dimm_size{$str_loc} = $size; +- $dimm_node{$str_loc} = $dimm; +- $file =~ s/size/dimm_label/; +- $dimm_label_file{$str_loc} = $file; +- $dimm_location{$str_loc} = $location; +- +- my $count; +- +- $file =~s/dimm_label/dimm_ce_count/; +- if (-e $file) { +- open IN, $file; +- chomp($count = ); +- close IN; +- } else { +- log_error ("dimm_ce_count not found in sysfs. Old kernel?\n"); +- exit -1; +- } +- $dimm_ce_count{$str_loc} = $count; +- +- $file =~s/dimm_ce_count/dimm_ue_count/; +- if (-e $file) { +- open IN, $file; +- chomp($count = ); +- close IN; +- } else { +- log_error ("dimm_ue_count not found in sysfs. 
Old kernel?\n"); +- exit -1; +- } +- $dimm_ue_count{$str_loc} = $count; +- +- return; ++ my $mc = $file; ++ $mc =~ s,.*mc(\d+).*,$1,; ++ ++ my $dimm = $file; ++ $dimm =~ s,.*(rank|dimm)(\d+).*,$2,; ++ ++ open IN, $file; ++ my $location = ; ++ $location =~ s/\s+$//; ++ close IN; ++ ++ my @pos; ++ ++ # Get the name of the hierarchy labels ++ if (!@layers) { ++ my @temp = split(/ /, $location); ++ $max_pos[0] = 0; ++ $layers[0] = "mc"; ++ for (my $i = 0; $i < scalar(@temp); $i += 2) { ++ $layers[$i / 2 + 1] = $temp[$i]; ++ $max_pos[$i / 2 + 1] = 0; ++ } ++ } ++ ++ my @temp = split(/ /, $location); ++ for (my $i = 1; $i < scalar(@temp); $i += 2) { ++ $pos[$i / 2] = $temp[$i]; ++ ++ if ($pos[$i / 2] > $max_pos[$i / 2 + 1]) { ++ $max_pos[$i / 2 + 1] = $pos[$i / 2]; ++ } ++ } ++ if ($mc > $max_pos[0]) { ++ $max_pos[0] = $mc; ++ } ++ ++ # Get DIMM size ++ ++ $file =~ s/dimm_location/size/; ++ open IN, $file; ++ my $size = ; ++ close IN; ++ ++ my $str_loc = join(':', $mc, @pos); ++ $dimm_size{$str_loc} = $size; ++ $dimm_node{$str_loc} = $dimm; ++ $file =~ s/size/dimm_label/; ++ $dimm_label_file{$str_loc} = $file; ++ $dimm_location{$str_loc} = $location; ++ ++ my $count; ++ ++ $file =~s/dimm_label/dimm_ce_count/; ++ if (-e $file) { ++ open IN, $file; ++ chomp($count = ); ++ close IN; ++ } else { ++ log_error ("dimm_ce_count not found in sysfs. Old kernel?\n"); ++ exit -1; ++ } ++ $dimm_ce_count{$str_loc} = $count; ++ ++ $file =~s/dimm_ce_count/dimm_ue_count/; ++ if (-e $file) { ++ open IN, $file; ++ chomp($count = ); ++ close IN; ++ } else { ++ log_error ("dimm_ue_count not found in sysfs. 
Old kernel?\n"); ++ exit -1; ++ } ++ $dimm_ue_count{$str_loc} = $count; ++ ++ return; + } + } + +@@ -376,14 +376,14 @@ sub guess_product { + my $pname = undef; + + if (open (VENDOR, "/sys/class/dmi/id/product_vendor")) { +- $pvendor = ; +- close VENDOR; +- chomp($pvendor); ++ $pvendor = ; ++ close VENDOR; ++ chomp($pvendor); + } + if (open (NAME, "/sys/class/dmi/id/product_name")) { +- $pname = ; +- close NAME; +- chomp($pname); ++ $pname = ; ++ close NAME; ++ chomp($pname); + } + + return ($pvendor, $pname); +@@ -394,11 +394,11 @@ sub get_mainboard_info { + my ($pvendor, $pname); + + if ($conf{opt}{mainboard} && $conf{opt}{mainboard} ne "report") { +- ($vendor, $model) = split (/[: ]/, $conf{opt}{mainboard}, 2); ++ ($vendor, $model) = split (/[: ]/, $conf{opt}{mainboard}, 2); + } + + if (!$vendor || !$model) { +- ($vendor, $model) = guess_vendor_model (); ++ ($vendor, $model) = guess_vendor_model (); + } + + $conf{mainboard}{vendor} = $vendor; +@@ -407,9 +407,9 @@ sub get_mainboard_info { + ($pvendor, $pname) = guess_product (); + # since product vendor is rare, use mainboard's vendor + if ($pvendor) { +- $conf{mainboard}{product_vendor} = $pvendor; ++ $conf{mainboard}{product_vendor} = $pvendor; + } else { +- $conf{mainboard}{product_vendor} = $vendor; ++ $conf{mainboard}{product_vendor} = $vendor; + } + $conf{mainboard}{product_name} = $pname if $pname; + } +@@ -427,25 +427,25 @@ sub guess_vendor_model_dmidecode { + + LINE: + while () { +- $line++; ++ $line++; + +- /^(\s*)(board|base board|system) information/i || next LINE; +- my $indent = $1; ++ /^(\s*)(board|base board|system) information/i || next LINE; ++ my $indent = $1; + my $type = $2; + +- while ( ) { +- /^(\s*)/; +- $1 lt $indent && last LINE; +- $indent = $1; +- if ($type eq "system") { +- /(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $system_vendor = $1 ); +- /product(?: name)?:\s*(.*\S)\s*/i && ( $system_model = $1 ); +- } else { +- /(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $vendor = $1 ); +- 
/product(?: name)?:\s*(.*\S)\s*/i && ( $model = $1 ); +- } +- last LINE if ($vendor && $model); +- } ++ while ( ) { ++ /^(\s*)/; ++ $1 lt $indent && last LINE; ++ $indent = $1; ++ if ($type eq "system") { ++ /(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $system_vendor = $1 ); ++ /product(?: name)?:\s*(.*\S)\s*/i && ( $system_model = $1 ); ++ } else { ++ /(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $vendor = $1 ); ++ /product(?: name)?:\s*(.*\S)\s*/i && ( $model = $1 ); ++ } ++ last LINE if ($vendor && $model); ++ } + } + + close (DMI); +@@ -483,18 +483,18 @@ sub parse_mainboard_config + + open (CFG, "$file") or die "Failed to read mainboard config: $file: $!\n"; + while () { +- $line++; +- chomp; # remove newline +- s/^((?:[^'"#]*(?:(['"])[^\2]*\2)*)*)#.*/$1/; # remove comments +- s/^\s+//; # remove leading space +- s/\s+$//; # remove trailing space +- next unless length; # skip blank lines +- if (my ($key, $val) = /^\s*([-\w]+)\s*=\s*(.*)/) { +- $hash{$key}{val} = $val; +- $hash{$key}{line} = $line; +- next; +- } +- return undef; ++ $line++; ++ chomp; # remove newline ++ s/^((?:[^'"#]*(?:(['"])[^\2]*\2)*)*)#.*/$1/; # remove comments ++ s/^\s+//; # remove leading space ++ s/\s+$//; # remove trailing space ++ next unless length; # skip blank lines ++ if (my ($key, $val) = /^\s*([-\w]+)\s*=\s*(.*)/) { ++ $hash{$key}{val} = $val; ++ $hash{$key}{line} = $line; ++ next; ++ } ++ return undef; + } + close (CFG) or &log_error ("close $file: $!\n"); + return \%hash; +@@ -507,16 +507,16 @@ sub guess_vendor_model { + # to get the vendor and model information. 
+ # + if (-f $conf{mbconfig} ) { +- my $cfg = &parse_mainboard_config ($conf{mbconfig}); ++ my $cfg = &parse_mainboard_config ($conf{mbconfig}); + +- # If mainboard config file specified a script, then try to +- # run the specified script or executable: +- # +- if ($cfg->{"script"}) { +- $cfg = &parse_mainboard_config ("$cfg->{script}{val} |"); +- die "Failed to run mainboard script\n" if (!$cfg); +- } +- return ($cfg->{vendor}{val}, $cfg->{model}{val}); ++ # If mainboard config file specified a script, then try to ++ # run the specified script or executable: ++ # ++ if ($cfg->{"script"}) { ++ $cfg = &parse_mainboard_config ("$cfg->{script}{val} |"); ++ die "Failed to run mainboard script\n" if (!$cfg); ++ } ++ return ($cfg->{vendor}{val}, $cfg->{model}{val}); + } + + ($vendor, $model) = &guess_vendor_model_sysfs (); +@@ -531,10 +531,10 @@ sub guess_dimm_label { + + LINE: + while () { +- /^(\s*)memory device$/i || next LINE; ++ /^(\s*)memory device$/i || next LINE; + my ($dimm_label, $dimm_addr); + +- while () { ++ while () { + if (/^\s*(locator|bank locator)/i) { + my $indent = $1; + $indent =~ tr/A-Z/a-z/; +@@ -552,7 +552,7 @@ sub guess_dimm_label { + next LINE; + } + next LINE if (/^\s*\n/); +- } ++ } + } + + close (DMI); +@@ -568,84 +568,84 @@ sub parse_dimm_labels_file + my $num; + + open (LABELS, "$file") +- or die "Unable to open label database: $file: $!\n"; ++ or die "Unable to open label database: $file: $!\n"; + + while () { +- $line++; +- next if /^#/; +- chomp; +- s/^\s+//; +- s/\s+$//; +- next unless length; +- +- if (/vendor\s*:\s*(.*\S)\s*/i) { +- $vendor = lc $1; +- @models = (); +- @products = (); +- $num = 0; +- next; +- } +- if (/(model|board)\s*:\s*(.*)$/i) { +- !$vendor && die "$file: line $line: MB model without vendor\n"; +- @models = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2); +- @products = (); +- $num = 0; +- next; +- } +- if (/(product)\s*:\s*(.*)$/i) { +- !$vendor && die "$file: line $line: product without vendor\n"; +- @models = (); 
+- @products = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2); +- $num = 0; +- next; +- } +- +- # Allow multiple labels to be specified on a single line, +- # separated by ; +- for my $str (split /;/) { +- $str =~ s/^\s*(.*)\s*$/$1/; +- +- next unless (my ($label, $info) = ($str =~ /^(.*)\s*:\s*(.*)$/i)); +- +- unless ($info =~ /\d+(?:[\.\:]\d+)*/) { +- log_error ("$file: $line: Invalid syntax, ignoring: \"$_\"\n"); +- next; +- } +- +- for my $target (split (/[, ]+/, $info)) { +- my $n; +- my ($mc, $top, $mid, $low, $extra) = ($target =~ /(\d+)(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}/); +- +- if (defined($extra)) { +- die ("Error: Only up to 3 layers are currently supported on label db \"$file\"\n"); +- return; +- } elsif (!defined($top)) { +- die ("Error: The label db \"$file\" is defining a zero-layers machine\n"); +- return; +- } else { +- $n = 3; +- if (!defined($low)) { +- $low = 0; +- $n--; +- } +- if (!defined($mid)) { +- $mid = 0; +- $n--; +- } +- map { $lh->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } +- @models; +- map { $lh_prod->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } +- @products; +- } +- if (!$num) { +- $num = $n; +- map { $num_layers->{$vendor}{lc $_} = $num } @models; +- map { $num_layers_prod->{$vendor}{lc $_} = $num } @products; +- } elsif ($num != $n) { +- die ("Error: Inconsistent number of layers at label db \"$file\"\n"); +- } +- } +- } ++ $line++; ++ next if /^#/; ++ chomp; ++ s/^\s+//; ++ s/\s+$//; ++ next unless length; ++ ++ if (/vendor\s*:\s*(.*\S)\s*/i) { ++ $vendor = lc $1; ++ @models = (); ++ @products = (); ++ $num = 0; ++ next; ++ } ++ if (/(model|board)\s*:\s*(.*)$/i) { ++ !$vendor && die "$file: line $line: MB model without vendor\n"; ++ @models = grep { s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2); ++ @products = (); ++ $num = 0; ++ next; ++ } ++ if (/(product)\s*:\s*(.*)$/i) { ++ !$vendor && die "$file: line $line: product without vendor\n"; ++ @models = (); ++ @products = grep { 
s/\s*(.*)\s*$/$1/ } split(/[,;]+/, $2); ++ $num = 0; ++ next; ++ } ++ ++ # Allow multiple labels to be specified on a single line, ++ # separated by ; ++ for my $str (split /;/) { ++ $str =~ s/^\s*(.*)\s*$/$1/; ++ ++ next unless (my ($label, $info) = ($str =~ /^(.*)\s*:\s*(.*)$/i)); ++ ++ unless ($info =~ /\d+(?:[\.\:]\d+)*/) { ++ log_error ("$file: $line: Invalid syntax, ignoring: \"$_\"\n"); ++ next; ++ } ++ ++ for my $target (split (/[, ]+/, $info)) { ++ my $n; ++ my ($mc, $top, $mid, $low, $extra) = ($target =~ /(\d+)(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}/); ++ ++ if (defined($extra)) { ++ die ("Error: Only up to 3 layers are currently supported on label db \"$file\"\n"); ++ return; ++ } elsif (!defined($top)) { ++ die ("Error: The label db \"$file\" is defining a zero-layers machine\n"); ++ return; ++ } else { ++ $n = 3; ++ if (!defined($low)) { ++ $low = 0; ++ $n--; ++ } ++ if (!defined($mid)) { ++ $mid = 0; ++ $n--; ++ } ++ map { $lh->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } ++ @models; ++ map { $lh_prod->{$vendor}{lc $_}{$mc}{$top}{$mid}{$low} = $label } ++ @products; ++ } ++ if (!$num) { ++ $num = $n; ++ map { $num_layers->{$vendor}{lc $_} = $num } @models; ++ map { $num_layers_prod->{$vendor}{lc $_} = $num } @products; ++ } elsif ($num != $n) { ++ die ("Error: Inconsistent number of layers at label db \"$file\"\n"); ++ } ++ } ++ } + } + + close (LABELS) or die "Error from label db \"$file\" : $!\n"; +@@ -663,8 +663,8 @@ sub parse_dimm_labels + # well as any files under the labels dir + # + for my $file ($conf{labeldb}, <$conf{labeldir}/*>) { +- next unless -r $file; +- parse_dimm_labels_file (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod, $file); ++ next unless -r $file; ++ parse_dimm_labels_file (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod, $file); + } + + return (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod); +@@ -681,9 +681,9 @@ sub read_dimm_label + $pos = 
"$mc:$top" if ($num_layers == 1); + + if (!defined($dimm_node{$pos})) { +- my $label = "$pos missing"; +- $pos = ""; +- return ($label, $pos); ++ my $label = "$pos missing"; ++ $pos = ""; ++ return ($label, $pos); + } + + my $dimm = $dimm_node{$pos}; +@@ -695,8 +695,8 @@ sub read_dimm_label + return ("label missing", "$pos missing") unless -f $dimm_label_file; + + if (!open (LABEL, "$dimm_label_file")) { +- warn "Failed to open $dimm_label_file: $!\n"; +- return ("Error"); ++ warn "Failed to open $dimm_label_file: $!\n"; ++ return ("Error"); + } + + chomp (my $label =