diff --git a/0001-Check-CPUs-online-not-configured.patch b/0001-Check-CPUs-online-not-configured.patch new file mode 100644 index 0000000000000000000000000000000000000000..60f5b4e598b945aaa4b9aaa7ae55a24592805986 --- /dev/null +++ b/0001-Check-CPUs-online-not-configured.patch @@ -0,0 +1,38 @@ +From f1ea76375281001cdf4a048c1a4a24d86c6fbe48 Mon Sep 17 00:00:00 2001 +From: Zeph / Liz Loss-Cutler-Hull +Date: Sun, 9 Jul 2023 04:57:19 -0700 +Subject: [PATCH] Check CPUs online, not configured. + +When the number of CPUs detected is greater than the number of CPUs in +the system, rasdaemon will crash when it receives some events. + +Looking deeper, we also fail to use the poll method for similar reasons +in this case. + +All of this can be prevented by checking to see how many CPUs are +currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many +CPUs the current kernel was configured to support +(sysconf(_SC_NPROCESSORS_CONF)). + +For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rQh9DA@mail.gmail.com/T/ +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ras-events.c b/ras-events.c +index a82dab2..5935163 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -350,7 +350,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf, + + static int get_num_cpus(struct ras_events *ras) + { +- return sysconf(_SC_NPROCESSORS_CONF); ++ return sysconf(_SC_NPROCESSORS_ONLN); + #if 0 + char fname[MAX_PATH + 1]; + int num_cpus = 0; +-- +2.25.1 + diff --git a/0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch b/0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch new file mode 100644 index 0000000000000000000000000000000000000000..55f106b4c11762366eb12e80103159f4bfc56b2e --- /dev/null +++ b/0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch @@ -0,0 +1,122 @@ +From 
3576ebb2e0e7badb475807058776de748bbc8c43 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Thu, 24 Aug 2023 13:07:17 +0100 +Subject: [PATCH] rasdaemon: ras-mc-ctl: Modify check for HiSilicon KunPeng9xx + error fields + +Modify check for valid HiSilicon KunPeng9xx error fields. +Fixes an error data is not printed when it's value is 0. + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + util/ras-mc-ctl.in | 72 +++++++++++++++++++++++----------------------- + 1 file changed, 36 insertions(+), 36 deletions(-) + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 4178dcf..07e6fca 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -1672,13 +1672,13 @@ sub vendor_errors + if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { + $out .= "$id. $timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "Error Registers: $regs " if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +@@ -1697,13 +1697,13 @@ sub vendor_errors + if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs " if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "Error Registers: $regs " if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +@@ -1722,15 +1722,15 @@ sub vendor_errors + if ($module eq 0 || ($sub_module_id && uc($module) eq uc($sub_module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "Error Registers: $regs " if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "core_id=$core_id, " if (defined $core_id && length $core_id); ++ $out .= "port_id=$port_id, " if (defined $port_id && length $port_id); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "err_type=$err_type, " if (defined $err_type && length $err_type); ++ $out .= "Error Registers: $regs " if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +@@ -1749,19 +1749,19 @@ sub vendor_errors + if ($module eq 0 || ($module_id && uc($module) eq uc($module_id))) { + $out .= "$id. 
$timestamp Error Info: "; + $out .= "version=$version, "; +- $out .= "soc_id=$soc_id, " if ($soc_id); +- $out .= "socket_id=$socket_id, " if ($socket_id); +- $out .= "totem_id=$totem_id, " if ($totem_id); +- $out .= "nimbus_id=$nimbus_id, " if ($nimbus_id); +- $out .= "sub_system_id=$sub_system_id, " if ($sub_system_id); +- $out .= "module_id=$module_id, " if ($module_id); +- $out .= "sub_module_id=$sub_module_id, " if ($sub_module_id); +- $out .= "core_id=$core_id, " if ($core_id); +- $out .= "port_id=$port_id, " if ($port_id); +- $out .= "err_type=$err_type, " if ($err_type); +- $out .= "pcie_info=$pcie_info, " if ($pcie_info); +- $out .= "err_severity=$err_severity, " if ($err_severity); +- $out .= "Error Registers: $regs" if ($regs); ++ $out .= "soc_id=$soc_id, " if (defined $soc_id && length $soc_id); ++ $out .= "socket_id=$socket_id, " if (defined $socket_id && length $socket_id); ++ $out .= "totem_id=$totem_id, " if (defined $totem_id && length $totem_id); ++ $out .= "nimbus_id=$nimbus_id, " if (defined $nimbus_id && length $nimbus_id); ++ $out .= "sub_system_id=$sub_system_id, " if (defined $sub_system_id && length $sub_system_id); ++ $out .= "module_id=$module_id, " if (defined $module_id && length $module_id); ++ $out .= "sub_module_id=$sub_module_id, " if (defined $sub_module_id && length $sub_module_id); ++ $out .= "core_id=$core_id, " if (defined $core_id && length $core_id ); ++ $out .= "port_id=$port_id, " if (defined $port_id && length $port_id); ++ $out .= "err_type=$err_type, " if (defined $err_type && length $err_type); ++ $out .= "pcie_info=$pcie_info, " if (defined $pcie_info && length $pcie_info); ++ $out .= "err_severity=$err_severity, " if (defined $err_severity && length $err_severity); ++ $out .= "Error Registers: $regs" if (defined $regs && length $regs); + $out .= "\n\n"; + $found_module = 1; + } +-- +2.25.1 + diff --git a/rasdaemon.spec b/rasdaemon.spec index 
5cc1a8b69520709c00f762128f5f9864ded199e7..61e482323fe13102898f27e66449b13ea7acb324 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.7 -Release: 14 +Release: 15 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -44,6 +44,8 @@ Patch6000: backport-rasdaemon-ras-mc-ctl-Fix-script-to-parse-dimm-sizes.patch Patch6001: backport-rasdaemon-ras-memory-failure-handler-handle-localtim.patch Patch6002: backport-rasdaemon-ras-report-fix-possible-but-unlikely-file-.patch Patch6003: backport-tools-lib-traceevent-Add-proper-KBUFFER_TYPE_TIME_ST.patch +Patch6004: 0001-rasdaemon-ras-mc-ctl-Modify-check-for-HiSilicon-KunP.patch +Patch6005: 0001-Check-CPUs-online-not-configured.patch Patch9000: fix-ras-mc-ctl.service-startup-failed-when-selinux-is-no.patch Patch9001: 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch @@ -104,6 +106,14 @@ fi /usr/bin/systemctl disable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Wed Dec 6 2023 caijian - 0.6.7-15 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC: + 1. Fix "nimbus_id=0" field not being displayed when querying RAS error information. + 2. Check CPUs online, not configured. + * Wed Nov 29 2023 renhongxun - 0.6.7-14 - Type:bugfix - ID:NA