diff --git a/0001-rasdaemon-add-rbtree-support-for-page-record.patch b/0001-rasdaemon-add-rbtree-support-for-page-record.patch deleted file mode 100644 index 6aa817a6ae653371066d8631066f39b31d4b3575..0000000000000000000000000000000000000000 --- a/0001-rasdaemon-add-rbtree-support-for-page-record.patch +++ /dev/null @@ -1,584 +0,0 @@ -From 24b206648ec4841cb75257e4507155a656b7d6eb Mon Sep 17 00:00:00 2001 -From: wuyun -Date: Sat, 20 Jun 2020 20:26:21 +0800 -Subject: [PATCH] rasdaemon: add rbtree support for page record - -commit 5fd96f457262052f7d06435af8a49689ffb6ffcf upstream - -The rbtree is very efficient for recording and querying fault page info. - -Signed-off-by: wuyun -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab -Signed-off-by: Bixuan Cui ---- - rbtree.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - rbtree.h | 165 +++++++++++++++++++++++++++ - 2 files changed, 549 insertions(+) - create mode 100644 rbtree.c - create mode 100644 rbtree.h - -diff --git a/rbtree.c b/rbtree.c -new file mode 100644 -index 0000000..d9b1bd4 ---- /dev/null -+++ b/rbtree.c -@@ -0,0 +1,384 @@ -+/* -+ Red Black Trees -+ (C) 1999 Andrea Arcangeli -+ (C) 2002 David Woodhouse -+ Taken from the Linux 2.6.30 source with some minor modificatons. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. 
-+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ linux/lib/rbtree.c -+*/ -+ -+#include "rbtree.h" -+ -+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *right = node->rb_right; -+ struct rb_node *parent = rb_parent(node); -+ -+ if ((node->rb_right = right->rb_left)) -+ rb_set_parent(right->rb_left, node); -+ right->rb_left = node; -+ -+ rb_set_parent(right, parent); -+ -+ if (parent) -+ { -+ if (node == parent->rb_left) -+ parent->rb_left = right; -+ else -+ parent->rb_right = right; -+ } -+ else -+ root->rb_node = right; -+ rb_set_parent(node, right); -+} -+ -+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *left = node->rb_left; -+ struct rb_node *parent = rb_parent(node); -+ -+ if ((node->rb_left = left->rb_right)) -+ rb_set_parent(left->rb_right, node); -+ left->rb_right = node; -+ -+ rb_set_parent(left, parent); -+ -+ if (parent) -+ { -+ if (node == parent->rb_right) -+ parent->rb_right = left; -+ else -+ parent->rb_left = left; -+ } -+ else -+ root->rb_node = left; -+ rb_set_parent(node, left); -+} -+ -+void rb_insert_color(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *parent, *gparent; -+ -+ while ((parent = rb_parent(node)) && rb_is_red(parent)) -+ { -+ gparent = rb_parent(parent); -+ -+ if (parent == gparent->rb_left) -+ { -+ { -+ register struct rb_node *uncle = gparent->rb_right; -+ if (uncle && rb_is_red(uncle)) -+ { -+ rb_set_black(uncle); -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ node = gparent; -+ continue; -+ } -+ } -+ -+ if (parent->rb_right == node) -+ { -+ struct rb_node *tmp; -+ __rb_rotate_left(parent, root); -+ tmp = parent; -+ parent = node; -+ node = tmp; -+ } -+ -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ __rb_rotate_right(gparent, root); -+ } 
else { -+ { -+ struct rb_node *uncle = gparent->rb_left; -+ if (uncle && rb_is_red(uncle)) -+ { -+ rb_set_black(uncle); -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ node = gparent; -+ continue; -+ } -+ } -+ -+ if (parent->rb_left == node) -+ { -+ struct rb_node *tmp; -+ __rb_rotate_right(parent, root); -+ tmp = parent; -+ parent = node; -+ node = tmp; -+ } -+ -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ __rb_rotate_left(gparent, root); -+ } -+ } -+ -+ rb_set_black(root->rb_node); -+} -+ -+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, -+ struct rb_root *root) -+{ -+ struct rb_node *other; -+ -+ while ((!node || rb_is_black(node)) && node != root->rb_node) -+ { -+ if (parent->rb_left == node) -+ { -+ other = parent->rb_right; -+ if (rb_is_red(other)) -+ { -+ rb_set_black(other); -+ rb_set_red(parent); -+ __rb_rotate_left(parent, root); -+ other = parent->rb_right; -+ } -+ if ((!other->rb_left || rb_is_black(other->rb_left)) && -+ (!other->rb_right || rb_is_black(other->rb_right))) -+ { -+ rb_set_red(other); -+ node = parent; -+ parent = rb_parent(node); -+ } -+ else -+ { -+ if (!other->rb_right || rb_is_black(other->rb_right)) -+ { -+ rb_set_black(other->rb_left); -+ rb_set_red(other); -+ __rb_rotate_right(other, root); -+ other = parent->rb_right; -+ } -+ rb_set_color(other, rb_color(parent)); -+ rb_set_black(parent); -+ rb_set_black(other->rb_right); -+ __rb_rotate_left(parent, root); -+ node = root->rb_node; -+ break; -+ } -+ } -+ else -+ { -+ other = parent->rb_left; -+ if (rb_is_red(other)) -+ { -+ rb_set_black(other); -+ rb_set_red(parent); -+ __rb_rotate_right(parent, root); -+ other = parent->rb_left; -+ } -+ if ((!other->rb_left || rb_is_black(other->rb_left)) && -+ (!other->rb_right || rb_is_black(other->rb_right))) -+ { -+ rb_set_red(other); -+ node = parent; -+ parent = rb_parent(node); -+ } -+ else -+ { -+ if (!other->rb_left || rb_is_black(other->rb_left)) -+ { -+ rb_set_black(other->rb_right); -+ 
rb_set_red(other); -+ __rb_rotate_left(other, root); -+ other = parent->rb_left; -+ } -+ rb_set_color(other, rb_color(parent)); -+ rb_set_black(parent); -+ rb_set_black(other->rb_left); -+ __rb_rotate_right(parent, root); -+ node = root->rb_node; -+ break; -+ } -+ } -+ } -+ if (node) -+ rb_set_black(node); -+} -+ -+void rb_erase(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *child, *parent; -+ int color; -+ -+ if (!node->rb_left) -+ child = node->rb_right; -+ else if (!node->rb_right) -+ child = node->rb_left; -+ else -+ { -+ struct rb_node *old = node, *left; -+ -+ node = node->rb_right; -+ while ((left = node->rb_left) != NULL) -+ node = left; -+ child = node->rb_right; -+ parent = rb_parent(node); -+ color = rb_color(node); -+ -+ if (child) -+ rb_set_parent(child, parent); -+ if (parent == old) { -+ parent->rb_right = child; -+ parent = node; -+ } else -+ parent->rb_left = child; -+ -+ node->rb_parent_color = old->rb_parent_color; -+ node->rb_right = old->rb_right; -+ node->rb_left = old->rb_left; -+ -+ if (rb_parent(old)) -+ { -+ if (rb_parent(old)->rb_left == old) -+ rb_parent(old)->rb_left = node; -+ else -+ rb_parent(old)->rb_right = node; -+ } else -+ root->rb_node = node; -+ -+ rb_set_parent(old->rb_left, node); -+ if (old->rb_right) -+ rb_set_parent(old->rb_right, node); -+ goto color; -+ } -+ -+ parent = rb_parent(node); -+ color = rb_color(node); -+ -+ if (child) -+ rb_set_parent(child, parent); -+ if (parent) -+ { -+ if (parent->rb_left == node) -+ parent->rb_left = child; -+ else -+ parent->rb_right = child; -+ } -+ else -+ root->rb_node = child; -+ -+ color: -+ if (color == RB_BLACK) -+ __rb_erase_color(child, parent, root); -+} -+ -+/* -+ * This function returns the first node (in sort order) of the tree. 
-+ */ -+struct rb_node *rb_first(const struct rb_root *root) -+{ -+ struct rb_node *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return NULL; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+ -+struct rb_node *rb_last(const struct rb_root *root) -+{ -+ struct rb_node *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return NULL; -+ while (n->rb_right) -+ n = n->rb_right; -+ return n; -+} -+ -+struct rb_node *rb_next(const struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+ if (rb_parent(node) == node) -+ return NULL; -+ -+ /* If we have a right-hand child, go down and then left as far -+ as we can. */ -+ if (node->rb_right) { -+ node = node->rb_right; -+ while (node->rb_left) -+ node=node->rb_left; -+ return (struct rb_node *)node; -+ } -+ -+ /* No right-hand children. Everything down and left is -+ smaller than us, so any 'next' node must be in the general -+ direction of our parent. Go up the tree; any time the -+ ancestor is a right-hand child of its parent, keep going -+ up. First time it's a left-hand child of its parent, said -+ parent is our 'next' node. */ -+ while ((parent = rb_parent(node)) && node == parent->rb_right) -+ node = parent; -+ -+ return parent; -+} -+ -+struct rb_node *rb_prev(const struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+ if (rb_parent(node) == node) -+ return NULL; -+ -+ /* If we have a left-hand child, go down and then right as far -+ as we can. */ -+ if (node->rb_left) { -+ node = node->rb_left; -+ while (node->rb_right) -+ node=node->rb_right; -+ return (struct rb_node *)node; -+ } -+ -+ /* No left-hand children. 
Go up till we find an ancestor which -+ is a right-hand child of its parent */ -+ while ((parent = rb_parent(node)) && node == parent->rb_left) -+ node = parent; -+ -+ return parent; -+} -+ -+void rb_replace_node(struct rb_node *victim, struct rb_node *new, -+ struct rb_root *root) -+{ -+ struct rb_node *parent = rb_parent(victim); -+ -+ /* Set the surrounding nodes to point to the replacement */ -+ if (parent) { -+ if (victim == parent->rb_left) -+ parent->rb_left = new; -+ else -+ parent->rb_right = new; -+ } else { -+ root->rb_node = new; -+ } -+ if (victim->rb_left) -+ rb_set_parent(victim->rb_left, new); -+ if (victim->rb_right) -+ rb_set_parent(victim->rb_right, new); -+ -+ /* Copy the pointers/colour from the victim to the replacement */ -+ *new = *victim; -+} -diff --git a/rbtree.h b/rbtree.h -new file mode 100644 -index 0000000..a8a0459 ---- /dev/null -+++ b/rbtree.h -@@ -0,0 +1,165 @@ -+/* -+ Red Black Trees -+ (C) 1999 Andrea Arcangeli -+ Taken from the Linux 2.6.30 source. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ linux/include/linux/rbtree.h -+ -+ To use rbtrees you'll have to implement your own insert and search cores. -+ This will avoid us to use callbacks and to drop drammatically performances. 
-+ I know it's not the cleaner way, but in C (not in C++) to get -+ performances and genericity... -+ -+ Some example of insert and search follows here. The search is a plain -+ normal search over an ordered tree. The insert instead must be implemented -+ int two steps: as first thing the code must insert the element in -+ order as a red leaf in the tree, then the support library function -+ rb_insert_color() must be called. Such function will do the -+ not trivial work to rebalance the rbtree if necessary. -+ -+----------------------------------------------------------------------- -+static inline struct page * rb_search_page_cache(struct inode * inode, -+ unsigned long offset) -+{ -+ struct rb_node * n = inode->i_rb_page_cache.rb_node; -+ struct page * page; -+ -+ while (n) -+ { -+ page = rb_entry(n, struct page, rb_page_cache); -+ -+ if (offset < page->offset) -+ n = n->rb_left; -+ else if (offset > page->offset) -+ n = n->rb_right; -+ else -+ return page; -+ } -+ return NULL; -+} -+ -+static inline struct page * __rb_insert_page_cache(struct inode * inode, -+ unsigned long offset, -+ struct rb_node * node) -+{ -+ struct rb_node ** p = &inode->i_rb_page_cache.rb_node; -+ struct rb_node * parent = NULL; -+ struct page * page; -+ -+ while (*p) -+ { -+ parent = *p; -+ page = rb_entry(parent, struct page, rb_page_cache); -+ -+ if (offset < page->offset) -+ p = &(*p)->rb_left; -+ else if (offset > page->offset) -+ p = &(*p)->rb_right; -+ else -+ return page; -+ } -+ -+ rb_link_node(node, parent, p); -+ -+ return NULL; -+} -+ -+static inline struct page * rb_insert_page_cache(struct inode * inode, -+ unsigned long offset, -+ struct rb_node * node) -+{ -+ struct page * ret; -+ if ((ret = __rb_insert_page_cache(inode, offset, node))) -+ goto out; -+ rb_insert_color(node, &inode->i_rb_page_cache); -+ out: -+ return ret; -+} -+----------------------------------------------------------------------- -+*/ -+ -+#ifndef _LINUX_RBTREE_H -+#define _LINUX_RBTREE_H -+ -+#include 
-+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct rb_node -+{ -+ unsigned long rb_parent_color; -+#define RB_RED 0 -+#define RB_BLACK 1 -+ struct rb_node *rb_right; -+ struct rb_node *rb_left; -+} __attribute__((aligned(sizeof(long)))); -+ /* The alignment might seem pointless, but allegedly CRIS needs it */ -+ -+struct rb_root -+{ -+ struct rb_node *rb_node; -+}; -+ -+ -+#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -+#define rb_color(r) ((r)->rb_parent_color & 1) -+#define rb_is_red(r) (!rb_color(r)) -+#define rb_is_black(r) rb_color(r) -+#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -+#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) -+ -+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -+{ -+ rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -+} -+static inline void rb_set_color(struct rb_node *rb, int color) -+{ -+ rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -+} -+ -+#define RB_ROOT (struct rb_root) { NULL, } -+#define rb_entry(ptr, type, member) container_of(ptr, type, member) -+ -+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -+#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -+#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) -+ -+extern void rb_insert_color(struct rb_node *, struct rb_root *); -+extern void rb_erase(struct rb_node *, struct rb_root *); -+ -+/* Find logical next and previous nodes in a tree */ -+extern struct rb_node *rb_next(const struct rb_node *); -+extern struct rb_node *rb_prev(const struct rb_node *); -+extern struct rb_node *rb_first(const struct rb_root *); -+extern struct rb_node *rb_last(const struct rb_root *); -+ -+/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, -+ 
struct rb_root *root); -+ -+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, -+ struct rb_node ** rb_link) -+{ -+ node->rb_parent_color = (unsigned long )parent; -+ node->rb_left = node->rb_right = NULL; -+ -+ *rb_link = node; -+} -+ -+#endif /* _LINUX_RBTREE_H */ --- -1.8.3.1 - diff --git a/0002-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch b/0002-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch deleted file mode 100644 index 01d40c31116994e3ddfd33383da3773f9a931944..0000000000000000000000000000000000000000 --- a/0002-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch +++ /dev/null @@ -1,647 +0,0 @@ -From c1b22ed576515945913fb5f48cb1a5ba33480d25 Mon Sep 17 00:00:00 2001 -From: wuyun -Date: Sat, 20 Jun 2020 20:26:22 +0800 -Subject: [PATCH] rasdaemon: add support for memory Corrected Error - predictive failure analysis - -commit 9ae6b70effb8adc9572debc800b8e16173f74bb8 upstream -commit 32b978fcf73215c90ce7383580224e4295930ae7 upstream - -Memory Corrected Error was corrected by hardware. These errors do not -require immediate software actions, but are still reported for -accounting and predictive failure analysis. - -Based on statistical results, some actions can be taken to prevent -Corrected Error from evoluting to Uncorrected Error. 
- -Signed-off-by: wuyun -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab -Signed-off-by: Bixuan Cui ---- - Makefile.am | 10 +- - configure.ac | 11 ++ - man/rasdaemon.1.in | 7 + - misc/rasdaemon.env | 29 ++++ - misc/rasdaemon.service.in | 1 + - misc/rasdaemon.spec.in | 2 + - ras-events.c | 6 + - ras-mc-handler.c | 7 + - ras-page-isolation.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++ - ras-page-isolation.h | 66 +++++++++ - 10 files changed, 469 insertions(+), 2 deletions(-) - create mode 100644 misc/rasdaemon.env - create mode 100644 ras-page-isolation.c - create mode 100644 ras-page-isolation.h - -diff --git a/Makefile.am b/Makefile.am -index 843b538..f4822b9 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -2,7 +2,7 @@ ACLOCAL_AMFLAGS=-I m4 - SUBDIRS = libtrace util man - SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) --EXTRA_DIST = $(SYSTEMD_SERVICES_IN) -+EXTRA_DIST = $(SYSTEMD_SERVICES_IN) misc/rasdaemon.env - - # This rule is needed because \@sbindir\@ is expanded to \${exec_prefix\}/sbin - # during ./configure phase, therefore it is not possible to add .service.in -@@ -54,12 +54,15 @@ endif - if WITH_HISI_NS_DECODE - rasdaemon_SOURCES += non-standard-hisi_hip07.c non-standard-hisi_hip08.c - endif -+if WITH_MEMORY_CE_PFA -+ rasdaemon_SOURCES += rbtree.c ras-page-isolation.c -+endif - rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ - ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ -- ras-devlink-handler.h ras-diskerror-handler.h -+ ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that -@@ -87,3 +90,6 @@ upload: - 
install-data-local: - $(install_sh) -d "$(DESTDIR)@RASSTATEDIR@" - $(install_sh) -d "$(DESTDIR)@sysconfdir@/ras/dimm_labels.d" -+if WITH_MEMORY_CE_PFA -+ $(install_sh) @abs_srcdir@/misc/rasdaemon.env "$(DESTDIR)@sysconfdir@/sysconfig/rasdaemon" -+endif -diff --git a/configure.ac b/configure.ac -index d8c7991..899cbd3 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -131,6 +131,16 @@ AS_IF([test "x$enable_hisi_ns_decode" = "xyes" || test "x$enable_all" == "xyes"] - AM_CONDITIONAL([WITH_HISI_NS_DECODE], [test x$enable_hisi_ns_decode = xyes || test x$enable_all == xyes]) - AM_COND_IF([WITH_HISI_NS_DECODE], [USE_HISI_NS_DECODE="yes"], [USE_HISI_NS_DECODE="no"]) - -+AC_ARG_ENABLE([memory_ce_pfa], -+ AS_HELP_STRING([--enable-memory-ce-pfa], [enable memory Corrected Error predictive failure analysis])) -+ -+AS_IF([test "x$enable_memory_ce_pfa" = "xyes" || test "x$enable_all" == "xyes"], [ -+ AC_DEFINE(HAVE_MEMORY_CE_PFA,1,"have memory corrected error predictive failure analysis") -+ AC_SUBST([WITH_MEMORY_CE_PFA]) -+]) -+AM_CONDITIONAL([WITH_MEMORY_CE_PFA], [test x$enable_memory_ce_pfa = xyes || test x$enable_all == xyes]) -+AM_COND_IF([WITH_MEMORY_CE_PFA], [USE_MEMORY_CE_PFA="yes"], [USE_MEMORY_CE_PFA="no"]) -+ - test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc - - CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" -@@ -162,4 +172,5 @@ compile time options summary - ARM events : $USE_ARM - DEVLINK : $USE_DEVLINK - Disk I/O errors : $USE_DISKERROR -+ Memory CE PFA : $USE_MEMORY_CE_PFA - EOF -diff --git a/man/rasdaemon.1.in b/man/rasdaemon.1.in -index 834df16..833c8e1 100644 ---- a/man/rasdaemon.1.in -+++ b/man/rasdaemon.1.in -@@ -62,6 +62,13 @@ feature. - .BI "--version" - Print the program version and exit. - -+.SH CONFIG FILE -+ -+The \fBrasdaemon\fR program supports a config file to set rasdaemon systemd service -+environment variables. By default the config file is read from /etc/sysconfig/rasdaemon. 
-+ -+The general format is environmentname=value. -+ - .SH SEE ALSO - \fBras-mc-ctl\fR(8) - -diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env -new file mode 100644 -index 0000000..12fd766 ---- /dev/null -+++ b/misc/rasdaemon.env -@@ -0,0 +1,29 @@ -+# Page Isolation -+# Note: Run-time configuration is unsupported, service restart needed. -+# Note: this file should be installed at /etc/sysconfig/rasdaemon -+ -+# Specify the threshold of isolating buggy pages. -+# -+# Format: -+# [0-9]+[unit] -+# Notice: please make sure match this format, rasdaemon will use default value for exception input cases. -+# -+# Supported units: -+# PAGE_CE_REFRESH_CYCLE: D|d (day), H|h (hour), M|m (min), default is in hour -+# PAGE_CE_THRESHOLD: K|k (x1000), M|m (x1000k), default is none -+# -+# The two configs will only take no effect when PAGE_CE_ACTION is "off". -+PAGE_CE_REFRESH_CYCLE="24h" -+PAGE_CE_THRESHOLD="50" -+ -+# Specify the internal action in rasdaemon to exceeding a page error threshold. -+# -+# off no action -+# account only account errors -+# soft try to soft-offline page without killing any processes -+# This requires an uptodate kernel. Might not be successfull. -+# hard try to hard-offline page by killing processes -+# Requires an uptodate kernel. Might not be successfull. -+# soft-then-hard First try to soft offline, then try hard offlining. -+# Note: default offline choice is "soft". 
-+PAGE_CE_ACTION="soft" -diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in -index be9ad5a..e73a08a 100644 ---- a/misc/rasdaemon.service.in -+++ b/misc/rasdaemon.service.in -@@ -3,6 +3,7 @@ Description=RAS daemon to log the RAS events - After=syslog.target - - [Service] -+EnvironmentFile=/etc/sysconfig/rasdaemon - ExecStart=@sbindir@/rasdaemon -f -r - ExecStartPost=@sbindir@/rasdaemon --enable - ExecStop=@sbindir@/rasdaemon --disable -diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in -index 0674b5b..790caa0 100644 ---- a/misc/rasdaemon.spec.in -+++ b/misc/rasdaemon.spec.in -@@ -44,6 +44,7 @@ make %{?_smp_mflags} - - %install - make install DESTDIR=%{buildroot} -+install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service - rm INSTALL %{buildroot}/usr/include/*.h -@@ -56,6 +57,7 @@ rm INSTALL %{buildroot}/usr/include/*.h - %{_unitdir}/*.service - %{_sharedstatedir}/rasdaemon - %{_sysconfdir}/ras/dimm_labels.d -+%config(noreplace) %{_sysconfdir}/sysconfig/%{name} - - %changelog - -diff --git a/ras-events.c b/ras-events.c -index 3cdac19..358fcc0 100644 ---- a/ras-events.c -+++ b/ras-events.c -@@ -37,6 +37,7 @@ - #include "ras-diskerror-handler.h" - #include "ras-record.h" - #include "ras-logger.h" -+#include "ras-page-isolation.h" - - /* - * Polling time, if read() doesn't block. 
Currently, trace_pipe_raw never -@@ -735,6 +736,11 @@ int handle_ras_events(int record_events) - ras->page_size = page_size; - ras->record_events = record_events; - -+#ifdef HAVE_MEMORY_CE_PFA -+ /* FIXME: enable memory isolation unconditionally */ -+ ras_page_account_init(); -+#endif -+ - rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", - ras_mc_event_handler, NULL, MC_EVENT); - if (!rc) -diff --git a/ras-mc-handler.c b/ras-mc-handler.c -index deb7e05..42b05cd 100644 ---- a/ras-mc-handler.c -+++ b/ras-mc-handler.c -@@ -23,6 +23,7 @@ - #include "ras-mc-handler.h" - #include "ras-record.h" - #include "ras-logger.h" -+#include "ras-page-isolation.h" - #include "ras-report.h" - - int ras_mc_event_handler(struct trace_seq *s, -@@ -183,6 +184,12 @@ int ras_mc_event_handler(struct trace_seq *s, - - ras_store_mc_event(ras, &ev); - -+#ifdef HAVE_MEMORY_CE_PFA -+ /* Account page corrected errors */ -+ if (!strcmp(ev.error_type, "Corrected")) -+ ras_record_page_error(ev.address, ev.error_count, now); -+#endif -+ - #ifdef HAVE_ABRT_REPORT - /* Report event to ABRT */ - ras_report_mc_event(ras, &ev); -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -new file mode 100644 -index 0000000..50e4406 ---- /dev/null -+++ b/ras-page-isolation.c -@@ -0,0 +1,332 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include "ras-logger.h" -+#include "ras-page-isolation.h" -+ -+#define PARSED_ENV_LEN 50 -+static const struct config threshold_units[] = { -+ { "m", 1000 }, -+ { "k", 1000 }, -+ { "", 1 }, -+ {} -+}; -+ -+static const struct config cycle_units[] = { -+ { "d", 24 }, -+ { "h", 60 }, -+ { "m", 60 }, -+ { "s", 1 }, -+ {} -+}; -+ -+static struct isolation threshold = { -+ .name = "PAGE_CE_THRESHOLD", -+ .units = threshold_units, -+ .env = "50", -+ .unit = "", -+}; -+ -+static struct isolation cycle = { -+ .name = "PAGE_CE_REFRESH_CYCLE", -+ .units = cycle_units, -+ .env = "24h", -+ .unit = "h", -+}; -+ -+static const char *kernel_offline[] = { -+ [OFFLINE_SOFT] = "/sys/devices/system/memory/soft_offline_page", -+ [OFFLINE_HARD] = "/sys/devices/system/memory/hard_offline_page", -+ [OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page", -+}; -+ -+static const struct config offline_choice[] = { -+ { "off", OFFLINE_OFF }, -+ { "account", OFFLINE_ACCOUNT }, -+ { "soft", OFFLINE_SOFT }, -+ { "hard", OFFLINE_HARD }, -+ { "soft-then-hard", OFFLINE_SOFT_THEN_HARD }, -+ {} -+}; -+ -+static const char *page_state[] = { -+ [PAGE_ONLINE] = "online", -+ [PAGE_OFFLINE] = "offlined", -+ [PAGE_OFFLINE_FAILED] = "offline-failed", -+}; -+ -+static enum otype offline = OFFLINE_SOFT; -+static struct rb_root page_records; -+ -+static void page_offline_init(void) -+{ -+ const char *env = "PAGE_CE_ACTION"; -+ char *choice = getenv(env); -+ const struct config *c = NULL; -+ int matched = 0; -+ -+ if (choice) { -+ for (c = offline_choice; c->name; c++) { -+ if (!strcasecmp(choice, c->name)) { -+ offline = c->val; -+ matched = 1; -+ break; -+ } -+ } -+ } -+ -+ if (!matched) -+ log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env); -+ -+ if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) { -+ log(TERM, LOG_INFO, "Kernel does not support page offline interface\n"); -+ offline = 
OFFLINE_ACCOUNT; -+ } -+ -+ log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n", -+ offline_choice[offline].name); -+} -+ -+static void parse_isolation_env(struct isolation *config) -+{ -+ char *env = getenv(config->name); -+ char *unit = NULL; -+ const struct config *units = NULL; -+ int i, no_unit; -+ int valid = 0; -+ int unit_matched = 0; -+ unsigned long value, tmp; -+ -+ /* check if env is vaild */ -+ if (env && strlen(env)) { -+ /* All the character before unit must be digit */ -+ for (i = 0; i < strlen(env) - 1; i++) { -+ if (!isdigit(env[i])) -+ goto parse; -+ } -+ if (sscanf(env, "%lu", &value) < 1 || !value) -+ goto parse; -+ /* check if the unit is vaild */ -+ unit = env + strlen(env) - 1; -+ /* no unit, all the character are value character */ -+ if (isdigit(*unit)) { -+ valid = 1; -+ no_unit = 1; -+ goto parse; -+ } -+ for (units = config->units; units->name; units++) { -+ /* value character and unit character are both valid */ -+ if (!strcasecmp(unit, units->name)) { -+ valid = 1; -+ no_unit = 0; -+ break; -+ } -+ } -+ } -+ -+parse: -+ /* if invalid, use default env */ -+ if (valid) { -+ config->env = env; -+ if (!no_unit) -+ config->unit = unit; -+ } else { -+ log(TERM, LOG_INFO, "Improper %s, set to default %s.\n", -+ config->name, config->env); -+ } -+ -+ /* if env value string is greater than ulong_max, truncate the last digit */ -+ sscanf(config->env, "%lu", &value); -+ for (units = config->units; units->name; units++) { -+ if (!strcasecmp(config->unit, units->name)) -+ unit_matched = 1; -+ if (unit_matched) { -+ tmp = value; -+ value *= units->val; -+ if (tmp != 0 && value / tmp != units->val) -+ config->overflow = true; -+ } -+ } -+ config->val = value; -+ /* In order to output value and unit perfectly */ -+ config->unit = no_unit ? 
config->unit : ""; -+} -+ -+static void parse_env_string(struct isolation *config, char *str) -+{ -+ int i; -+ -+ if (config->overflow) { -+ /* when overflow, use basic unit */ -+ for (i = 0; config->units[i].name; i++) ; -+ sprintf(str, "%lu%s", config->val, config->units[i-1].name); -+ log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n", -+ config->name, config->env); -+ } else { -+ sprintf(str, "%s%s", config->env, config->unit); -+ } -+} -+ -+static void page_isolation_init(void) -+{ -+ char threshold_string[PARSED_ENV_LEN]; -+ char cycle_string[PARSED_ENV_LEN]; -+ /** -+ * It's unnecessary to parse threshold configuration when offline -+ * choice is off. -+ */ -+ if (offline == OFFLINE_OFF) -+ return; -+ -+ parse_isolation_env(&threshold); -+ parse_isolation_env(&cycle); -+ parse_env_string(&threshold, threshold_string); -+ parse_env_string(&cycle, cycle_string); -+ log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n", -+ threshold_string, cycle_string); -+} -+ -+void ras_page_account_init(void) -+{ -+ page_offline_init(); -+ page_isolation_init(); -+} -+ -+static int do_page_offline(unsigned long long addr, enum otype type) -+{ -+ FILE *offline_file; -+ int err; -+ -+ offline_file = fopen(kernel_offline[type], "w"); -+ if (!offline_file) -+ return -1; -+ -+ fprintf(offline_file, "%#llx", addr); -+ err = ferror(offline_file) ? -1 : 0; -+ fclose(offline_file); -+ -+ return err; -+} -+ -+static void page_offline(struct page_record *pr) -+{ -+ unsigned long long addr = pr->addr; -+ int ret; -+ -+ /* Offlining page is not required */ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ /* Ignore offlined pages */ -+ if (pr->offlined != PAGE_ONLINE) -+ return; -+ -+ /* Time to silence this noisy page */ -+ if (offline == OFFLINE_SOFT_THEN_HARD) { -+ ret = do_page_offline(addr, OFFLINE_SOFT); -+ if (ret < 0) -+ ret = do_page_offline(addr, OFFLINE_HARD); -+ } else { -+ ret = do_page_offline(addr, offline); -+ } -+ -+ pr->offlined = ret < 0 ? 
PAGE_OFFLINE_FAILED : PAGE_OFFLINE; -+ -+ log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n", -+ addr, page_state[pr->offlined]); -+} -+ -+static void page_record(struct page_record *pr, unsigned count, time_t time) -+{ -+ unsigned long period = time - pr->start; -+ unsigned long tolerate; -+ -+ if (period >= cycle.val) { -+ /** -+ * Since we don't refresh automatically, it is possible that the period -+ * between two occurences will be longer than the pre-configured refresh cycle. -+ * In this case, we tolerate the frequency of the whole period up to -+ * the pre-configured threshold. -+ */ -+ tolerate = (period / (double)cycle.val) * threshold.val; -+ pr->count -= (tolerate > pr->count) ? pr->count : tolerate; -+ pr->start = time; -+ pr->excess = 0; -+ } -+ -+ pr->count += count; -+ if (pr->count >= threshold.val) { -+ log(TERM, LOG_INFO, "Corrected Errors at %#llx exceeded threshold\n", pr->addr); -+ -+ /** -+ * Backup ce count of current cycle to enable next round, which actually -+ * should never happen if we can disable overflow completely in the same -+ * time unit (but sadly we can't). 
-+ */ -+ pr->excess += pr->count; -+ pr->count = 0; -+ page_offline(pr); -+ } -+} -+ -+static struct page_record *page_lookup_insert(unsigned long long addr) -+{ -+ struct rb_node **entry = &page_records.rb_node; -+ struct rb_node *parent = NULL; -+ struct page_record *pr = NULL, *find = NULL; -+ -+ while (*entry) { -+ parent = *entry; -+ pr = rb_entry(parent, struct page_record, entry); -+ if (addr == pr->addr) { -+ return pr; -+ } else if (addr < pr->addr) { -+ entry = &(*entry)->rb_left; -+ } else { -+ entry = &(*entry)->rb_right; -+ } -+ } -+ -+ find = calloc(1, sizeof(struct page_record)); -+ if (!find) { -+ log(TERM, LOG_ERR, "No memory for page records\n"); -+ return NULL; -+ } -+ -+ find->addr = addr; -+ rb_link_node(&find->entry, parent, entry); -+ rb_insert_color(&find->entry, &page_records); -+ -+ return find; -+} -+ -+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) -+{ -+ struct page_record *pr = NULL; -+ -+ if (offline == OFFLINE_OFF) -+ return; -+ -+ pr = page_lookup_insert(addr & PAGE_MASK); -+ if (pr) { -+ if (!pr->start) -+ pr->start = time; -+ page_record(pr, count, time); -+ } -+} -diff --git a/ras-page-isolation.h b/ras-page-isolation.h -new file mode 100644 -index 0000000..3d03cef ---- /dev/null -+++ b/ras-page-isolation.h -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+*/ -+ -+#ifndef __RAS_PAGE_ISOLATION_H -+#define __RAS_PAGE_ISOLATION_H -+ -+#include -+#include -+#include "rbtree.h" -+ -+#define PAGE_SHIFT 12 -+#define PAGE_SIZE (1 << PAGE_SHIFT) -+#define PAGE_MASK (~(PAGE_SIZE-1)) -+ -+struct config { -+ char *name; -+ unsigned long val; -+}; -+ -+enum otype { -+ OFFLINE_OFF, -+ OFFLINE_ACCOUNT, -+ OFFLINE_SOFT, -+ OFFLINE_HARD, -+ OFFLINE_SOFT_THEN_HARD, -+}; -+ -+enum pstate { -+ PAGE_ONLINE, -+ PAGE_OFFLINE, -+ PAGE_OFFLINE_FAILED, -+}; -+ -+struct page_record { -+ struct rb_node entry; -+ unsigned long long addr; -+ time_t start; -+ enum pstate offlined; -+ unsigned long count; -+ unsigned long excess; -+}; -+ -+struct isolation { -+ char *name; -+ char *env; -+ const struct config *units; -+ unsigned long val; -+ bool overflow; -+ char *unit; -+}; -+ -+void ras_page_account_init(void); -+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time); -+ -+#endif --- -1.8.3.1 - diff --git a/0003-rasdaemon-Add-notification-support-when-page-goes-of.patch b/0003-rasdaemon-Add-notification-support-when-page-goes-of.patch deleted file mode 100644 index a3459f2d310f6fbd1d6d0d1d3df42fcfc447b219..0000000000000000000000000000000000000000 --- a/0003-rasdaemon-Add-notification-support-when-page-goes-of.patch +++ /dev/null @@ -1,261 +0,0 @@ -From bb7a6906612734fda9d18bb48fc2dee1d11c2536 Mon Sep 17 00:00:00 2001 -From: Bixuan Cui -Date: Sun, 5 Jun 2022 02:10:24 +0800 -Subject: [PATCH] rasdaemon: Add notification support when page goes - offline for Memory Corrected Error - -When the page goes offline, it may affect the user's processes. -The user needs to do some special actions (such as restarting the -process) before or after going offline. - -So add page-ce-offline-pre-notice and page-ce-offline-post-notice -to env file of rasdaemon for notifying the user when doing page -offline. 
- -Signed-off-by: Bixuan Cui ---- - Makefile.am | 2 +- - misc/notices/page-ce-offline-post-notice | 17 ++++++ - misc/notices/page-ce-offline-pre-notice | 17 ++++++ - misc/rasdaemon.env | 4 ++ - misc/rasdaemon.spec.in | 3 ++ - ras-page-isolation.c | 90 ++++++++++++++++++++++++++++++++ - 6 files changed, 132 insertions(+), 1 deletion(-) - create mode 100755 misc/notices/page-ce-offline-post-notice - create mode 100755 misc/notices/page-ce-offline-pre-notice - -diff --git a/Makefile.am b/Makefile.am -index f4822b9..b9cb8bb 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -2,7 +2,7 @@ ACLOCAL_AMFLAGS=-I m4 - SUBDIRS = libtrace util man - SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) --EXTRA_DIST = $(SYSTEMD_SERVICES_IN) misc/rasdaemon.env -+EXTRA_DIST = $(SYSTEMD_SERVICES_IN) misc/rasdaemon.env misc/notices - - # This rule is needed because \@sbindir\@ is expanded to \${exec_prefix\}/sbin - # during ./configure phase, therefore it is not possible to add .service.in -diff --git a/misc/notices/page-ce-offline-post-notice b/misc/notices/page-ce-offline-post-notice -new file mode 100755 -index 0000000..d78b1b0 ---- /dev/null -+++ b/misc/notices/page-ce-offline-post-notice -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon after a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-post-notice.local ] && . ./page-ce-offline-post-notice.local $1 -+ -+if [ -d page-ce-offline-post-notice.extern ] -+then -+ ls page-ce-offline-post-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-post-notice.extern/$item ] && . 
./page-ce-offline-post-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff --git a/misc/notices/page-ce-offline-pre-notice b/misc/notices/page-ce-offline-pre-notice -new file mode 100755 -index 0000000..d1038a3 ---- /dev/null -+++ b/misc/notices/page-ce-offline-pre-notice -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon before a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-pre-notice.local ] && . ./page-ce-offline-pre-notice.local $1 -+ -+if [ -d page-ce-offline-pre-notice.extern ] -+then -+ ls page-ce-offline-pre-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-pre-notice.extern/$item ] && . ./page-ce-offline-pre-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env -index 12fd766..713875a 100644 ---- a/misc/rasdaemon.env -+++ b/misc/rasdaemon.env -@@ -27,3 +27,7 @@ PAGE_CE_THRESHOLD="50" - # soft-then-hard First try to soft offline, then try hard offlining. - # Note: default offline choice is "soft". 
- PAGE_CE_ACTION="soft" -+ -+# Notices script when doing memory offline -+PAGE_CE_OFFLINE_PRE_NOTICE="page-ce-offline-pre-notice" -+PAGE_CE_OFFLINE_POST_NOTICE="page-ce-offline-post-notice" -diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in -index 790caa0..ffaee5b 100644 ---- a/misc/rasdaemon.spec.in -+++ b/misc/rasdaemon.spec.in -@@ -47,6 +47,8 @@ make install DESTDIR=%{buildroot} - install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service -+install -d %{buildroot}%{_sysconfdir}/rasdaemon_notices/ -+install -D -p -m 0755 misc/notices/* %{buildroot}%{_sysconfdir}/rasdaemon_notices/ - rm INSTALL %{buildroot}/usr/include/*.h - - %files -@@ -58,6 +60,7 @@ rm INSTALL %{buildroot}/usr/include/*.h - %{_sharedstatedir}/rasdaemon - %{_sysconfdir}/ras/dimm_labels.d - %config(noreplace) %{_sysconfdir}/sysconfig/%{name} -+%config(noreplace) %{_sysconfdir}/rasdaemon_notices/* - - %changelog - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index 50e4406..f4f3bc1 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -17,9 +17,13 @@ - #include - #include - #include -+#include -+#include -+#include - #include "ras-logger.h" - #include "ras-page-isolation.h" - -+#define MAX_PATH_LEN 64 - #define PARSED_ENV_LEN 50 - static const struct config threshold_units[] = { - { "m", 1000 }, -@@ -73,6 +77,8 @@ static const char *page_state[] = { - - static enum otype offline = OFFLINE_SOFT; - static struct rb_root page_records; -+static char pre_notice[MAX_PATH_LEN]; -+static char post_notice[MAX_PATH_LEN]; - - static void page_offline_init(void) - { -@@ -202,16 +208,94 @@ static void page_isolation_init(void) - threshold_string, cycle_string); - } - -+static void page_notice_init(void) -+{ -+ char *notice_root = "/etc/rasdaemon_notices"; -+ char 
*pre_re = getenv("PAGE_CE_OFFLINE_PRE_NOTICE"); -+ char *post_re = getenv("PAGE_CE_OFFLINE_POST_NOTICE"); -+ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ snprintf(pre_notice, sizeof(pre_notice), "%s/%s", notice_root, pre_re); -+ if (access(pre_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", pre_notice); -+ -+ snprintf(post_notice, sizeof(post_notice), "%s/%s", notice_root, post_re); -+ if (access(post_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", post_notice); -+} -+ - void ras_page_account_init(void) - { - page_offline_init(); - page_isolation_init(); -+ page_notice_init(); -+} -+ -+static void finish_child(pid_t child, int status) -+{ -+ if (WIFEXITED(status) && WEXITSTATUS(status)) { -+ log(TERM, LOG_INFO, "notice exited with status %d\n", WEXITSTATUS(status)); -+ } else if (WIFSIGNALED(status)) { -+ log(TERM, LOG_INFO,"notice died with signal %s\n", strsignal(WTERMSIG(status))); -+ } -+ -+ return; -+} -+ -+static void __run_notice(char *argv[], char **env) -+{ -+ pid_t child; -+ int status; -+ -+ child = fork(); -+ if (child < 0) { -+ log(TERM, LOG_ERR, "Cannot create process for offline notice"); -+ return; -+ } -+ if (child == 0) { -+ execve(argv[0], argv, env); -+ _exit(127); -+ } -+ else { -+ waitpid(child, &status, 0); -+ finish_child(child, status); -+ } -+} -+ -+static void run_notice(char *argv[]) -+{ -+ int MAX_ENV = 20; -+ char *env[MAX_ENV]; -+ int ei = 0; -+ int i; -+ -+ asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin"); -+ env[ei] = NULL; -+ assert(ei < MAX_ENV); -+ -+ __run_notice(argv, env); -+ -+ for (i = 0; i < ei; i++) -+ free(env[i]); - } - - static int do_page_offline(unsigned long long addr, enum otype type) - { - FILE *offline_file; - int err; -+ char *args; -+ char *argv[] = { -+ NULL, -+ NULL, -+ NULL, -+ }; -+ -+ asprintf(&args, "%llu", addr); -+ argv[0] = (char*)&pre_notice; -+ argv[1] = args; -+ run_notice(argv); - - 
offline_file = fopen(kernel_offline[type], "w"); - if (!offline_file) -@@ -221,6 +305,11 @@ static int do_page_offline(unsigned long long addr, enum otype type) - err = ferror(offline_file) ? -1 : 0; - fclose(offline_file); - -+ argv[0] = (char*)&post_notice; -+ run_notice(argv); -+ -+ free(args); -+ - return err; - } - -@@ -329,4 +418,5 @@ void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) - pr->start = time; - page_record(pr, count, time); - } -+ - } --- -1.8.3.1 - diff --git a/1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch b/1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch new file mode 100644 index 0000000000000000000000000000000000000000..99a9ba608c6fc6886c53b316064f1565cf78bd73 --- /dev/null +++ b/1ff5f3d2a0fcd48add9462567c30fe0e14585fb4.patch @@ -0,0 +1,32 @@ +commit 1ff5f3d2a0fcd48add9462567c30fe0e14585fb4 +Author: Matt Whitlock +Date: Wed Jun 9 10:25:18 2021 -0400 + + configure.ac: fix SYSCONFDEFDIR default value + + configure.ac was using AC_ARG_WITH incorrectly, yielding a generated configure script like: + + # Check whether --with-sysconfdefdir was given. + if test "${with_sysconfdefdir+set}" = set; then : + withval=$with_sysconfdefdir; SYSCONFDEFDIR=$withval + else + "/etc/sysconfig" + fi + + This commit fixes the default case so that the SYSCONFDEFDIR variable is assigned the value "/etc/sysconfig" rather than trying to execute "/etc/sysconfig" as a command. 
+ + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/configure.ac b/configure.ac +index f7d1947..33b81fe 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -172,7 +172,7 @@ AC_SUBST([RASSTATEDIR]) + AC_ARG_WITH(sysconfdefdir, + AC_HELP_STRING([--with-sysconfdefdir=DIR], [rasdaemon environment file dir]), + [SYSCONFDEFDIR=$withval], +- ["/etc/sysconfig"]) ++ [SYSCONFDEFDIR=/etc/sysconfig]) + AC_SUBST([SYSCONFDEFDIR]) + + AC_DEFINE([RAS_DB_FNAME], ["ras-mc_event.db"], [ras events database]) diff --git a/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch b/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch new file mode 100644 index 0000000000000000000000000000000000000000..fdc509b3352ad03a3ad002a2c21bcc9d50f66b74 --- /dev/null +++ b/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch @@ -0,0 +1,28 @@ +commit 28ea956acc2dab7c18b4701f9657afb9ab3ddc79 +Author: Muralidhara M K +Date: Mon Jul 12 05:18:43 2021 -0500 + + rasdaemon: set SMCA maximum number of banks to 64 + + Newer AMD systems with SMCA banks support up to 64 MCA banks per CPU. + + This patch is based on the commit below upstremed into the kernel: + a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64") + + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index e0cf512..3c346f4 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -75,6 +75,9 @@ enum smca_bank_types { + N_SMCA_BANK_TYPES + }; + ++/* Maximum number of MCA banks per CPU. 
*/ ++#define MAX_NR_BANKS 64 ++ + /* SMCA Extended error strings */ + /* Load Store */ + static const char * const smca_ls_mce_desc[] = { diff --git a/2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch b/2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch new file mode 100644 index 0000000000000000000000000000000000000000..eb45db0c742249cc08ba267cfcb04b4decb4f5c0 --- /dev/null +++ b/2b37a26dcec389723f75d69d3da9c2f15f6c317d.patch @@ -0,0 +1,63 @@ +commit 2b37a26dcec389723f75d69d3da9c2f15f6c317d +Author: Mauro Carvalho Chehab +Date: Wed May 26 12:41:27 2021 +0200 + + ci.yml: Fix the job for it to run on a single arch + + There were some issues on the previous content. Fix them, in + order to allow it to build on a single architecture. + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml +index 5b3e757..747a844 100644 +--- a/.github/workflows/ci.yml ++++ b/.github/workflows/ci.yml +@@ -1,34 +1,23 @@ + name: CI + +-# Should run only on branches and PR, as "on_tag.yml" will handle tags + on: ++ workflow_dispatch: + push: +- branches: master test + pull_request: +- branches: master + + jobs: +- +-# +-# Linux +-# + Ubuntu: + name: Ubuntu +- runs-on: ubuntu-20.04 +- strategy: +- matrix: +- arch: [x64_64, aarch64, armv7, ppc64le] ++ runs-on: ubuntu-latest + steps: +- - uses: actions/checkout@v2 +- with: +- arch: ${{ matrix.arch }} +- - name: prepare +- run: | +- sudo apt-get update +- sudo apt-get install -y build-essential sqlite3 +- - name: build +- run: | +- autoreconf -vfi +- ./configure --enable-all +- make +- sudo make install ++ - uses: actions/checkout@v2 ++ - name: prepare ++ run: | ++ sudo apt-get update ++ sudo apt-get install -y build-essential sqlite3 ++ - name: build ++ run: | ++ autoreconf -vfi ++ ./configure --enable-all ++ make ++ sudo make install diff --git a/2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch b/2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch new file mode 100644 index 
0000000000000000000000000000000000000000..c2a9376e45ab2c678a1f2d09af5a423a06454b16 --- /dev/null +++ b/2b6a54b0d31e02e657171fd27f4e31d996756bc6.patch @@ -0,0 +1,44 @@ +commit 2b6a54b0d31e02e657171fd27f4e31d996756bc6 +Author: DmNosachev +Date: Thu Jul 22 10:25:38 2021 +0300 + + labels/supermicro: added Supermicro X10DRL, X11SPM + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 1e7761f..990fc9e 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -88,6 +88,16 @@ Vendor: Supermicro + P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1; + P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1; + P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; ++ ++ Model: X10DRL-i ++ P1-DIMMA1: 0.0.0; ++ P1-DIMMB1: 0.1.0; ++ P1-DIMMC1: 0.2.0; ++ P1-DIMMD1: 0.3.0; ++ P2-DIMME1: 1.0.0; ++ P2-DIMMF1: 1.1.0; ++ P2-DIMMG1: 1.2.0; ++ P2-DIMMH1: 1.3.0; + + Model: X11DDW-NT, X11DDW-L + P1-DIMMA1: 0.0.0; +@@ -102,6 +112,14 @@ Vendor: Supermicro + P2-DIMMD1: 3.0.0; + P2-DIMME1: 3.1.0; + P2-DIMMF1: 3.2.0; ++ ++ Model: X11SPM-F, X11SPM-TF, X11SPM-TPF ++ DIMMA1: 0.0.0; ++ DIMMB1: 0.1.0; ++ DIMMC1: 0.2.0; ++ DIMMD1: 1.0.0; ++ DIMME1: 1.1.0; ++ DIMMF1: 1.2.0; + + Model: B1DRi + P1_DIMMA1: 0.0.0; diff --git a/50565005b10fe909c66f1c90f2feb95712427c7d.patch b/50565005b10fe909c66f1c90f2feb95712427c7d.patch new file mode 100644 index 0000000000000000000000000000000000000000..dba01162fea30d9d446568226277d7eb5049e18e --- /dev/null +++ b/50565005b10fe909c66f1c90f2feb95712427c7d.patch @@ -0,0 +1,43 @@ +commit 50565005b10fe909c66f1c90f2feb95712427c7d +Author: DmNosachev +Date: Tue Jun 29 14:07:54 2021 +0300 + + labels/supermicro: added Supermicro X11DDW-NT(-L) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 86e4617..373de07 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -69,7 +69,7 @@ Vendor: Supermicro + P2_DIMM4B: 2.0.1; + P2_DIMM4B: 2.1.1; + +- Model: X11DPH-i ++ Model: X11DPH-i, X11DPH-T, X11DPH-TQ + P1-DIMMA1: 0.0.0; P1-DIMMA2: 
0.0.1; + P1-DIMMB1: 0.1.0; + P1-DIMMC1: 0.2.0; +@@ -91,4 +91,18 @@ Vendor: Supermicro + P2-DIMME1: 1.0.0; P2-DIMME2: 1.0.1; + P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1; + P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1; +- P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; +\ No newline at end of file ++ P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; ++ ++ Model: X11DDW-NT, X11DDW-L ++ P1-DIMMA1: 0.0.0; ++ P1-DIMMB1: 0.1.0; ++ P1-DIMMC1: 0.2.0; ++ P1-DIMMD1: 1.0.0; ++ P1-DIMME1: 1.1.0; ++ P1-DIMMF1: 1.2.0; ++ P2-DIMMA1: 2.0.0; ++ P2-DIMMB1: 2.1.0; ++ P2-DIMMC1: 2.2.0; ++ P2-DIMMD1: 3.0.0; ++ P2-DIMME1: 3.1.0; ++ P2-DIMMF1: 3.2.0; +\ No newline at end of file diff --git a/6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch b/6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d3bd32a232bb3e84e361a2f8d46921f509dbc57 --- /dev/null +++ b/6bc43db1b6b3d73805179c21d1dd5521e8dc0f74.patch @@ -0,0 +1,37 @@ +commit 6bc43db1b6b3d73805179c21d1dd5521e8dc0f74 +Author: DmNosachev +Date: Fri Jul 2 13:13:46 2021 +0300 + + labels/supermicro: added Supermicro X11SCA(-F) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index b924a32..1e7761f 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -10,11 +10,7 @@ + # + + Vendor: Supermicro +- Model: A2SDi-8C-HLN4F +- DIMMA1: 0.0.0; DIMMA2: 0.0.1; +- DIMMB1: 0.1.0; DIMMB2: 0.1.1; +- +- Model: A2SDi-8C+-HLN4F ++ Model: A2SDi-8C-HLN4F, A2SDi-8C+-HLN4F + DIMMA1: 0.0.0; DIMMA2: 0.0.1; + DIMMB1: 0.1.0; DIMMB2: 0.1.1; + +@@ -115,4 +111,8 @@ Vendor: Supermicro + P2_DIMME1: 1.0.0; + P2_DIMMF1: 1.1.0; + P2_DIMMG1: 1.2.0; +- P2_DIMMH1: 1.3.0; +\ No newline at end of file ++ P2_DIMMH1: 1.3.0; ++ ++ Model: X11SCA, X11SCA-F ++ DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0; ++ DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; +\ No newline at end of file diff --git a/738bafafdcb2e8b0ced32fff31b13754d571090b.patch b/738bafafdcb2e8b0ced32fff31b13754d571090b.patch new file mode 100644 index 
0000000000000000000000000000000000000000..a3ba3248032d547870d5de44f6d0467fe92e6efc --- /dev/null +++ b/738bafafdcb2e8b0ced32fff31b13754d571090b.patch @@ -0,0 +1,610 @@ +commit 738bafafdcb2e8b0ced32fff31b13754d571090b +Author: Jason Tian +Date: Fri May 28 11:35:43 2021 +0800 + + Add error handling for Ampere-specific errors. + + Save Ampere-specific errors' decode into sqlite3 data + base and log PCIe segment, bus/device/function number + into BMC SEL. + + Signed-off-by: Jason Tian + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/non-standard-ampere.c b/non-standard-ampere.c +index 8cceb26..05b5252 100644 +--- a/non-standard-ampere.c ++++ b/non-standard-ampere.c +@@ -216,6 +216,13 @@ static const char * const err_bert_sub_type[] = { + "PMPRO Fatal", + }; + ++static char *sqlite3_table_list[] = { ++ "amp_payload0_event_tab", ++ "amp_payload1_event_tab", ++ "amp_payload2_event_tab", ++ "amp_payload3_event_tab", ++}; ++ + struct amp_ras_type_info { + int id; + const char *name; +@@ -352,6 +359,359 @@ static const char *oem_subtype_name(const struct amp_ras_type_info *info, + return "unknown"; + } + ++#ifdef HAVE_SQLITE3 ++/*key pair definition for ampere specific error payload type 0*/ ++static const struct db_fields amp_payload0_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "status_reg", .type = "INTEGER" }, ++ { .name = "addr_reg", .type = "INTEGER" }, ++ { .name = "misc0", .type = "INTEGER" }, ++ { .name = "misc1", .type = "INTEGER" }, ++ { .name = "misc2", .type = "INTEGER" }, ++ { .name = "misc3", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload0_event_tab = { ++ .name = "amp_payload0_event", ++ .fields = amp_payload0_event_fields, ++ .num_fields = 
ARRAY_SIZE(amp_payload0_event_fields), ++}; ++ ++/*key pair definition for ampere specific error payload type 1*/ ++static const struct db_fields amp_payload1_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "uncore_err_status", .type = "INTEGER" }, ++ { .name = "uncore_err_mask", .type = "INTEGER" }, ++ { .name = "uncore_err_sev", .type = "INTEGER" }, ++ { .name = "core_err_status", .type = "INTEGER" }, ++ { .name = "core_err_mask", .type = "INTEGER" }, ++ { .name = "root_err_cmd", .type = "INTEGER" }, ++ { .name = "root_err_status", .type = "INTEGER" }, ++ { .name = "src_id", .type = "INTEGER" }, ++ { .name = "reserved1", .type = "INTEGER" }, ++ { .name = "reserverd2", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload1_event_tab = { ++ .name = "amp_payload1_event", ++ .fields = amp_payload1_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload1_event_fields), ++}; ++ ++/*key pair definition for ampere specific error payload type 2*/ ++static const struct db_fields amp_payload2_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "ce_report_reg", .type = "INTEGER" }, ++ { .name = "ce_location", .type = "INTEGER" }, ++ { .name = "ce_addr", .type = "INTEGER" }, ++ { .name = "ue_report_reg", .type = "INTEGER" }, ++ { .name = "ue_location", .type = "INTEGER" }, ++ { .name = "ue_addr", .type = "INTEGER" }, ++ { .name = "reserved1", .type = "INTEGER" }, ++ { .name = "reserved2", .type = "INTEGER" }, ++ { .name = "reserved2", .type = "INTEGER" 
}, ++}; ++ ++static const struct db_table_descriptor amp_payload2_event_tab = { ++ .name = "amp_payload2_event", ++ .fields = amp_payload2_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload2_event_fields), ++}; ++ ++/*key pair definition for ampere specific error payload type 3*/ ++static const struct db_fields amp_payload3_event_fields[] = { ++ { .name = "id", .type = "INTEGER PRIMARY KEY" }, ++ { .name = "timestamp", .type = "TEXT" }, ++ { .name = "type", .type = "TEXT" }, ++ { .name = "subtype", .type = "TEXT" }, ++ { .name = "instance", .type = "INTEGER" }, ++ { .name = "socket_num", .type = "INTEGER" }, ++ { .name = "fw_spec_data0", .type = "INTEGER" }, ++ { .name = "fw_spec_data1", .type = "INTEGER" }, ++ { .name = "fw_spec_data2", .type = "INTEGER" }, ++ { .name = "fw_spec_data3", .type = "INTEGER" }, ++ { .name = "fw_spec_data4", .type = "INTEGER" }, ++ { .name = "fw_spec_data5", .type = "INTEGER" }, ++}; ++ ++static const struct db_table_descriptor amp_payload3_event_tab = { ++ .name = "amp_payload3_event", ++ .fields = amp_payload3_event_fields, ++ .num_fields = ARRAY_SIZE(amp_payload3_event_fields), ++}; ++ ++/*Save data with different type into sqlite3 db*/ ++static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder, ++ enum amp_oem_data_type data_type, ++ int id, int64_t data, const char *text) ++{ ++ switch (data_type) { ++ case AMP_OEM_DATA_TYPE_INT: ++ sqlite3_bind_int(ev_decoder->stmt_dec_record, id, data); ++ break; ++ case AMP_OEM_DATA_TYPE_INT64: ++ sqlite3_bind_int64(ev_decoder->stmt_dec_record, id, data); ++ break; ++ case AMP_OEM_DATA_TYPE_TEXT: ++ sqlite3_bind_text(ev_decoder->stmt_dec_record, id, ++ text, -1, NULL); ++ break; ++ default: ++ break; ++ } ++} ++ ++static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, ++ const char *name) ++{ ++ int rc; ++ ++ rc = sqlite3_step(ev_decoder->stmt_dec_record); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to do %s step on sqlite: error = 
%d\n", name, rc); ++ ++ rc = sqlite3_reset(ev_decoder->stmt_dec_record); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to reset %s on sqlite: error = %d\n", name, rc); ++ ++ rc = sqlite3_clear_bindings(ev_decoder->stmt_dec_record); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed to clear bindings %s on sqlite: error = %d\n", ++ name, rc); ++ ++ return rc; ++} ++ ++/*save all Ampere Specific Error Payload type 0 to sqlite3 database*/ ++static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload0_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD0_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD0_FIELD_SUB_TYPE, 0, subtype_str); ++ ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD0_FIELD_INS, INSTANCE(err->instance), NULL); ++ ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD0_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD0_FIELD_STATUS_REG, err->err_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_ADDR_REG, ++ err->err_addr, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC0, ++ err->err_misc_0, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC1, ++ err->err_misc_1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC2, ++ err->err_misc_2, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD0_FIELD_MISC3, ++ err->err_misc_3, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload0_event_tab"); ++ } ++} ++ ++/*save all Ampere Specific Error Payload type 1 to sqlite3 database*/ ++static void 
record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload1_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD1_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD1_FIELD_SUB_TYPE, 0, subtype_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_INS, ++ INSTANCE(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_STATUS, ++ err->uncore_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_MASK, ++ err->uncore_mask, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_SEV, ++ err->uncore_sev, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_STATUS, ++ err->core_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_MASK, ++ err->core_mask, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_CMD, ++ err->root_err_cmd, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_STATUS, ++ err->root_status, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_SRC_ID, ++ err->src_id, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD1_FIELD_RESERVED1, ++ err->reserved1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD1_FIELD_RESERVED2, ++ err->reserved2, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload1_event_tab"); ++ } ++} ++ ++/*save all Ampere Specific Error Payload type 2 to sqlite3 database*/ ++static void record_amp_payload2_err(struct 
ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload2_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD2_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD2_FIELD_SUB_TYPE, 0, subtype_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_INS, INSTANCE(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_CE_REPORT_REG, ++ err->ce_register, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_CE_LOACATION, ++ err->ce_location, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_CE_ADDR, ++ err->ce_addr, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_UE_REPORT_REG, ++ err->ue_register, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_UE_LOCATION, ++ err->ue_location, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_UE_ADDR, ++ err->ue_addr, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD2_FIELD_RESERVED1, ++ err->reserved1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD2_FIELD_RESERVED2, ++ err->reserved2, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD2_FIELD_RESERVED3, ++ err->reserved3, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload2_event_tab"); ++ } ++} ++ ++/*save all Ampere Specific Error Payload type 3 to sqlite3 database*/ ++static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload3_type_sec *err) ++{ ++ if (ev_decoder != NULL) { ++ 
record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD3_FIELD_TYPE, 0, type_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ AMP_PAYLOAD3_FIELD_SUB_TYPE, 0, subtype_str); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD3_FIELD_INS, INSTANCE(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD3_FIELD_SOCKET_NUM, ++ SOCKET_NUM(err->instance), NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0, ++ err->fw_speci_data0, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1, ++ err->fw_speci_data1, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2, ++ err->fw_speci_data2, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3, ++ err->fw_speci_data3, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4, ++ err->fw_speci_data4, NULL); ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_INT64, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5, ++ err->fw_speci_data5, NULL); ++ store_amp_err_data(ev_decoder, "amp_payload3_event_tab"); ++ } ++} ++ ++#else ++static void record_amp_data(struct ras_ns_ev_decoder *ev_decoder, ++ enum amp_oem_data_type data_type, ++ int id, int64_t data, const char *text) ++{ ++ return 0; ++} ++ ++static void record_amp_payload0_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload0_type_sec *err) ++{ ++ return 0; ++} ++ ++static void record_amp_payload1_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload1_type_sec *err) ++{ ++ return 0; ++} ++ ++static void record_amp_payload2_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload2_type_sec *err) ++{ ++ 
return 0; ++} ++ ++static void record_amp_payload3_err(struct ras_ns_ev_decoder *ev_decoder, ++ const char *type_str, const char *subtype_str, ++ const struct amp_payload3_type_sec *err) ++{ ++ return 0; ++} ++ ++static int store_amp_err_data(struct ras_ns_ev_decoder *ev_decoder, char *name) ++{ ++ return 0; ++} ++#endif + + /*decode ampere specific error payload type 0, the CPU's data is save*/ + /*to sqlite by ras-arm-handler, others are saved by this function.*/ +@@ -434,6 +794,7 @@ void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload0_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -517,6 +878,7 @@ static void decode_amp_payload1_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload1_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -601,6 +963,7 @@ static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload2_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -673,6 +1036,7 @@ static void decode_amp_payload3_err_regs(struct ras_ns_ev_decoder *ev_decoder, + *p = '\0'; + } + ++ record_amp_payload3_err(ev_decoder, type_str, subtype_str, err); + i = 0; + p = NULL; + end = NULL; +@@ -687,6 +1051,38 @@ static int decode_amp_oem_type_error(struct ras_events *ras, + { + int payload_type = PAYLOAD_TYPE(event->error[0]); + ++#ifdef HAVE_SQLITE3 ++ struct db_table_descriptor db_tab; ++ int id = 0; ++ ++ if (payload_type == PAYLOAD_TYPE_0) { ++ db_tab = amp_payload0_event_tab; ++ id = AMP_PAYLOAD0_FIELD_TIMESTAMP; ++ } else if (payload_type == PAYLOAD_TYPE_1) { ++ db_tab = amp_payload1_event_tab; ++ id = AMP_PAYLOAD1_FIELD_TIMESTAMP; ++ } else if (payload_type == PAYLOAD_TYPE_2) { ++ db_tab = amp_payload2_event_tab; ++ id = AMP_PAYLOAD2_FIELD_TIMESTAMP; ++ } else if (payload_type == PAYLOAD_TYPE_3) { ++ db_tab = 
amp_payload3_event_tab; ++ id = AMP_PAYLOAD3_FIELD_TIMESTAMP; ++ } else ++ return -1; ++ ++ if (!ev_decoder->stmt_dec_record) { ++ if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record, ++ &db_tab) != SQLITE_OK) { ++ trace_seq_printf(s, ++ "create sql %s fail\n", ++ sqlite3_table_list[payload_type]); ++ return -1; ++ } ++ } ++ record_amp_data(ev_decoder, AMP_OEM_DATA_TYPE_TEXT, ++ id, 0, event->timestamp); ++#endif ++ + if (payload_type == PAYLOAD_TYPE_0) { + const struct amp_payload0_type_sec *err = + (struct amp_payload0_type_sec *)event->error; +diff --git a/non-standard-ampere.h b/non-standard-ampere.h +index aacf3a8..f463c53 100644 +--- a/non-standard-ampere.h ++++ b/non-standard-ampere.h +@@ -102,6 +102,79 @@ struct amp_payload3_type_sec { + uint64_t fw_speci_data5; + }; + ++enum amp_oem_data_type { ++ AMP_OEM_DATA_TYPE_INT, ++ AMP_OEM_DATA_TYPE_INT64, ++ AMP_OEM_DATA_TYPE_TEXT, ++}; ++ ++enum { ++ AMP_PAYLOAD0_FIELD_ID, ++ AMP_PAYLOAD0_FIELD_TIMESTAMP, ++ AMP_PAYLOAD0_FIELD_TYPE, ++ AMP_PAYLOAD0_FIELD_SUB_TYPE, ++ AMP_PAYLOAD0_FIELD_INS, ++ AMP_PAYLOAD0_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD0_FIELD_STATUS_REG, ++ AMP_PAYLOAD0_FIELD_ADDR_REG, ++ AMP_PAYLOAD0_FIELD_MISC0, ++ AMP_PAYLOAD0_FIELD_MISC1, ++ AMP_PAYLOAD0_FIELD_MISC2, ++ AMP_PAYLOAD0_FIELD_MISC3, ++}; ++ ++enum { ++ AMP_PAYLOAD1_FIELD_ID, ++ AMP_PAYLOAD1_FIELD_TIMESTAMP, ++ AMP_PAYLOAD1_FIELD_TYPE, ++ AMP_PAYLOAD1_FIELD_SUB_TYPE, ++ AMP_PAYLOAD1_FIELD_INS, ++ AMP_PAYLOAD1_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_STATUS, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_MASK, ++ AMP_PAYLOAD1_FIELD_UNCORE_ERR_SEV, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_STATUS, ++ AMP_PAYLOAD1_FIELD_CORE_ERR_MASK, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_CMD, ++ AMP_PAYLOAD1_FIELD_ROOT_ERR_STATUS, ++ AMP_PAYLOAD1_FIELD_SRC_ID, ++ AMP_PAYLOAD1_FIELD_RESERVED1, ++ AMP_PAYLOAD1_FIELD_RESERVED2, ++}; ++ ++enum { ++ AMP_PAYLOAD2_FIELD_ID, ++ AMP_PAYLOAD2_FIELD_TIMESTAMP, ++ AMP_PAYLOAD2_FIELD_TYPE, ++ AMP_PAYLOAD2_FIELD_SUB_TYPE, ++ 
AMP_PAYLOAD2_FIELD_INS, ++ AMP_PAYLOAD2_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD2_FIELD_CE_REPORT_REG, ++ AMP_PAYLOAD2_FIELD_CE_LOACATION, ++ AMP_PAYLOAD2_FIELD_CE_ADDR, ++ AMP_PAYLOAD2_FIELD_UE_REPORT_REG, ++ AMP_PAYLOAD2_FIELD_UE_LOCATION, ++ AMP_PAYLOAD2_FIELD_UE_ADDR, ++ AMP_PAYLOAD2_FIELD_RESERVED1, ++ AMP_PAYLOAD2_FIELD_RESERVED2, ++ AMP_PAYLOAD2_FIELD_RESERVED3, ++}; ++ ++enum { ++ AMP_PAYLOAD3_FIELD_ID, ++ AMP_PAYLOAD3_FIELD_TIMESTAMP, ++ AMP_PAYLOAD3_FIELD_TYPE, ++ AMP_PAYLOAD3_FIELD_SUB_TYPE, ++ AMP_PAYLOAD3_FIELD_INS, ++ AMP_PAYLOAD3_FIELD_SOCKET_NUM, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA0, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA1, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA2, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA3, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA4, ++ AMP_PAYLOAD3_FIELD_FW_SPEC_DATA5 ++}; ++ + void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder, + struct trace_seq *s, + const struct amp_payload0_type_sec *err); +diff --git a/ras-aer-handler.c b/ras-aer-handler.c +index 8ddd439..6f4cb2b 100644 +--- a/ras-aer-handler.c ++++ b/ras-aer-handler.c +@@ -67,6 +67,9 @@ int ras_aer_event_handler(struct trace_seq *s, + struct tm *tm; + struct ras_aer_event ev; + char buf[BUF_LEN]; ++ char ipmi_add_sel[105]; ++ uint8_t sel_data[5]; ++ int seg, bus, dev, fn; + + /* + * Newer kernels (3.10-rc1 or upper) provide an uptime clock. 
+@@ -129,15 +132,19 @@ int ras_aer_event_handler(struct trace_seq *s, + switch (severity_val) { + case HW_EVENT_AER_UNCORRECTED_NON_FATAL: + ev.error_type = "Uncorrected (Non-Fatal)"; ++ sel_data[0] = 0xca; + break; + case HW_EVENT_AER_UNCORRECTED_FATAL: + ev.error_type = "Uncorrected (Fatal)"; ++ sel_data[0] = 0xca; + break; + case HW_EVENT_AER_CORRECTED: + ev.error_type = "Corrected"; ++ sel_data[0] = 0xbf; + break; + default: + ev.error_type = "Unknown severity"; ++ sel_data[0] = 0xbf; + } + trace_seq_puts(s, ev.error_type); + +@@ -151,5 +158,29 @@ int ras_aer_event_handler(struct trace_seq *s, + ras_report_aer_event(ras, &ev); + #endif + ++#ifdef HAVE_AMP_NS_DECODE ++ /* ++ * Get PCIe AER error source seg/bus/dev/fn and save it into ++ * BMC OEM SEL, ipmitool raw 0x0a 0x44 is IPMI command-Add SEL ++ * entry, please refer to IPMI specification chapter 31.6. 0xcd3a ++ * is manufacturer ID (ampere), byte 12 is sensor num (CE is 0xBF, ++ * UE is 0xCA), byte 13~14 is segment number, byte 15 is bus ++ * number, byte 16[7:3] is device number, byte 16[2:0] is ++ * function number ++ */ ++ sscanf(ev.dev_name, "%x:%x:%x.%x", &seg, &bus, &dev, &fn); ++ ++ sel_data[1] = seg & 0xff; ++ sel_data[2] = (seg & 0xff00) >> 8; ++ sel_data[3] = bus; ++ sel_data[4] = (((dev & 0x1f) << 3) | (fn & 0x7)); ++ ++ sprintf(ipmi_add_sel, ++ "ipmitool raw 0x0a 0x44 0x00 0x00 0xc0 0x00 0x00 0x00 0x00 0x3a 0xcd 0x00 0xc0 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x", ++ sel_data[0], sel_data[1], sel_data[2], sel_data[3], sel_data[4]); ++ ++ system(ipmi_add_sel); ++#endif ++ + return 0; + } diff --git a/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch b/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch new file mode 100644 index 0000000000000000000000000000000000000000..76afc8e061f00d6f0966076e1afde5dc5717c959 --- /dev/null +++ b/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch @@ -0,0 +1,24 @@ +commit 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4 +Author: Muralidhara M K +Date: Wed Jul 28 01:52:12 2021 -0500 + +
rasdaemon: Support MCE for AMD CPU family 19h + + Add support for family 19h x86 CPUs from AMD. + + Signed-off-by: Muralidhara M K + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-mce-handler.c b/ras-mce-handler.c +index 805004a..f2b53d4 100644 +--- a/ras-mce-handler.c ++++ b/ras-mce-handler.c +@@ -208,7 +208,7 @@ static int detect_cpu(struct ras_events *ras) + mce->cputype = CPU_AMD_SMCA; + goto ret; + } +- if (mce->family > 23) { ++ if (mce->family > 25) { + log(ALL, LOG_INFO, + "Can't parse MCE for this AMD CPU yet %d\n", + mce->family); diff --git a/7ccf12f5ae26a055926d175d908c7930293438c4.patch b/7ccf12f5ae26a055926d175d908c7930293438c4.patch new file mode 100644 index 0000000000000000000000000000000000000000..5a7a860ec3f654ebb980a963ca1e14c00b14e89b --- /dev/null +++ b/7ccf12f5ae26a055926d175d908c7930293438c4.patch @@ -0,0 +1,26 @@ +commit 7ccf12f5ae26a055926d175d908c7930293438c4 +Author: DmNosachev +Date: Fri Jul 23 17:28:33 2021 +0300 + + labels/supermicro: added Supermicro X11SCW + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 990fc9e..aea7c3c 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -133,4 +133,10 @@ Vendor: Supermicro + + Model: X11SCA, X11SCA-F + DIMMA1: 0.0.0, 0.1.0; DIMMA2: 0.2.0, 0.3.0; +- DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; +\ No newline at end of file ++ DIMMB1: 0.0.1, 0.1.1; DIMMB2: 0.2.1, 0.3.1; ++ ++ Model: X11SCW-F ++ DIMMA1: 0.1.0; ++ DIMMA2: 0.0.0; ++ DIMMB1: 0.1.1; ++ DIMMB2: 0.0.1; +\ No newline at end of file diff --git a/9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b.patch b/9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b.patch new file mode 100644 index 0000000000000000000000000000000000000000..5267fc8d11c9eec9e3f4a3ebdc76e8c3861475cf --- /dev/null +++ b/9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b.patch @@ -0,0 +1,51 @@ +commit 9415b7449c70f5ea4a0209ddb89c2f5f392d3b4b +Author: Muralidhara M K +Date: Tue Jul 27 06:36:45 2021 -0500 + + rasdaemon: ras-mc-ctl: Fix 
script to parse dimm sizes + + Removes trailing spaces at the end of a line from + file location and fixes --layout option to parse dimm nodes + to get the size of each dimm from ras-mc-ctl. + + Issue is reported https://github.com/mchehab/rasdaemon/issues/43 + Where '> ras-mc-ctl --layout' reports all 0s + + With this change the layout option prints the correct dimm sizes + > sudo ras-mc-ctl --layout + +-----------------------------------------------+ + | mc0 | + | csrow0 | csrow1 | csrow2 | csrow3 | + ----------+-----------------------------------------------+ + ... + channel7: | 16384 MB | 0 MB | 0 MB | 0 MB | + channel6: | 16384 MB | 0 MB | 0 MB | 0 MB | + ... + ----------+-----------------------------------------------+ + + Signed-off-by: Muralidhara M K + Signed-off-by: Naveen Krishna Chatradhi + Cc: Yazen Ghannam + Signed-off-by: Mauro Carvalho Chehab + Link: https://lkml.kernel.org/r/20210810183855.129076-1-nchatrad@amd.com/ + +diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in +index 1e3aeb7..b22dd60 100755 +--- a/util/ras-mc-ctl.in ++++ b/util/ras-mc-ctl.in +@@ -246,6 +246,7 @@ sub parse_dimm_nodes + if (($file =~ /max_location$/)) { + open IN, $file; + my $location = <IN>; ++ $location =~ s/\s+$//; + close IN; + my @temp = split(/ /, $location); + +@@ -288,6 +289,7 @@ sub parse_dimm_nodes + + open IN, $file; + my $location = <IN>; ++ $location =~ s/\s+$//; + close IN; + + my @pos; diff --git a/9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch b/9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch new file mode 100644 index 0000000000000000000000000000000000000000..1a221ea0a362ba29dd14fded727c6a9e52d6dab1 --- /dev/null +++ b/9a5baed97b21af31064d9995ffcfaac0e9d7983e.patch @@ -0,0 +1,40 @@ +commit 9a5baed97b21af31064d9995ffcfaac0e9d7983e +Author: DmNosachev +Date: Tue Jun 29 13:37:48 2021 +0300 + + labels/supermicro: supermicro db syntax + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index bfaed93..47ea05f 100644 +---
a/labels/supermicro ++++ b/labels/supermicro +@@ -18,17 +18,17 @@ Vendor: Supermicro + DIMMA1: 0.0.0; DIMMA2: 0.0.1; + DIMMB1: 0.1.0; DIMMB2: 0.1.1; + +- Product: X10SRA-F +- DIMMA1: 0.0.0 +- DIMMA2: 0.0.1 +- DIMMB1: 0.1.0 +- DIMMB2: 0.1.1 +- DIMMC1: 1.0.0 +- DIMMC2: 1.0.1 +- DIMMD1: 1.1.0 +- DIMMD2: 1.1.1 ++ Model: X10SRA-F ++ DIMMA1: 0.0.0; ++ DIMMA2: 0.0.1; ++ DIMMB1: 0.1.0; ++ DIMMB2: 0.1.1; ++ DIMMC1: 1.0.0; ++ DIMMC2: 1.0.1; ++ DIMMD1: 1.1.0; ++ DIMMD2: 1.1.1; + +- Product: H8DGU ++ Model: H8DGU + P1_DIMM1A: 0.2.0; + P1_DIMM1A: 0.3.0; + P2_DIMM1A: 3.2.0; diff --git a/9acef39f13833f7d53ef96abc5a72e79384260f4.patch b/9acef39f13833f7d53ef96abc5a72e79384260f4.patch new file mode 100644 index 0000000000000000000000000000000000000000..c4c8af11658bebc7ff91bd6a5ded12bf9c2d4e41 --- /dev/null +++ b/9acef39f13833f7d53ef96abc5a72e79384260f4.patch @@ -0,0 +1,230 @@ +commit 9acef39f13833f7d53ef96abc5a72e79384260f4 +Author: Naveen Krishna Chatradhi +Date: Tue Jun 1 11:01:17 2021 +0530 + + rasdaemon: Add new SMCA bank types with error decoding + + Upcoming systems with Scalable Machine Check Architecture (SMCA) have + new MCA banks added. + + This patch adds the (HWID, MCATYPE) tuple, name and error decoding for + those new SMCA banks. + While at it, optimize the string names in smca_bank_name[]. 
+ + Signed-off-by: Muralidhara M K + Signed-off-by: Naveen Krishna Chatradhi + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 7c619fd..e0cf512 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -47,7 +47,7 @@ + /* These may be used by multiple smca_hwid_mcatypes */ + enum smca_bank_types { + SMCA_LS = 0, /* Load Store */ +- SMCA_LS_V2, /* Load Store */ ++ SMCA_LS_V2, + SMCA_IF, /* Instruction Fetch */ + SMCA_L2_CACHE, /* L2 Cache */ + SMCA_DE, /* Decoder Unit */ +@@ -56,17 +56,22 @@ enum smca_bank_types { + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ +- SMCA_CS_V2, /* Coherent Slave V2 */ ++ SMCA_CS_V2, + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ ++ SMCA_UMC_V2, + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ +- SMCA_PSP_V2, /* Platform Security Processor V2 */ ++ SMCA_PSP_V2, + SMCA_SMU, /* System Management Unit */ +- SMCA_SMU_V2, /* System Management Unit V2 */ ++ SMCA_SMU_V2, + SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_NBIO, /* Northbridge IO Unit */ + SMCA_PCIE, /* PCI Express Unit */ ++ SMCA_PCIE_V2, ++ SMCA_XGMI_PCS, /* xGMI PCS Unit */ ++ SMCA_XGMI_PHY, /* xGMI PHY Unit */ ++ SMCA_WAFL_PHY, /* WAFL PHY Unit */ + N_SMCA_BANK_TYPES + }; + +@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = { + "Command/address parity error", + "Write data CRC error", + }; ++ ++static const char * const smca_umc2_mce_desc[] = { ++ "DRAM ECC error", ++ "Data poison error", ++ "SDP parity error", ++ "Reserved", ++ "Address/Command parity error", ++ "Write data parity error", ++ "DCQ SRAM ECC error", ++ "Reserved", ++ "Read data parity error", ++ "Rdb SRAM ECC error", ++ "RdRsp SRAM ECC error", ++ "LM32 MP errors", ++}; ++ + /* Parameter Block */ + static const char * const smca_pb_mce_desc[] = { + "Parameter Block RAM ECC error", +@@ -314,6 +335,55 @@ static const char * const 
smca_pcie_mce_desc[] = { + "CCIX Non-okay write response with data error", + }; + ++static const char * const smca_pcie2_mce_desc[] = { ++ "SDP Parity Error logging", ++}; ++ ++static const char * const smca_xgmipcs_mce_desc[] = { ++ "Data Loss Error", ++ "Training Error", ++ "Flow Control Acknowledge Error", ++ "Rx Fifo Underflow Error", ++ "Rx Fifo Overflow Error", ++ "CRC Error", ++ "BER Exceeded Error", ++ "Tx Vcid Data Error", ++ "Replay Buffer Parity Error", ++ "Data Parity Error", ++ "Replay Fifo Overflow Error", ++ "Replay Fifo Underflow Error", ++ "Elastic Fifo Overflow Error", ++ "Deskew Error", ++ "Flow Control CRC Error", ++ "Data Startup Limit Error", ++ "FC Init Timeout Error", ++ "Recovery Timeout Error", ++ "Ready Serial Timeout Error", ++ "Ready Serial Attempt Error", ++ "Recovery Attempt Error", ++ "Recovery Relock Attempt Error", ++ "Replay Attempt Error", ++ "Sync Header Error", ++ "Tx Replay Timeout Error", ++ "Rx Replay Timeout Error", ++ "LinkSub Tx Timeout Error", ++ "LinkSub Rx Timeout Error", ++ "Rx CMD Pocket Error", ++}; ++ ++static const char * const smca_xgmiphy_mce_desc[] = { ++ "RAM ECC Error", ++ "ARC instruction buffer parity error", ++ "ARC data buffer parity error", ++ "PHY APB error", ++}; ++ ++static const char * const smca_waflphy_mce_desc[] = { ++ "RAM ECC Error", ++ "ARC instruction buffer parity error", ++ "ARC data buffer parity error", ++ "PHY APB error", ++}; + + struct smca_mce_desc { + const char * const *descs; +@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, ++ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, + [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, + [SMCA_PSP_V2] = { 
smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, +@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, + [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, + [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, ++ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, ++ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, ++ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, ++ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, + }; + + struct smca_hwid { +@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + + /* Unified Memory Controller MCA type */ + { SMCA_UMC, 0x00000096 }, ++ /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */ ++ { SMCA_UMC_V2, 0x00010096 }, + + /* Parameter Block MCA type */ + { SMCA_PB, 0x00000005 }, +@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + + /* PCI Express Unit MCA type */ + { SMCA_PCIE, 0x00000046 }, ++ { SMCA_PCIE_V2, 0x00010046 }, ++ ++ /* Ext Global Memory Interconnect PCS MCA type */ ++ { SMCA_XGMI_PCS, 0x00000050 }, ++ ++ /* Ext Global Memory Interconnect PHY MCA type */ ++ { SMCA_XGMI_PHY, 0x00000259 }, ++ ++ /* WAFL PHY MCA type */ ++ { SMCA_WAFL_PHY, 0x00000267 }, + }; + + struct smca_bank_name { +@@ -396,27 +483,28 @@ struct smca_bank_name { + }; + + static struct smca_bank_name smca_names[] = { +- [SMCA_LS] = { "Load Store Unit" }, +- [SMCA_LS_V2] = { "Load Store Unit" }, +- [SMCA_IF] = { "Instruction Fetch Unit" }, +- [SMCA_L2_CACHE] = { "L2 Cache" }, +- [SMCA_DE] = { "Decode Unit" }, +- [SMCA_RESERVED] = { "Reserved" }, +- [SMCA_EX] = { "Execution Unit" }, +- [SMCA_FP] = { "Floating Point Unit" }, +- [SMCA_L3_CACHE] = { "L3 Cache" }, +- [SMCA_CS] = { "Coherent Slave" }, +- [SMCA_CS_V2] = { "Coherent Slave" }, +- [SMCA_PIE] = 
{ "Power, Interrupts, etc." }, +- [SMCA_UMC] = { "Unified Memory Controller" }, +- [SMCA_PB] = { "Parameter Block" }, +- [SMCA_PSP] = { "Platform Security Processor" }, +- [SMCA_PSP_V2] = { "Platform Security Processor" }, +- [SMCA_SMU] = { "System Management Unit" }, +- [SMCA_SMU_V2] = { "System Management Unit" }, +- [SMCA_MP5] = { "Microprocessor 5 Unit" }, +- [SMCA_NBIO] = { "Northbridge IO Unit" }, +- [SMCA_PCIE] = { "PCI Express Unit" }, ++ [SMCA_LS ... SMCA_LS_V2] = { "Load Store Unit" }, ++ [SMCA_IF] = { "Instruction Fetch Unit" }, ++ [SMCA_L2_CACHE] = { "L2 Cache" }, ++ [SMCA_DE] = { "Decode Unit" }, ++ [SMCA_RESERVED] = { "Reserved" }, ++ [SMCA_EX] = { "Execution Unit" }, ++ [SMCA_FP] = { "Floating Point Unit" }, ++ [SMCA_L3_CACHE] = { "L3 Cache" }, ++ [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" }, ++ [SMCA_PIE] = { "Power, Interrupts, etc." }, ++ [SMCA_UMC] = { "Unified Memory Controller" }, ++ [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, ++ [SMCA_PB] = { "Parameter Block" }, ++ [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, ++ [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, ++ [SMCA_MP5] = { "Microprocessor 5 Unit" }, ++ [SMCA_NBIO] = { "Northbridge IO Unit" }, ++ [SMCA_PCIE ... 
SMCA_PCIE_V2] = { "PCI Express Unit" }, ++ [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" }, ++ [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" }, ++ [SMCA_WAFL_PHY] = { "WAFL PHY Unit" }, ++ + }; + + static void amd_decode_errcode(struct mce_event *e) diff --git a/aecf33aa70331670c06db6b652712b476e24051c.patch b/aecf33aa70331670c06db6b652712b476e24051c.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd557eccce613c47d085014b187d53b9b39b788d --- /dev/null +++ b/aecf33aa70331670c06db6b652712b476e24051c.patch @@ -0,0 +1,107 @@ +commit aecf33aa70331670c06db6b652712b476e24051c +Author: Muralidhara M K +Date: Mon Jul 12 05:40:46 2021 -0500 + + rasdaemon: Enumerate memory on noncpu nodes + + On newer heterogeneous systems from AMD with GPU nodes (with HBM2 memory + banks) connected via xGMI links to the CPUs. + + The node id information is available in the InstanceHI[47:44] of + the IPID register. + + The UMC Phys on Aldeberan nodes are enumerated as csrow + The UMC channels connected to HBMs are enumerated as ranks. + + Signed-off-by: Muralidhara M K + Signed-off-by: Naveen Krishna Chatradhi + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 3c346f4..f3379fc 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -78,6 +78,12 @@ enum smca_bank_types { + /* Maximum number of MCA banks per CPU. 
*/ + #define MAX_NR_BANKS 64 + ++/* ++ * On Newer heterogeneous systems from AMD with CPU and GPU nodes connected ++ * via xGMI links, the NON CPU Nodes are enumerated from index 8 ++ */ ++#define NONCPU_NODE_INDEX 8 ++ + /* SMCA Extended error strings */ + /* Load Store */ + static const char * const smca_ls_mce_desc[] = { +@@ -531,6 +537,26 @@ static int find_umc_channel(struct mce_event *e) + { + return EXTRACT(e->ipid, 0, 31) >> 20; + } ++ ++/* ++ * The HBM memory managed by the UMCCH of the noncpu node ++ * can be calculated based on the [15:12]bits of IPID ++ */ ++static int find_hbm_channel(struct mce_event *e) ++{ ++ int umc, tmp; ++ ++ umc = EXTRACT(e->ipid, 0, 31) >> 20; ++ ++ /* ++ * The HBM channel managed by the UMC of the noncpu node ++ * can be calculated based on the [15:12]bits of IPID as follows ++ */ ++ tmp = ((e->ipid >> 12) & 0xf); ++ ++ return (umc % 2) ? tmp + 4 : tmp; ++} ++ + /* Decode extended errors according to Scalable MCA specification */ + static void decode_smca_error(struct mce_event *e) + { +@@ -539,6 +565,7 @@ static void decode_smca_error(struct mce_event *e) + unsigned short xec = (e->status >> 16) & 0x3f; + const struct smca_hwid *s_hwid; + uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63); ++ uint8_t mcatype_instancehi = EXTRACT(e->ipid, 44, 47); + unsigned int csrow = -1, channel = -1; + unsigned int i; + +@@ -548,14 +575,16 @@ static void decode_smca_error(struct mce_event *e) + bank_type = s_hwid->bank_type; + break; + } ++ if (mcatype_instancehi >= NONCPU_NODE_INDEX) ++ bank_type = SMCA_UMC_V2; + } + +- if (i >= ARRAY_SIZE(smca_hwid_mcatypes)) { ++ if (i >= MAX_NR_BANKS) { + strcpy(e->mcastatus_msg, "Couldn't find bank type with IPID"); + return; + } + +- if (bank_type >= N_SMCA_BANK_TYPES) { ++ if (bank_type >= MAX_NR_BANKS) { + strcpy(e->mcastatus_msg, "Don't know how to decode this bank"); + return; + } +@@ -580,6 +609,16 @@ static void decode_smca_error(struct mce_event *e) + mce_snprintf(e->mc_location, 
"memory_channel=%d,csrow=%d", + channel, csrow); + } ++ ++ if (bank_type == SMCA_UMC_V2 && xec == 0) { ++ /* The UMCPHY is reported as csrow in case of noncpu nodes */ ++ csrow = find_umc_channel(e) / 2; ++ /* UMCCH is managing the HBM memory */ ++ channel = find_hbm_channel(e); ++ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", ++ channel, csrow); ++ } ++ + } + + int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) diff --git a/b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch b/b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch new file mode 100644 index 0000000000000000000000000000000000000000..30cc19e2098d9c1f95148ace0d27203b33c3c0f1 --- /dev/null +++ b/b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4.patch @@ -0,0 +1,30 @@ +commit b4402d36e1b42fb7b0d8ddccc83463a6e622dbc4 +Author: DmNosachev +Date: Tue Jun 29 13:48:55 2021 +0300 + + labels/supermicro: added Supermicro X10DRI(-T) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 47ea05f..86e4617 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -81,4 +81,14 @@ Vendor: Supermicro + P2-DIMMC1: 2.2.0; + P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1; + P2-DIMME1: 3.1.0; +- P2-DIMMF1: 3.2.0; +\ No newline at end of file ++ P2-DIMMF1: 3.2.0; ++ ++ Model: X10DRI, X10DRI-T ++ P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; ++ P1-DIMMB1: 0.1.0; P1-DIMMB2: 0.1.1; ++ P1-DIMMC1: 0.2.0; P1-DIMMC2: 0.2.1; ++ P1-DIMMD1: 0.3.0; P1-DIMMD2: 0.3.1; ++ P2-DIMME1: 1.0.0; P2-DIMME2: 1.0.1; ++ P2-DIMMF1: 1.1.0; P2-DIMMF2: 1.1.1; ++ P2-DIMMG1: 1.2.0; P2-DIMMG2: 1.2.1; ++ P2-DIMMH1: 1.3.0; P2-DIMMH2: 1.3.1; +\ No newline at end of file diff --git a/d0e0bb3d73c4bc5060da20270a089857bba2a64c.patch b/d0e0bb3d73c4bc5060da20270a089857bba2a64c.patch new file mode 100644 index 0000000000000000000000000000000000000000..d28ce9c7db5e6861044aa68d839615a6c1d3dfff --- /dev/null +++ b/d0e0bb3d73c4bc5060da20270a089857bba2a64c.patch @@ -0,0 +1,42 @@ +commit d0e0bb3d73c4bc5060da20270a089857bba2a64c 
+Author: Justin Vreeland +Date: Tue Nov 2 19:51:50 2021 -0700 + + Update ras-mc-ctl manpage to match current options + + Signed-off-by: Justin Vreeland + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/man/ras-mc-ctl.8.in b/man/ras-mc-ctl.8.in +index 26230e0..a605122 100644 +--- a/man/ras-mc-ctl.8.in ++++ b/man/ras-mc-ctl.8.in +@@ -79,9 +79,27 @@ Specify an alternate location for the labels database. + Specify a delay of \fBtime\fR seconds before registering DIMM labels. + Only meaninful if used together with --register-labels. + .TP +-.BI "--layout ++.BI "--layout" + Prints the memory layout as detected by the EDAC driver. Useful to check + if the EDAC driver is properly detecting the memory controller architecture. ++.TP ++.BI "--summary" ++Presents a summary of the logged errors. ++.TP ++.BI "--errors" ++Shows the errors stored at the error database. ++.TP ++.BI "--error-count" ++Shows the corrected and uncorrected error counts using sysfs. ++.TP ++.BI "--vendor-errors-summary="platform-id ++Presents a summary of the vendor-specific logged errors. ++.TP ++.BI "--vendor-errors="platform-id ++Shows the vendor-specific errors stored in the error database. ++.TP ++.BI "--vendor-platforms" ++Shows the supported platforms with platform-ids for the vendor-specific errors.
+ + .SH MAINBOARD CONFIGURATION + .PP diff --git a/dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch b/dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9eec5adf863ace7ee3cb60030592da4985b70ce --- /dev/null +++ b/dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b.patch @@ -0,0 +1,27 @@ +commit dda7d95bcbbb95e0db557a7a9325ee9815ab4e9b +Author: Mauro Carvalho Chehab +Date: Wed May 26 12:55:54 2021 +0200 + + Add support for multi-arch builds + + Allow building rasdaemon on several architectures: + - x86_64 + - arm 64 + - ppc 64 LE + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml +index 747a844..898687c 100644 +--- a/.github/workflows/ci.yml ++++ b/.github/workflows/ci.yml +@@ -9,6 +9,9 @@ jobs: + Ubuntu: + name: Ubuntu + runs-on: ubuntu-latest ++ strategy: ++ matrix: ++ arch: [x64_64, aarch64, ppc64le] + steps: + - uses: actions/checkout@v2 + - name: prepare diff --git a/dist b/dist new file mode 100644 index 0000000000000000000000000000000000000000..89c1faffc18349bb12eee2371e9dc43bf419b95c --- /dev/null +++ b/dist @@ -0,0 +1 @@ +an9 diff --git a/download b/download new file mode 100644 index 0000000000000000000000000000000000000000..9d0b9dfe63f420a80777a1b21191449fb557022c --- /dev/null +++ b/download @@ -0,0 +1 @@ +8404c50ab6ba72f41e9c948b8ac3c2cb rasdaemon-0.6.7.tar.bz2 diff --git a/ec443ec0add059fa897f844349e1a2345d81713c.patch b/ec443ec0add059fa897f844349e1a2345d81713c.patch new file mode 100644 index 0000000000000000000000000000000000000000..cf778c1ea3119adbcd3a0f2db418f8e82ed108a2 --- /dev/null +++ b/ec443ec0add059fa897f844349e1a2345d81713c.patch @@ -0,0 +1,31 @@ +commit ec443ec0add059fa897f844349e1a2345d81713c +Author: DmNosachev +Date: Tue Jun 29 11:33:10 2021 +0300 + + labels/supermicro: added x11dph-i labels + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 3fd6fee..bfaed93 100644 +--- 
a/labels/supermicro ++++ b/labels/supermicro +@@ -68,3 +68,17 @@ Vendor: Supermicro + P1_DIMM4B: 1.1.1; + P2_DIMM4B: 2.0.1; + P2_DIMM4B: 2.1.1; ++ ++ Model: X11DPH-i ++ P1-DIMMA1: 0.0.0; P1-DIMMA2: 0.0.1; ++ P1-DIMMB1: 0.1.0; ++ P1-DIMMC1: 0.2.0; ++ P1-DIMMD1: 1.0.0; P1-DIMMD2: 1.0.1; ++ P1-DIMME1: 1.1.0; ++ P1-DIMMF1: 1.2.0; ++ P2-DIMMA1: 2.0.0; P2-DIMMA2: 2.0.1; ++ P2-DIMMB1: 2.1.0; ++ P2-DIMMC1: 2.2.0; ++ P2-DIMMD1: 3.0.0; P2-DIMMD2: 3.0.1; ++ P2-DIMME1: 3.1.0; ++ P2-DIMMF1: 3.2.0; +\ No newline at end of file diff --git a/f7cdd720297cd17e405a7170c04df89d1d9536f8.patch b/f7cdd720297cd17e405a7170c04df89d1d9536f8.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2732e883c934590ea9d16d8b5479e0d2a17227c --- /dev/null +++ b/f7cdd720297cd17e405a7170c04df89d1d9536f8.patch @@ -0,0 +1,48 @@ +commit f7cdd720297cd17e405a7170c04df89d1d9536f8 +Author: Mauro Carvalho Chehab +Date: Wed May 26 12:35:55 2021 +0200 + + Add a github workflow for CI automation + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml +new file mode 100644 +index 0000000..5b3e757 +--- /dev/null ++++ b/.github/workflows/ci.yml +@@ -0,0 +1,34 @@ ++name: CI ++ ++# Should run only on branches and PR, as "on_tag.yml" will handle tags ++on: ++ push: ++ branches: master test ++ pull_request: ++ branches: master ++ ++jobs: ++ ++# ++# Linux ++# ++ Ubuntu: ++ name: Ubuntu ++ runs-on: ubuntu-20.04 ++ strategy: ++ matrix: ++ arch: [x64_64, aarch64, armv7, ppc64le] ++ steps: ++ - uses: actions/checkout@v2 ++ with: ++ arch: ${{ matrix.arch }} ++ - name: prepare ++ run: | ++ sudo apt-get update ++ sudo apt-get install -y build-essential sqlite3 ++ - name: build ++ run: | ++ autoreconf -vfi ++ ./configure --enable-all ++ make ++ sudo make install diff --git a/fc1dd37d422fc907416afd028514fff59b63ae12.patch b/fc1dd37d422fc907416afd028514fff59b63ae12.patch new file mode 100644 index 
0000000000000000000000000000000000000000..460d2c1f974db84fe927d2b11ef1add691349712 --- /dev/null +++ b/fc1dd37d422fc907416afd028514fff59b63ae12.patch @@ -0,0 +1,30 @@ +commit fc1dd37d422fc907416afd028514fff59b63ae12 +Author: DmNosachev +Date: Wed Jun 30 16:49:18 2021 +0300 + + labels/supermicro: added Supermicro B1DRi + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/labels/supermicro b/labels/supermicro +index 373de07..b924a32 100644 +--- a/labels/supermicro ++++ b/labels/supermicro +@@ -105,4 +105,14 @@ Vendor: Supermicro + P2-DIMMC1: 2.2.0; + P2-DIMMD1: 3.0.0; + P2-DIMME1: 3.1.0; +- P2-DIMMF1: 3.2.0; +\ No newline at end of file ++ P2-DIMMF1: 3.2.0; ++ ++ Model: B1DRi ++ P1_DIMMA1: 0.0.0; ++ P1_DIMMB1: 0.1.0; ++ P1_DIMMC1: 0.2.0; ++ P1_DIMMD1: 0.3.0; ++ P2_DIMME1: 1.0.0; ++ P2_DIMMF1: 1.1.0; ++ P2_DIMMG1: 1.2.0; ++ P2_DIMMH1: 1.3.0; +\ No newline at end of file diff --git a/fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch b/fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch new file mode 100644 index 0000000000000000000000000000000000000000..a549df75ef10d4e6a2c6d468a58ae4497a92df5d --- /dev/null +++ b/fcdffdcb28ece67ed78e3575a3dce45d9dd4f015.patch @@ -0,0 +1,28 @@ +commit fcdffdcb28ece67ed78e3575a3dce45d9dd4f015 +Author: Mauro Carvalho Chehab +Date: Wed May 26 10:37:52 2021 +0200 + + rasdaemon.spec.in: Fix the description on this example file + + While this is used just to test if building it is OK, better + to keep the logs nice ;-) + + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in +index 6ef223f..afa4359 100644 +--- a/misc/rasdaemon.spec.in ++++ b/misc/rasdaemon.spec.in +@@ -61,10 +61,10 @@ rm INSTALL %{buildroot}/usr/include/*.h + %changelog + + * Wed May 26 2021 Mauro Carvalho Chehab 0.6.7-1 +-- Bump to version 0.6.5 with several fixes and additions ++- Bump to version 0.6.7 with several fixes and additions + + * Tue Jul 21 2020 Mauro Carvalho Chehab 0.6.6-1 +-- Bump to version 0.6.5 with several fixes, 
new hip08 events and memory prediction analysis ++- Bump to version 0.6.6 with several fixes, new hip08 events and memory prediction analysis + + * Wed Nov 20 2019 Mauro Carvalho Chehab 0.6.5-1 + - Bump to version 0.6.5 with several fixes and improves PCIe events record diff --git a/labels.patch b/labels.patch new file mode 100644 index 0000000000000000000000000000000000000000..3eb072ecea477a31a1c3f6bcd52baac7f8b140c2 --- /dev/null +++ b/labels.patch @@ -0,0 +1,263 @@ +Add labels directory from upstream + +Labels directory doesn't get exported by tarball releases. + +Signed-off-by: Aristeu Rozanski + +--- + labels/asus | 20 +++++++ + labels/dell | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + labels/supermicro | 70 ++++++++++++++++++++++++ + 3 files changed, 242 insertions(+) + +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ rasdaemon-0.6.7/labels/asus 2022-02-08 15:44:53.563362010 -0500 +@@ -0,0 +1,20 @@ ++# RASDAEMON Motherboard DIMM labels Database file. ++# ++# Vendor-name and model-name are found from the program 'dmidecode' ++# labels are found from the silk screen on the motherboard. ++# ++#Vendor: ++# Product: ++# Model: ++#