From 17e331676d1dcc5b8d4b98e56c018a1437dfbd87 Mon Sep 17 00:00:00 2001 From: "taifu.gc" Date: Wed, 14 Dec 2022 01:55:34 +0800 Subject: [PATCH 1/4] update to rasdaemon-0.6.4-6.el9.src.rpm --- ...n-add-rbtree-support-for-page-record.patch | 584 --------------- ...pport-for-memory-Corrected-Error-pre.patch | 646 ----------------- ...tification-support-when-page-goes-of.patch | 259 ------- ...a096c3a1d0f993703ab3299f1ddfadf53d7f.patch | 85 --- ...29b024c31d54a7f8a72eab094376c7be27f5.patch | 32 - ...d65b97311dd5736838f1e285355f7f357046.patch | 538 -------------- ...956acc2dab7c18b4701f9657afb9ab3ddc79.patch | 28 - ...217660351c08eb2f8bccebf939abba2f7e69.patch | 66 -- ...f713f667437fb6e283cc3dc090679eb47d08.patch | 372 ---------- ...1e4da4f2daf2b10143fc148a8043312b61e5.patch | 149 ---- ...f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch | 24 - ...64ba44aee9bc5646f6537fc744b0b54aff37.patch | 38 - ...a85d8dc3483423ec2934fee8132f85f8fdb6.patch | 207 ------ ...f39f13833f7d53ef96abc5a72e79384260f4.patch | 230 ------ ...a0711001957ee98f2c124abce0fa1f801529.patch | 670 ------------------ ...76ed94f68ae31d7b5f74e19545698898c13c.patch | 138 ---- add_upstream_labels.patch | 159 ----- ...33aa70331670c06db6b652712b476e24051c.patch | 107 --- ...e68453b2497e86cbd273b9cd56fadc5859e3.patch | 37 - ...a3d6a39d402c41065e9284d49114b97e3bfe.patch | 148 ---- ...e5c65ed5a42eaa97aa3659854add6d808da5.patch | 94 --- ...7864f11f709c4f803828fbc8e507d115d03b.patch | 611 ---------------- download | 1 - rasdaemon-0.6.4.tar.bz2 | Bin 0 -> 392946 bytes ...rasdaemon-avoid-multiple-definitions.patch | 0 ...c-ctl-Fix-script-to-parse-dimm-sizes.patch | 47 -- rasdaemon.spec | 139 +--- 27 files changed, 34 insertions(+), 5375 deletions(-) delete mode 100644 0021-rasdaemon-add-rbtree-support-for-page-record.patch delete mode 100644 0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch delete mode 100644 0023-rasdaemon-Add-notification-support-when-page-goes-of.patch delete mode 100644 0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch delete mode 100644 16d929b024c31d54a7f8a72eab094376c7be27f5.patch delete mode 100644 2290d65b97311dd5736838f1e285355f7f357046.patch delete mode 100644 28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch delete mode 100644 2a1d217660351c08eb2f8bccebf939abba2f7e69.patch delete mode 100644 546cf713f667437fb6e283cc3dc090679eb47d08.patch delete mode 100644 60a91e4da4f2daf2b10143fc148a8043312b61e5.patch delete mode 100644 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch delete mode 100644 854364ba44aee9bc5646f6537fc744b0b54aff37.patch delete mode 100644 8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch delete mode 100644 9acef39f13833f7d53ef96abc5a72e79384260f4.patch delete mode 100644 a16ca0711001957ee98f2c124abce0fa1f801529.patch delete mode 100644 a8c776ed94f68ae31d7b5f74e19545698898c13c.patch delete mode 100644 add_upstream_labels.patch delete mode 100644 aecf33aa70331670c06db6b652712b476e24051c.patch delete mode 100644 b22be68453b2497e86cbd273b9cd56fadc5859e3.patch delete mode 100644 b497a3d6a39d402c41065e9284d49114b97e3bfe.patch delete mode 100644 cc2ce5c65ed5a42eaa97aa3659854add6d808da5.patch delete mode 100644 ce6e7864f11f709c4f803828fbc8e507d115d03b.patch delete mode 100644 download create mode 100644 rasdaemon-0.6.4.tar.bz2 rename 1001-rasdaemon-avoid-multiple-definitions.patch => rasdaemon-avoid-multiple-definitions.patch (100%) delete mode 100644 rasdaemon-ras-mc-ctl-Fix-script-to-parse-dimm-sizes.patch diff --git a/0021-rasdaemon-add-rbtree-support-for-page-record.patch b/0021-rasdaemon-add-rbtree-support-for-page-record.patch deleted file mode 100644 index 4a76a89..0000000 --- a/0021-rasdaemon-add-rbtree-support-for-page-record.patch +++ /dev/null @@ -1,584 +0,0 @@ -From 27794f4a5ff1453490bbcd805ad8e5b54516f015 Mon Sep 17 00:00:00 2001 -From: wuyun -Date: Sat, 20 Jun 2020 20:26:21 +0800 -Subject: [PATCH] rasdaemon: add rbtree support for page record - -commit 5fd96f457262052f7d06435af8a49689ffb6ffcf upstream - -The rbtree is very efficient for recording and querying fault page info. - -Signed-off-by: wuyun -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab -Signed-off-by: Bixuan Cui ---- - rbtree.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - rbtree.h | 165 +++++++++++++++++++++++++++ - 2 files changed, 549 insertions(+) - create mode 100644 rbtree.c - create mode 100644 rbtree.h - -diff --git a/rbtree.c b/rbtree.c -new file mode 100644 -index 0000000..d9b1bd4 ---- /dev/null -+++ b/rbtree.c -@@ -0,0 +1,384 @@ -+/* -+ Red Black Trees -+ (C) 1999 Andrea Arcangeli -+ (C) 2002 David Woodhouse -+ Taken from the Linux 2.6.30 source with some minor modificatons. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ linux/lib/rbtree.c -+*/ -+ -+#include "rbtree.h" -+ -+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *right = node->rb_right; -+ struct rb_node *parent = rb_parent(node); -+ -+ if ((node->rb_right = right->rb_left)) -+ rb_set_parent(right->rb_left, node); -+ right->rb_left = node; -+ -+ rb_set_parent(right, parent); -+ -+ if (parent) -+ { -+ if (node == parent->rb_left) -+ parent->rb_left = right; -+ else -+ parent->rb_right = right; -+ } -+ else -+ root->rb_node = right; -+ rb_set_parent(node, right); -+} -+ -+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *left = node->rb_left; -+ struct rb_node *parent = rb_parent(node); -+ -+ if ((node->rb_left = left->rb_right)) -+ rb_set_parent(left->rb_right, node); -+ left->rb_right = node; -+ -+ rb_set_parent(left, parent); -+ -+ if (parent) -+ { -+ if (node == parent->rb_right) -+ parent->rb_right = left; -+ else -+ parent->rb_left = left; -+ } -+ else -+ root->rb_node = left; -+ rb_set_parent(node, left); -+} -+ -+void rb_insert_color(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *parent, *gparent; -+ -+ while ((parent = rb_parent(node)) && rb_is_red(parent)) -+ { -+ gparent = rb_parent(parent); -+ -+ if (parent == gparent->rb_left) -+ { -+ { -+ register struct rb_node *uncle = gparent->rb_right; -+ if (uncle && rb_is_red(uncle)) -+ { -+ rb_set_black(uncle); -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ node = gparent; -+ continue; -+ } -+ } -+ -+ if (parent->rb_right == node) -+ { -+ struct rb_node *tmp; -+ __rb_rotate_left(parent, root); -+ tmp = parent; -+ parent = node; -+ node = tmp; -+ } -+ -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ __rb_rotate_right(gparent, root); -+ } else { -+ { -+ struct rb_node *uncle = gparent->rb_left; -+ if (uncle && rb_is_red(uncle)) -+ { -+ rb_set_black(uncle); -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ node = gparent; -+ continue; -+ } -+ } -+ -+ if (parent->rb_left == node) -+ { -+ struct rb_node *tmp; -+ __rb_rotate_right(parent, root); -+ tmp = parent; -+ parent = node; -+ node = tmp; -+ } -+ -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ __rb_rotate_left(gparent, root); -+ } -+ } -+ -+ rb_set_black(root->rb_node); -+} -+ -+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, -+ struct rb_root *root) -+{ -+ struct rb_node *other; -+ -+ while ((!node || rb_is_black(node)) && node != root->rb_node) -+ { -+ if (parent->rb_left == node) -+ { -+ other = parent->rb_right; -+ if (rb_is_red(other)) -+ { -+ rb_set_black(other); -+ rb_set_red(parent); -+ __rb_rotate_left(parent, root); -+ other = parent->rb_right; -+ } -+ if ((!other->rb_left || rb_is_black(other->rb_left)) && -+ (!other->rb_right || rb_is_black(other->rb_right))) -+ { -+ rb_set_red(other); -+ node = parent; -+ parent = rb_parent(node); -+ } -+ else -+ { -+ if (!other->rb_right || rb_is_black(other->rb_right)) -+ { -+ rb_set_black(other->rb_left); -+ rb_set_red(other); -+ __rb_rotate_right(other, root); -+ other = parent->rb_right; -+ } -+ rb_set_color(other, rb_color(parent)); -+ rb_set_black(parent); -+ rb_set_black(other->rb_right); -+ __rb_rotate_left(parent, root); -+ node = root->rb_node; -+ break; -+ } -+ } -+ else -+ { -+ other = parent->rb_left; -+ if (rb_is_red(other)) -+ { -+ rb_set_black(other); -+ rb_set_red(parent); -+ __rb_rotate_right(parent, root); -+ other = parent->rb_left; -+ } -+ if ((!other->rb_left || rb_is_black(other->rb_left)) && -+ (!other->rb_right || rb_is_black(other->rb_right))) -+ { -+ rb_set_red(other); -+ node = parent; -+ parent = rb_parent(node); -+ } -+ else -+ { -+ if (!other->rb_left || rb_is_black(other->rb_left)) -+ { -+ rb_set_black(other->rb_right); -+ rb_set_red(other); -+ __rb_rotate_left(other, root); -+ other = parent->rb_left; -+ } -+ rb_set_color(other, rb_color(parent)); -+ rb_set_black(parent); -+ rb_set_black(other->rb_left); -+ __rb_rotate_right(parent, root); -+ node = root->rb_node; -+ break; -+ } -+ } -+ } -+ if (node) -+ rb_set_black(node); -+} -+ -+void rb_erase(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *child, *parent; -+ int color; -+ -+ if (!node->rb_left) -+ child = node->rb_right; -+ else if (!node->rb_right) -+ child = node->rb_left; -+ else -+ { -+ struct rb_node *old = node, *left; -+ -+ node = node->rb_right; -+ while ((left = node->rb_left) != NULL) -+ node = left; -+ child = node->rb_right; -+ parent = rb_parent(node); -+ color = rb_color(node); -+ -+ if (child) -+ rb_set_parent(child, parent); -+ if (parent == old) { -+ parent->rb_right = child; -+ parent = node; -+ } else -+ parent->rb_left = child; -+ -+ node->rb_parent_color = old->rb_parent_color; -+ node->rb_right = old->rb_right; -+ node->rb_left = old->rb_left; -+ -+ if (rb_parent(old)) -+ { -+ if (rb_parent(old)->rb_left == old) -+ rb_parent(old)->rb_left = node; -+ else -+ rb_parent(old)->rb_right = node; -+ } else -+ root->rb_node = node; -+ -+ rb_set_parent(old->rb_left, node); -+ if (old->rb_right) -+ rb_set_parent(old->rb_right, node); -+ goto color; -+ } -+ -+ parent = rb_parent(node); -+ color = rb_color(node); -+ -+ if (child) -+ rb_set_parent(child, parent); -+ if (parent) -+ { -+ if (parent->rb_left == node) -+ parent->rb_left = child; -+ else -+ parent->rb_right = child; -+ } -+ else -+ root->rb_node = child; -+ -+ color: -+ if (color == RB_BLACK) -+ __rb_erase_color(child, parent, root); -+} -+ -+/* -+ * This function returns the first node (in sort order) of the tree. -+ */ -+struct rb_node *rb_first(const struct rb_root *root) -+{ -+ struct rb_node *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return NULL; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+ -+struct rb_node *rb_last(const struct rb_root *root) -+{ -+ struct rb_node *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return NULL; -+ while (n->rb_right) -+ n = n->rb_right; -+ return n; -+} -+ -+struct rb_node *rb_next(const struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+ if (rb_parent(node) == node) -+ return NULL; -+ -+ /* If we have a right-hand child, go down and then left as far -+ as we can. */ -+ if (node->rb_right) { -+ node = node->rb_right; -+ while (node->rb_left) -+ node=node->rb_left; -+ return (struct rb_node *)node; -+ } -+ -+ /* No right-hand children. Everything down and left is -+ smaller than us, so any 'next' node must be in the general -+ direction of our parent. Go up the tree; any time the -+ ancestor is a right-hand child of its parent, keep going -+ up. First time it's a left-hand child of its parent, said -+ parent is our 'next' node. */ -+ while ((parent = rb_parent(node)) && node == parent->rb_right) -+ node = parent; -+ -+ return parent; -+} -+ -+struct rb_node *rb_prev(const struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+ if (rb_parent(node) == node) -+ return NULL; -+ -+ /* If we have a left-hand child, go down and then right as far -+ as we can. */ -+ if (node->rb_left) { -+ node = node->rb_left; -+ while (node->rb_right) -+ node=node->rb_right; -+ return (struct rb_node *)node; -+ } -+ -+ /* No left-hand children. Go up till we find an ancestor which -+ is a right-hand child of its parent */ -+ while ((parent = rb_parent(node)) && node == parent->rb_left) -+ node = parent; -+ -+ return parent; -+} -+ -+void rb_replace_node(struct rb_node *victim, struct rb_node *new, -+ struct rb_root *root) -+{ -+ struct rb_node *parent = rb_parent(victim); -+ -+ /* Set the surrounding nodes to point to the replacement */ -+ if (parent) { -+ if (victim == parent->rb_left) -+ parent->rb_left = new; -+ else -+ parent->rb_right = new; -+ } else { -+ root->rb_node = new; -+ } -+ if (victim->rb_left) -+ rb_set_parent(victim->rb_left, new); -+ if (victim->rb_right) -+ rb_set_parent(victim->rb_right, new); -+ -+ /* Copy the pointers/colour from the victim to the replacement */ -+ *new = *victim; -+} -diff --git a/rbtree.h b/rbtree.h -new file mode 100644 -index 0000000..a8a0459 ---- /dev/null -+++ b/rbtree.h -@@ -0,0 +1,165 @@ -+/* -+ Red Black Trees -+ (C) 1999 Andrea Arcangeli -+ Taken from the Linux 2.6.30 source. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ linux/include/linux/rbtree.h -+ -+ To use rbtrees you'll have to implement your own insert and search cores. -+ This will avoid us to use callbacks and to drop drammatically performances. -+ I know it's not the cleaner way, but in C (not in C++) to get -+ performances and genericity... -+ -+ Some example of insert and search follows here. The search is a plain -+ normal search over an ordered tree. The insert instead must be implemented -+ int two steps: as first thing the code must insert the element in -+ order as a red leaf in the tree, then the support library function -+ rb_insert_color() must be called. Such function will do the -+ not trivial work to rebalance the rbtree if necessary. -+ -+----------------------------------------------------------------------- -+static inline struct page * rb_search_page_cache(struct inode * inode, -+ unsigned long offset) -+{ -+ struct rb_node * n = inode->i_rb_page_cache.rb_node; -+ struct page * page; -+ -+ while (n) -+ { -+ page = rb_entry(n, struct page, rb_page_cache); -+ -+ if (offset < page->offset) -+ n = n->rb_left; -+ else if (offset > page->offset) -+ n = n->rb_right; -+ else -+ return page; -+ } -+ return NULL; -+} -+ -+static inline struct page * __rb_insert_page_cache(struct inode * inode, -+ unsigned long offset, -+ struct rb_node * node) -+{ -+ struct rb_node ** p = &inode->i_rb_page_cache.rb_node; -+ struct rb_node * parent = NULL; -+ struct page * page; -+ -+ while (*p) -+ { -+ parent = *p; -+ page = rb_entry(parent, struct page, rb_page_cache); -+ -+ if (offset < page->offset) -+ p = &(*p)->rb_left; -+ else if (offset > page->offset) -+ p = &(*p)->rb_right; -+ else -+ return page; -+ } -+ -+ rb_link_node(node, parent, p); -+ -+ return NULL; -+} -+ -+static inline struct page * rb_insert_page_cache(struct inode * inode, -+ unsigned long offset, -+ struct rb_node * node) -+{ -+ struct page * ret; -+ if ((ret = __rb_insert_page_cache(inode, offset, node))) -+ goto out; -+ rb_insert_color(node, &inode->i_rb_page_cache); -+ out: -+ return ret; -+} -+----------------------------------------------------------------------- -+*/ -+ -+#ifndef _LINUX_RBTREE_H -+#define _LINUX_RBTREE_H -+ -+#include -+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct rb_node -+{ -+ unsigned long rb_parent_color; -+#define RB_RED 0 -+#define RB_BLACK 1 -+ struct rb_node *rb_right; -+ struct rb_node *rb_left; -+} __attribute__((aligned(sizeof(long)))); -+ /* The alignment might seem pointless, but allegedly CRIS needs it */ -+ -+struct rb_root -+{ -+ struct rb_node *rb_node; -+}; -+ -+ -+#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -+#define rb_color(r) ((r)->rb_parent_color & 1) -+#define rb_is_red(r) (!rb_color(r)) -+#define rb_is_black(r) rb_color(r) -+#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -+#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) -+ -+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -+{ -+ rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -+} -+static inline void rb_set_color(struct rb_node *rb, int color) -+{ -+ rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -+} -+ -+#define RB_ROOT (struct rb_root) { NULL, } -+#define rb_entry(ptr, type, member) container_of(ptr, type, member) -+ -+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -+#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -+#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) -+ -+extern void rb_insert_color(struct rb_node *, struct rb_root *); -+extern void rb_erase(struct rb_node *, struct rb_root *); -+ -+/* Find logical next and previous nodes in a tree */ -+extern struct rb_node *rb_next(const struct rb_node *); -+extern struct rb_node *rb_prev(const struct rb_node *); -+extern struct rb_node *rb_first(const struct rb_root *); -+extern struct rb_node *rb_last(const struct rb_root *); -+ -+/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, -+ struct rb_root *root); -+ -+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, -+ struct rb_node ** rb_link) -+{ -+ node->rb_parent_color = (unsigned long )parent; -+ node->rb_left = node->rb_right = NULL; -+ -+ *rb_link = node; -+} -+ -+#endif /* _LINUX_RBTREE_H */ --- -1.8.3.1 - diff --git a/0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch b/0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch deleted file mode 100644 index 531865f..0000000 --- a/0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch +++ /dev/null @@ -1,646 +0,0 @@ -From c62d0466b0e69ac8c724c9d917000f18aa147aae Mon Sep 17 00:00:00 2001 -From: wuyun -Date: Sat, 20 Jun 2020 20:26:22 +0800 -Subject: [PATCH] rasdaemon: add support for memory Corrected Error predictive failure analysis - -commit 9ae6b70effb8adc9572debc800b8e16173f74bb8 upstream - -Memory Corrected Error was corrected by hardware. These errors do not -require immediate software actions, but are still reported for -accounting and predictive failure analysis. - -Based on statistical results, some actions can be taken to prevent -Corrected Error from evoluting to Uncorrected Error. - -Signed-off-by: wuyun -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab -Signed-off-by: Bixuan Cui ---- - Makefile.am | 7 +- - configure.ac | 12 ++ - man/rasdaemon.1.in | 7 + - misc/rasdaemon.env | 29 ++++ - misc/rasdaemon.service.in | 1 + - misc/rasdaemon.spec.in | 4 +- - ras-events.c | 6 + - ras-mc-handler.c | 7 + - ras-page-isolation.c | 332 ++++++++++++++++++++++++++++++++++++++ - ras-page-isolation.h | 66 ++++++++ - 10 files changed, 468 insertions(+), 3 deletions(-) - create mode 100644 misc/rasdaemon.env - create mode 100644 ras-page-isolation.c - create mode 100644 ras-page-isolation.h - -diff --git a/Makefile.am b/Makefile.am -index fccdeba..dc30ae7 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -1,6 +1,6 @@ - ACLOCAL_AMFLAGS=-I m4 - SUBDIRS = libtrace util man --SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in -+SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in misc/rasdaemon.env - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) - EXTRA_DIST = $(SYSTEMD_SERVICES_IN) - -@@ -51,13 +51,16 @@ endif - if WITH_HISI_NS_DECODE - rasdaemon_SOURCES += non-standard-hisi_hip07.c - endif -+if WITH_MEMORY_CE_PFA -+ rasdaemon_SOURCES += rbtree.c ras-page-isolation.c -+endif - - rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ - ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ -- ras-memory-failure-handler.h -+ ras-memory-failure-handler.h rbtree.h ras-page-isolation.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that -diff --git a/configure.ac b/configure.ac -index 8be33d9..1f95459 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -108,6 +108,17 @@ AS_IF([test "x$enable_hisi_ns_decode" = "xyes"], [ - ]) - AM_CONDITIONAL([WITH_HISI_NS_DECODE], [test x$enable_hisi_ns_decode = xyes]) - -+AC_ARG_ENABLE([memory_ce_pfa], -+ AS_HELP_STRING([--enable-memory-ce-pfa], [enable memory Corrected Error predictive failure analysis])) -+ -+AS_IF([test "x$enable_memory_ce_pfa" = "xyes"], [ -+ AC_DEFINE(HAVE_MEMORY_CE_PFA,1,"have memory corrected error predictive failure analysis") -+ AC_SUBST([WITH_MEMORY_CE_PFA]) -+]) -+AM_CONDITIONAL([WITH_MEMORY_CE_PFA], [test x$enable_memory_ce_pfa = xyes]) -+AM_COND_IF([WITH_MEMORY_CE_PFA], [USE_MEMORY_CE_PFA="yes"], [USE_MEMORY_CE_PFA="no"]) -+ -+ - test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc - - CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" -@@ -138,4 +149,5 @@ compile time options summary - HIP07 SAS HW errors : $enable_hisi_ns_decode - ARM events : $enable_arm - Memory Failure : $USE_MEMORY_FAILURE -+ Memory CE PFA : $enable_memory_ce_pfa - EOF -diff --git a/man/rasdaemon.1.in b/man/rasdaemon.1.in -index 834df16..833c8e1 100644 ---- a/man/rasdaemon.1.in -+++ b/man/rasdaemon.1.in -@@ -62,6 +62,13 @@ feature. - .BI "--version" - Print the program version and exit. - -+.SH CONFIG FILE -+ -+The \fBrasdaemon\fR program supports a config file to set rasdaemon systemd service -+environment variables. By default the config file is read from /etc/sysconfig/rasdaemon. -+ -+The general format is environmentname=value. -+ - .SH SEE ALSO - \fBras-mc-ctl\fR(8) - -diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env -new file mode 100644 -index 0000000..12fd766 ---- /dev/null -+++ b/misc/rasdaemon.env -@@ -0,0 +1,29 @@ -+# Page Isolation -+# Note: Run-time configuration is unsupported, service restart needed. -+# Note: this file should be installed at /etc/sysconfig/rasdaemon -+ -+# Specify the threshold of isolating buggy pages. -+# -+# Format: -+# [0-9]+[unit] -+# Notice: please make sure match this format, rasdaemon will use default value for exception input cases. -+# -+# Supported units: -+# PAGE_CE_REFRESH_CYCLE: D|d (day), H|h (hour), M|m (min), default is in hour -+# PAGE_CE_THRESHOLD: K|k (x1000), M|m (x1000k), default is none -+# -+# The two configs will only take no effect when PAGE_CE_ACTION is "off". -+PAGE_CE_REFRESH_CYCLE="24h" -+PAGE_CE_THRESHOLD="50" -+ -+# Specify the internal action in rasdaemon to exceeding a page error threshold. -+# -+# off no action -+# account only account errors -+# soft try to soft-offline page without killing any processes -+# This requires an uptodate kernel. Might not be successfull. -+# hard try to hard-offline page by killing processes -+# Requires an uptodate kernel. Might not be successfull. -+# soft-then-hard First try to soft offline, then try hard offlining. -+# Note: default offline choice is "soft". -+PAGE_CE_ACTION="soft" -diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in -index be9ad5a..e73a08a 100644 ---- a/misc/rasdaemon.service.in -+++ b/misc/rasdaemon.service.in -@@ -3,6 +3,7 @@ Description=RAS daemon to log the RAS events - After=syslog.target - - [Service] -+EnvironmentFile=/etc/sysconfig/rasdaemon - ExecStart=@sbindir@/rasdaemon -f -r - ExecStartPost=@sbindir@/rasdaemon --enable - ExecStop=@sbindir@/rasdaemon --disable -diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in -index 82fae30..f5faffe 100644 ---- a/misc/rasdaemon.spec.in -+++ b/misc/rasdaemon.spec.in -@@ -36,12 +36,13 @@ an utility for reporting current error counts from the EDAC sysfs files. - %setup -q - - %build --%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-non-standard --enable-hisi-ns-decode --enable-arm -+%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-ce-pfa - - make %{?_smp_mflags} - - %install - make install DESTDIR=%{buildroot} -+install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service - rm INSTALL %{buildroot}/usr/include/*.h -@@ -54,6 +55,7 @@ rm INSTALL %{buildroot}/usr/include/*.h - %{_unitdir}/*.service - %{_sharedstatedir}/rasdaemon - %{_sysconfdir}/ras/dimm_labels.d -+%config(noreplace) %{_sysconfdir}/sysconfig/%{name} - - %changelog - -diff --git a/ras-events.c b/ras-events.c -index 27ac1ab..5113c32 100644 ---- a/ras-events.c -+++ b/ras-events.c -@@ -36,6 +36,7 @@ - #include "ras-memory-failure-handler.h" - #include "ras-record.h" - #include "ras-logger.h" -+#include "ras-page-isolation.h" - - /* - * Polling time, if read() doesn't block. Currently, trace_pipe_raw never -@@ -673,6 +674,11 @@ int handle_ras_events(int record_events) - ras->page_size = page_size; - ras->record_events = record_events; - -+#ifdef HAVE_MEMORY_CE_PFA -+ /* FIXME: enable memory isolation unconditionally */ -+ ras_page_account_init(); -+#endif -+ - rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", - ras_mc_event_handler); - if (!rc) -diff --git a/ras-mc-handler.c b/ras-mc-handler.c -index deb7e05..42b05cd 100644 ---- a/ras-mc-handler.c -+++ b/ras-mc-handler.c -@@ -23,6 +23,7 @@ - #include "ras-mc-handler.h" - #include "ras-record.h" - #include "ras-logger.h" -+#include "ras-page-isolation.h" - #include "ras-report.h" - - int ras_mc_event_handler(struct trace_seq *s, -@@ -183,6 +184,12 @@ int ras_mc_event_handler(struct trace_seq *s, - - ras_store_mc_event(ras, &ev); - -+#ifdef HAVE_MEMORY_CE_PFA -+ /* Account page corrected errors */ -+ if (!strcmp(ev.error_type, "Corrected")) -+ ras_record_page_error(ev.address, ev.error_count, now); -+#endif -+ - #ifdef HAVE_ABRT_REPORT - /* Report event to ABRT */ - ras_report_mc_event(ras, &ev); -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -new file mode 100644 -index 0000000..50e4406 ---- /dev/null -+++ b/ras-page-isolation.c -@@ -0,0 +1,332 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include "ras-logger.h" -+#include "ras-page-isolation.h" -+ -+#define PARSED_ENV_LEN 50 -+static const struct config threshold_units[] = { -+ { "m", 1000 }, -+ { "k", 1000 }, -+ { "", 1 }, -+ {} -+}; -+ -+static const struct config cycle_units[] = { -+ { "d", 24 }, -+ { "h", 60 }, -+ { "m", 60 }, -+ { "s", 1 }, -+ {} -+}; -+ -+static struct isolation threshold = { -+ .name = "PAGE_CE_THRESHOLD", -+ .units = threshold_units, -+ .env = "50", -+ .unit = "", -+}; -+ -+static struct isolation cycle = { -+ .name = "PAGE_CE_REFRESH_CYCLE", -+ .units = cycle_units, -+ .env = "24h", -+ .unit = "h", -+}; -+ -+static const char *kernel_offline[] = { -+ [OFFLINE_SOFT] = "/sys/devices/system/memory/soft_offline_page", -+ [OFFLINE_HARD] = "/sys/devices/system/memory/hard_offline_page", -+ [OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page", -+}; -+ -+static const struct config offline_choice[] = { -+ { "off", OFFLINE_OFF }, -+ { "account", OFFLINE_ACCOUNT }, -+ { "soft", OFFLINE_SOFT }, -+ { "hard", OFFLINE_HARD }, -+ { "soft-then-hard", OFFLINE_SOFT_THEN_HARD }, -+ {} -+}; -+ -+static const char *page_state[] = { -+ [PAGE_ONLINE] = "online", -+ [PAGE_OFFLINE] = "offlined", -+ [PAGE_OFFLINE_FAILED] = "offline-failed", -+}; -+ -+static enum otype offline = OFFLINE_SOFT; -+static struct rb_root page_records; -+ -+static void page_offline_init(void) -+{ -+ const char *env = "PAGE_CE_ACTION"; -+ char *choice = getenv(env); -+ const struct config *c = NULL; -+ int matched = 0; -+ -+ if (choice) { -+ for (c = offline_choice; c->name; c++) { -+ if (!strcasecmp(choice, c->name)) { -+ offline = c->val; -+ matched = 1; -+ break; -+ } -+ } -+ } -+ -+ if (!matched) -+ log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env); -+ -+ if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) { -+ log(TERM, LOG_INFO, "Kernel does not support page offline interface\n"); -+ offline = OFFLINE_ACCOUNT; -+ } -+ -+ log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n", -+ offline_choice[offline].name); -+} -+ -+static void parse_isolation_env(struct isolation *config) -+{ -+ char *env = getenv(config->name); -+ char *unit = NULL; -+ const struct config *units = NULL; -+ int i, no_unit; -+ int valid = 0; -+ int unit_matched = 0; -+ unsigned long value, tmp; -+ -+ /* check if env is vaild */ -+ if (env && strlen(env)) { -+ /* All the character before unit must be digit */ -+ for (i = 0; i < strlen(env) - 1; i++) { -+ if (!isdigit(env[i])) -+ goto parse; -+ } -+ if (sscanf(env, "%lu", &value) < 1 || !value) -+ goto parse; -+ /* check if the unit is vaild */ -+ unit = env + strlen(env) - 1; -+ /* no unit, all the character are value character */ -+ if (isdigit(*unit)) { -+ valid = 1; -+ no_unit = 1; -+ goto parse; -+ } -+ for (units = config->units; units->name; units++) { -+ /* value character and unit character are both valid */ -+ if (!strcasecmp(unit, units->name)) { -+ valid = 1; -+ no_unit = 0; -+ break; -+ } -+ } -+ } -+ -+parse: -+ /* if invalid, use default env */ -+ if (valid) { -+ config->env = env; -+ if (!no_unit) -+ config->unit = unit; -+ } else { -+ log(TERM, LOG_INFO, "Improper %s, set to default %s.\n", -+ config->name, config->env); -+ } -+ -+ /* if env value string is greater than ulong_max, truncate the last digit */ -+ sscanf(config->env, "%lu", &value); -+ for (units = config->units; units->name; units++) { -+ if (!strcasecmp(config->unit, units->name)) -+ unit_matched = 1; -+ if (unit_matched) { -+ tmp = value; -+ value *= units->val; -+ if (tmp != 0 && value / tmp != units->val) -+ config->overflow = true; -+ } -+ } -+ config->val = value; -+ /* In order to output value and unit perfectly */ -+ config->unit = no_unit ? config->unit : ""; -+} -+ -+static void parse_env_string(struct isolation *config, char *str) -+{ -+ int i; -+ -+ if (config->overflow) { -+ /* when overflow, use basic unit */ -+ for (i = 0; config->units[i].name; i++) ; -+ sprintf(str, "%lu%s", config->val, config->units[i-1].name); -+ log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n", -+ config->name, config->env); -+ } else { -+ sprintf(str, "%s%s", config->env, config->unit); -+ } -+} -+ -+static void page_isolation_init(void) -+{ -+ char threshold_string[PARSED_ENV_LEN]; -+ char cycle_string[PARSED_ENV_LEN]; -+ /** -+ * It's unnecessary to parse threshold configuration when offline -+ * choice is off. -+ */ -+ if (offline == OFFLINE_OFF) -+ return; -+ -+ parse_isolation_env(&threshold); -+ parse_isolation_env(&cycle); -+ parse_env_string(&threshold, threshold_string); -+ parse_env_string(&cycle, cycle_string); -+ log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n", -+ threshold_string, cycle_string); -+} -+ -+void ras_page_account_init(void) -+{ -+ page_offline_init(); -+ page_isolation_init(); -+} -+ -+static int do_page_offline(unsigned long long addr, enum otype type) -+{ -+ FILE *offline_file; -+ int err; -+ -+ offline_file = fopen(kernel_offline[type], "w"); -+ if (!offline_file) -+ return -1; -+ -+ fprintf(offline_file, "%#llx", addr); -+ err = ferror(offline_file) ? -1 : 0; -+ fclose(offline_file); -+ -+ return err; -+} -+ -+static void page_offline(struct page_record *pr) -+{ -+ unsigned long long addr = pr->addr; -+ int ret; -+ -+ /* Offlining page is not required */ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ /* Ignore offlined pages */ -+ if (pr->offlined != PAGE_ONLINE) -+ return; -+ -+ /* Time to silence this noisy page */ -+ if (offline == OFFLINE_SOFT_THEN_HARD) { -+ ret = do_page_offline(addr, OFFLINE_SOFT); -+ if (ret < 0) -+ ret = do_page_offline(addr, OFFLINE_HARD); -+ } else { -+ ret = do_page_offline(addr, offline); -+ } -+ -+ pr->offlined = ret < 0 ? PAGE_OFFLINE_FAILED : PAGE_OFFLINE; -+ -+ log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n", -+ addr, page_state[pr->offlined]); -+} -+ -+static void page_record(struct page_record *pr, unsigned count, time_t time) -+{ -+ unsigned long period = time - pr->start; -+ unsigned long tolerate; -+ -+ if (period >= cycle.val) { -+ /** -+ * Since we don't refresh automatically, it is possible that the period -+ * between two occurences will be longer than the pre-configured refresh cycle. -+ * In this case, we tolerate the frequency of the whole period up to -+ * the pre-configured threshold. -+ */ -+ tolerate = (period / (double)cycle.val) * threshold.val; -+ pr->count -= (tolerate > pr->count) ? pr->count : tolerate; -+ pr->start = time; -+ pr->excess = 0; -+ } -+ -+ pr->count += count; -+ if (pr->count >= threshold.val) { -+ log(TERM, LOG_INFO, "Corrected Errors at %#llx exceeded threshold\n", pr->addr); -+ -+ /** -+ * Backup ce count of current cycle to enable next round, which actually -+ * should never happen if we can disable overflow completely in the same -+ * time unit (but sadly we can't). -+ */ -+ pr->excess += pr->count; -+ pr->count = 0; -+ page_offline(pr); -+ } -+} -+ -+static struct page_record *page_lookup_insert(unsigned long long addr) -+{ -+ struct rb_node **entry = &page_records.rb_node; -+ struct rb_node *parent = NULL; -+ struct page_record *pr = NULL, *find = NULL; -+ -+ while (*entry) { -+ parent = *entry; -+ pr = rb_entry(parent, struct page_record, entry); -+ if (addr == pr->addr) { -+ return pr; -+ } else if (addr < pr->addr) { -+ entry = &(*entry)->rb_left; -+ } else { -+ entry = &(*entry)->rb_right; -+ } -+ } -+ -+ find = calloc(1, sizeof(struct page_record)); -+ if (!find) { -+ log(TERM, LOG_ERR, "No memory for page records\n"); -+ return NULL; -+ } -+ -+ find->addr = addr; -+ rb_link_node(&find->entry, parent, entry); -+ rb_insert_color(&find->entry, &page_records); -+ -+ return find; -+} -+ -+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) -+{ -+ struct page_record *pr = NULL; -+ -+ if (offline == OFFLINE_OFF) -+ return; -+ -+ pr = page_lookup_insert(addr & PAGE_MASK); -+ if (pr) { -+ if (!pr->start) -+ pr->start = time; -+ page_record(pr, count, time); -+ } -+} -diff --git a/ras-page-isolation.h b/ras-page-isolation.h -new file mode 100644 -index 0000000..3d03cef ---- /dev/null -+++ b/ras-page-isolation.h -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+*/ -+ -+#ifndef __RAS_PAGE_ISOLATION_H -+#define __RAS_PAGE_ISOLATION_H -+ -+#include -+#include -+#include "rbtree.h" -+ -+#define PAGE_SHIFT 12 -+#define PAGE_SIZE (1 << PAGE_SHIFT) -+#define PAGE_MASK (~(PAGE_SIZE-1)) -+ -+struct config { -+ char *name; -+ unsigned long val; -+}; -+ -+enum otype { -+ OFFLINE_OFF, -+ OFFLINE_ACCOUNT, -+ OFFLINE_SOFT, -+ OFFLINE_HARD, -+ OFFLINE_SOFT_THEN_HARD, -+}; -+ -+enum pstate { -+ PAGE_ONLINE, -+ PAGE_OFFLINE, -+ PAGE_OFFLINE_FAILED, -+}; -+ -+struct page_record { -+ struct rb_node entry; -+ unsigned long long addr; -+ time_t start; -+ enum pstate offlined; -+ unsigned long count; -+ unsigned long excess; -+}; -+ -+struct isolation { -+ char *name; -+ char *env; -+ const struct config *units; -+ unsigned long val; -+ bool overflow; -+ char *unit; -+}; -+ -+void ras_page_account_init(void); -+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time); -+ -+#endif --- -2.27.0 - diff --git a/0023-rasdaemon-Add-notification-support-when-page-goes-of.patch b/0023-rasdaemon-Add-notification-support-when-page-goes-of.patch deleted file mode 100644 index 9132d67..0000000 --- a/0023-rasdaemon-Add-notification-support-when-page-goes-of.patch +++ /dev/null @@ -1,259 +0,0 @@ -From 07c3c72d18e5c7da2109b5afa918966733039f13 Mon Sep 17 00:00:00 2001 -From: Bixuan Cui -Date: Sun, 5 Jun 2022 02:10:24 +0800 -Subject: [PATCH] rasdaemon: Add notification support when page goes offline for Memory Corrected Error - -When the page goes offline, it may affect the user's processes. -The user needs to do some special actions (such as restarting the -process) before or after going offline. - -So add page-ce-offline-pre-notice and page-ce-offline-post-notice -to env file of rasdaemon for notifying the user when doing page -offline. - -Signed-off-by: Bixuan Cui ---- - Makefile.am | 2 +- - misc/notices/page-ce-offline-post-notice | 17 +++++ - misc/notices/page-ce-offline-pre-notice | 17 +++++ - misc/rasdaemon.env | 4 ++ - misc/rasdaemon.spec.in | 3 + - ras-page-isolation.c | 90 ++++++++++++++++++++++++ - 6 files changed, 132 insertions(+), 1 deletion(-) - create mode 100755 misc/notices/page-ce-offline-post-notice - create mode 100755 misc/notices/page-ce-offline-pre-notice - -diff --git a/Makefile.am b/Makefile.am -index de76301..701b120 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -1,6 +1,6 @@ - ACLOCAL_AMFLAGS=-I m4 - SUBDIRS = libtrace util man --SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in misc/rasdaemon.env -+SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in misc/rasdaemon.env misc/notices - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) - EXTRA_DIST = $(SYSTEMD_SERVICES_IN) - -diff --git a/misc/notices/page-ce-offline-post-notice b/misc/notices/page-ce-offline-post-notice -new file mode 100755 -index 0000000..d78b1b0 ---- /dev/null -+++ b/misc/notices/page-ce-offline-post-notice -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon after a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-post-notice.local ] && . ./page-ce-offline-post-notice.local $1 -+ -+if [ -d page-ce-offline-post-notice.extern ] -+then -+ ls page-ce-offline-post-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-post-notice.extern/$item ] && . ./page-ce-offline-post-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff --git a/misc/notices/page-ce-offline-pre-notice b/misc/notices/page-ce-offline-pre-notice -new file mode 100755 -index 0000000..d1038a3 ---- /dev/null -+++ b/misc/notices/page-ce-offline-pre-notice -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon before a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-pre-notice.local ] && . ./page-ce-offline-pre-notice.local $1 -+ -+if [ -d page-ce-offline-pre-notice.extern ] -+then -+ ls page-ce-offline-pre-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-pre-notice.extern/$item ] && . ./page-ce-offline-pre-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env -index 12fd766..713875a 100644 ---- a/misc/rasdaemon.env -+++ b/misc/rasdaemon.env -@@ -27,3 +27,7 @@ PAGE_CE_THRESHOLD="50" - # soft-then-hard First try to soft offline, then try hard offlining. - # Note: default offline choice is "soft". - PAGE_CE_ACTION="soft" -+ -+# Notices script when doing memory offline -+PAGE_CE_OFFLINE_PRE_NOTICE="page-ce-offline-pre-notice" -+PAGE_CE_OFFLINE_POST_NOTICE="page-ce-offline-post-notice" -diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in -index eff9794..f690575 100644 ---- a/misc/rasdaemon.spec.in -+++ b/misc/rasdaemon.spec.in -@@ -45,6 +45,8 @@ make install DESTDIR=%{buildroot} - install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service -+install -d %{buildroot}%{_sysconfdir}/rasdaemon_notices/ -+install -D -p -m 0755 misc/notices/* %{buildroot}%{_sysconfdir}/rasdaemon_notices/ - rm INSTALL %{buildroot}/usr/include/*.h - - %files -@@ -56,6 +58,7 @@ rm INSTALL %{buildroot}/usr/include/*.h - %{_sharedstatedir}/rasdaemon - %{_sysconfdir}/ras/dimm_labels.d - %config(noreplace) %{_sysconfdir}/sysconfig/%{name} -+%config(noreplace) %{_sysconfdir}/rasdaemon_notices/* - - %changelog - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index 50e4406..f4f3bc1 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -17,9 +17,13 @@ - #include - #include - #include -+#include -+#include -+#include - #include "ras-logger.h" - #include "ras-page-isolation.h" - -+#define MAX_PATH_LEN 64 - #define PARSED_ENV_LEN 50 - static const struct config threshold_units[] = { - { "m", 1000 }, -@@ -73,6 +77,8 @@ static const char *page_state[] = { - - static enum otype offline = OFFLINE_SOFT; - static struct rb_root page_records; -+static char pre_notice[MAX_PATH_LEN]; -+static char post_notice[MAX_PATH_LEN]; - - static void page_offline_init(void) - { -@@ -202,16 +208,94 @@ static void page_isolation_init(void) - threshold_string, cycle_string); - } - -+static void page_notice_init(void) -+{ -+ char *notice_root = "/etc/rasdaemon_notices"; -+ char *pre_re = getenv("PAGE_CE_OFFLINE_PRE_NOTICE"); -+ char *post_re = getenv("PAGE_CE_OFFLINE_POST_NOTICE"); -+ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ snprintf(pre_notice, sizeof(pre_notice), "%s/%s", notice_root, pre_re); -+ if (access(pre_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", pre_notice); -+ -+ snprintf(post_notice, sizeof(post_notice), "%s/%s", notice_root, post_re); -+ if (access(post_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", post_notice); -+} -+ - void ras_page_account_init(void) - { - page_offline_init(); - page_isolation_init(); -+ page_notice_init(); -+} -+ -+static void finish_child(pid_t child, int status) -+{ -+ if (WIFEXITED(status) && WEXITSTATUS(status)) { -+ log(TERM, LOG_INFO, "notice exited with status %d\n", WEXITSTATUS(status)); -+ } else if (WIFSIGNALED(status)) { -+ log(TERM, LOG_INFO,"notice died with signal %s\n", strsignal(WTERMSIG(status))); -+ } -+ -+ return; -+} -+ -+static void __run_notice(char *argv[], char **env) -+{ -+ pid_t child; -+ int status; -+ -+ child = fork(); -+ if (child < 0) { -+ log(TERM, LOG_ERR, "Cannot create process for offline notice"); -+ return; -+ } -+ if (child == 0) { -+ execve(argv[0], argv, env); -+ _exit(127); -+ } -+ else { -+ waitpid(child, &status, 0); -+ finish_child(child, status); -+ } -+} -+ -+static void run_notice(char *argv[]) -+{ -+ int MAX_ENV = 20; -+ char *env[MAX_ENV]; -+ int ei = 0; -+ int i; -+ -+ asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin"); -+ env[ei] = NULL; -+ assert(ei < MAX_ENV); -+ -+ __run_notice(argv, env); -+ -+ for (i = 0; i < ei; i++) -+ free(env[i]); - } - - static int do_page_offline(unsigned long long addr, enum otype type) - { - FILE *offline_file; - int err; -+ char *args; -+ char *argv[] = { -+ NULL, -+ NULL, -+ NULL, -+ }; -+ -+ asprintf(&args, "%llu", addr); -+ argv[0] = (char*)&pre_notice; -+ argv[1] = args; -+ run_notice(argv); - - offline_file = fopen(kernel_offline[type], "w"); - if (!offline_file) -@@ -221,6 +305,11 @@ static int do_page_offline(unsigned long long addr, enum otype type) - err = ferror(offline_file) ? -1 : 0; - fclose(offline_file); - -+ argv[0] = (char*)&post_notice; -+ run_notice(argv); -+ -+ free(args); -+ - return err; - } - -@@ -329,4 +418,5 @@ void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) - pr->start = time; - page_record(pr, count, time); - } -+ - } --- -2.27.0 - diff --git a/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch b/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch deleted file mode 100644 index 852eb4f..0000000 --- a/0862a096c3a1d0f993703ab3299f1ddfadf53d7f.patch +++ /dev/null @@ -1,85 +0,0 @@ -commit 0862a096c3a1d0f993703ab3299f1ddfadf53d7f -Author: Shiju Jose -Date: Tue Aug 11 13:31:46 2020 +0100 - - rasdaemon: ras-mc-ctl: Add ARM processor error information - - Add supporting ARM processor error in the ras-mc-ctl tool. - - Signed-off-by: Shiju Jose - Signed-off-by: Mauro Carvalho Chehab - ---- - util/ras-mc-ctl.in | 40 ++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 40 insertions(+) - ---- rasdaemon-0.6.1.orig/util/ras-mc-ctl.in 2021-10-06 14:14:25.000440090 -0400 -+++ rasdaemon-0.6.1/util/ras-mc-ctl.in 2021-10-06 14:15:59.995598590 -0400 -@@ -1124,6 +1124,7 @@ sub summary - my ($query, $query_handle, $out); - my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg); - my ($etype, $severity, $etype_string, $severity_string); -+ my ($affinity, $mpidr); - - my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); - -@@ -1159,6 +1160,22 @@ sub summary - } - $query_handle->finish; - -+ # ARM processor arm_event errors -+ $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($affinity, $mpidr, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count errors\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events summary:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; -+ - # extlog errors - $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; - $query_handle = $dbh->prepare($query); -@@ -1202,6 +1219,7 @@ sub errors - my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out); - my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location); - my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data); -+ my ($error_count, $affinity, $mpidr, $r_state, $psci_state); - - my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {}); - -@@ -1241,6 +1259,28 @@ sub errors - } - $query_handle->finish; - -+ # ARM processor arm_event errors -+ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "error_count=$error_count, " if ($error_count); -+ $out .= "affinity_level=$affinity, "; -+ $out .= sprintf "mpidr=0x%x, ", $mpidr; -+ $out .= sprintf "running_state=0x%x, ", $r_state; -+ $out .= sprintf "psci_state=0x%x", $psci_state; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; -+ - # Extlog errors - $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; - $query_handle = $dbh->prepare($query); diff --git a/16d929b024c31d54a7f8a72eab094376c7be27f5.patch b/16d929b024c31d54a7f8a72eab094376c7be27f5.patch deleted file mode 100644 index ab66f52..0000000 --- a/16d929b024c31d54a7f8a72eab094376c7be27f5.patch +++ /dev/null @@ -1,32 +0,0 @@ -commit 16d929b024c31d54a7f8a72eab094376c7be27f5 -Author: Mauro Carvalho Chehab -Date: Wed May 26 10:20:39 2021 +0200 - - Makefile.am: fix build header rules - - non-standard-hisilicon.h was added twice; - ras-memory-failure-handler.h is missing. - - Due to that, the tarball becomes incomplete, causing build - errors. - - While here, also adjust .travis.yml to use --enable-all. - - Signed-off-by: Mauro Carvalho Chehab - ---- - Makefile.am | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/Makefile.am 2021-10-13 13:27:53.402685179 -0400 -+++ b/Makefile.am 2021-10-13 13:28:11.664525173 -0400 -@@ -54,7 +54,8 @@ rasdaemon_LDADD = -lpthread $(SQLITE3_LI - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ -- ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h -+ ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ -+ ras-memory-failure-handler.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that diff --git a/2290d65b97311dd5736838f1e285355f7f357046.patch b/2290d65b97311dd5736838f1e285355f7f357046.patch deleted file mode 100644 index 0710974..0000000 --- a/2290d65b97311dd5736838f1e285355f7f357046.patch +++ /dev/null @@ -1,538 +0,0 @@ -commit 2290d65b97311dd5736838f1e285355f7f357046 -Author: Shiju Jose -Date: Mon Mar 8 16:57:26 2021 +0000 - - rasdaemon: add support for memory_failure events - - Add support to log the memory_failure kernel trace - events. - - Example rasdaemon log and SQLite DB output for the - memory_failure event, - ================================================= - rasdaemon: memory_failure_event store: 0x126ce8f8 - rasdaemon: register inserted at db - <...>-785 [000] 0.000024: memory_failure_event: 2020-10-02 13:27:13 -0400 pfn=0x204000000 page_type=free buddy page action_result=Delayed - - CREATE TABLE memory_failure_event (id INTEGER PRIMARY KEY, timestamp TEXT, pfn TEXT, page_type TEXT, action_result TEXT); - INSERT INTO memory_failure_event VALUES(1,'2020-10-02 13:27:13 -0400','0x204000000','free buddy page','Delayed'); - ================================================== - - Signed-off-by: Shiju Jose - Signed-off-by: Mauro Carvalho Chehab - ---- - Makefile.am | 4 - ras-events.c | 15 +++ - ras-memory-failure-handler.c | 179 +++++++++++++++++++++++++++++++++++++++++++ - ras-memory-failure-handler.h | 25 ++++++ - ras-record.c | 56 +++++++++++++ - ras-record.h | 13 +++ - ras-report.c | 68 ++++++++++++++++ - ras-report.h | 5 - - 8 files changed, 364 insertions(+), 1 deletion(-) - ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ b/ras-memory-failure-handler.c 2021-10-14 16:31:36.840657728 -0400 -@@ -0,0 +1,179 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include -+#include -+#include -+#include "libtrace/kbuffer.h" -+#include "ras-memory-failure-handler.h" -+#include "ras-record.h" -+#include "ras-logger.h" -+#include "ras-report.h" -+ -+/* Memory failure - various types of pages */ -+enum mf_action_page_type { -+ MF_MSG_KERNEL, -+ MF_MSG_KERNEL_HIGH_ORDER, -+ MF_MSG_SLAB, -+ MF_MSG_DIFFERENT_COMPOUND, -+ MF_MSG_POISONED_HUGE, -+ MF_MSG_HUGE, -+ MF_MSG_FREE_HUGE, -+ MF_MSG_NON_PMD_HUGE, -+ MF_MSG_UNMAP_FAILED, -+ MF_MSG_DIRTY_SWAPCACHE, -+ MF_MSG_CLEAN_SWAPCACHE, -+ MF_MSG_DIRTY_MLOCKED_LRU, -+ MF_MSG_CLEAN_MLOCKED_LRU, -+ MF_MSG_DIRTY_UNEVICTABLE_LRU, -+ MF_MSG_CLEAN_UNEVICTABLE_LRU, -+ MF_MSG_DIRTY_LRU, -+ MF_MSG_CLEAN_LRU, -+ MF_MSG_TRUNCATED_LRU, -+ MF_MSG_BUDDY, -+ MF_MSG_BUDDY_2ND, -+ MF_MSG_DAX, -+ MF_MSG_UNSPLIT_THP, -+ MF_MSG_UNKNOWN, -+}; -+ -+/* Action results for various types of pages */ -+enum mf_action_result { -+ MF_IGNORED, /* Error: cannot be handled */ -+ MF_FAILED, /* Error: handling failed */ -+ MF_DELAYED, /* Will be handled later */ -+ MF_RECOVERED, /* Successfully recovered */ -+}; -+ -+/* memory failure page types */ -+static const struct { -+ int type; -+ const char *page_type; -+} mf_page_type[] = { -+ { MF_MSG_KERNEL, "reserved kernel page" }, -+ { MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"}, -+ { MF_MSG_SLAB, "kernel slab page"}, -+ { MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"}, -+ { MF_MSG_POISONED_HUGE, "huge page already hardware poisoned"}, -+ { MF_MSG_HUGE, "huge page"}, -+ { MF_MSG_FREE_HUGE, "free huge page"}, -+ { MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"}, -+ { MF_MSG_UNMAP_FAILED, "unmapping failed page"}, -+ { MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"}, -+ { MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"}, -+ { MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page"}, -+ { MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page"}, -+ { MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page"}, -+ { MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page"}, -+ { MF_MSG_DIRTY_LRU, "dirty LRU page"}, -+ { MF_MSG_CLEAN_LRU, "clean LRU page"}, -+ { MF_MSG_TRUNCATED_LRU, "already truncated LRU page"}, -+ { MF_MSG_BUDDY, "free buddy page"}, -+ { MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"}, -+ { MF_MSG_DAX, "dax page"}, -+ { MF_MSG_UNSPLIT_THP, "unsplit thp"}, -+ { MF_MSG_UNKNOWN, "unknown page"}, -+}; -+ -+/* memory failure action results */ -+static const struct { -+ int result; -+ const char *action_result; -+} mf_action_result[] = { -+ { MF_IGNORED, "Ignored" }, -+ { MF_FAILED, "Failed" }, -+ { MF_DELAYED, "Delayed" }, -+ { MF_RECOVERED, "Recovered" }, -+}; -+ -+static const char *get_page_type(int page_type) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mf_page_type); i++) -+ if (mf_page_type[i].type == page_type) -+ return mf_page_type[i].page_type; -+ -+ return "unknown page"; -+} -+ -+static const char *get_action_result(int result) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(mf_action_result); i++) -+ if (mf_action_result[i].result == result) -+ return mf_action_result[i].action_result; -+ -+ return "unknown"; -+} -+ -+ -+int ras_memory_failure_event_handler(struct trace_seq *s, -+ struct pevent_record *record, -+ struct event_format *event, void *context) -+{ -+ unsigned long long val; -+ struct ras_events *ras = context; -+ time_t now; -+ struct tm *tm; -+ struct ras_mf_event ev; -+ -+ /* -+ * Newer kernels (3.10-rc1 or upper) provide an uptime clock. -+ * On previous kernels, the way to properly generate an event would -+ * be to inject a fake one, measure its timestamp and diff it against -+ * gettimeofday. We won't do it here. Instead, let's use uptime, -+ * falling-back to the event report's time, if "uptime" clock is -+ * not available (legacy kernels). -+ */ -+ -+ if (ras->use_uptime) -+ now = record->ts/user_hz + ras->uptime_diff; -+ else -+ now = time(NULL); -+ -+ tm = localtime(&now); -+ if (tm) -+ strftime(ev.timestamp, sizeof(ev.timestamp), -+ "%Y-%m-%d %H:%M:%S %z", tm); -+ trace_seq_printf(s, "%s ", ev.timestamp); -+ -+ if (pevent_get_field_val(s, event, "pfn", record, &val, 1) < 0) -+ return -1; -+ sprintf(ev.pfn, "0x%llx", val); -+ trace_seq_printf(s, "pfn=0x%llx ", val); -+ -+ if (pevent_get_field_val(s, event, "type", record, &val, 1) < 0) -+ return -1; -+ ev.page_type = get_page_type(val); -+ trace_seq_printf(s, "page_type=%s ", ev.page_type); -+ -+ if (pevent_get_field_val(s, event, "result", record, &val, 1) < 0) -+ return -1; -+ ev.action_result = get_action_result(val); -+ trace_seq_printf(s, "action_result=%s ", ev.action_result); -+ -+ /* Store data into the SQLite DB */ -+#ifdef HAVE_SQLITE3 -+ ras_store_mf_event(ras, &ev); -+#endif -+ -+#ifdef HAVE_ABRT_REPORT -+ /* Report event to ABRT */ -+ ras_report_mf_event(ras, &ev); -+#endif -+ -+ return 0; -+} ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ b/ras-memory-failure-handler.h 2021-10-14 16:31:36.840657728 -0400 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+*/ -+ -+#ifndef __RAS_MEMORY_FAILURE_HANDLER_H -+#define __RAS_MEMORY_FAILURE_HANDLER_H -+ -+#include "ras-events.h" -+#include "libtrace/event-parse.h" -+ -+int ras_memory_failure_event_handler(struct trace_seq *s, -+ struct pevent_record *record, -+ struct event_format *event, void *context); -+ -+#endif ---- a/ras-record.c 2018-04-25 06:19:03.000000000 -0400 -+++ b/ras-record.c 2021-10-14 16:31:36.840657728 -0400 -@@ -404,6 +404,55 @@ sqlite3_bind_text(priv->stmt_mce_record, - } - #endif - -+/* -+ * Table and functions to handle ras:memory_failure -+ */ -+ -+#ifdef HAVE_MEMORY_FAILURE -+static const struct db_fields mf_event_fields[] = { -+ { .name="id", .type="INTEGER PRIMARY KEY" }, -+ { .name="timestamp", .type="TEXT" }, -+ { .name="pfn", .type="TEXT" }, -+ { .name="page_type", .type="TEXT" }, -+ { .name="action_result", .type="TEXT" }, -+}; -+ -+static const struct db_table_descriptor mf_event_tab = { -+ .name = "memory_failure_event", -+ .fields = mf_event_fields, -+ .num_fields = ARRAY_SIZE(mf_event_fields), -+}; -+ -+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) -+{ -+ int rc; -+ struct sqlite3_priv *priv = ras->db_priv; -+ -+ if (!priv || !priv->stmt_mf_event) -+ return 0; -+ log(TERM, LOG_INFO, "memory_failure_event store: %p\n", priv->stmt_mf_event); -+ -+ sqlite3_bind_text(priv->stmt_mf_event, 1, ev->timestamp, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 2, ev->pfn, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 3, ev->page_type, -1, NULL); -+ sqlite3_bind_text(priv->stmt_mf_event, 4, ev->action_result, -1, NULL); -+ -+ rc = sqlite3_step(priv->stmt_mf_event); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed to do memory_failure_event step on sqlite: error = %d\n", rc); -+ -+ rc = sqlite3_reset(priv->stmt_mf_event); -+ if (rc != SQLITE_OK && rc != SQLITE_DONE) -+ log(TERM, LOG_ERR, -+ "Failed reset memory_failure_event on sqlite: error = %d\n", -+ rc); -+ -+ log(TERM, LOG_INFO, "register inserted at db\n"); -+ -+ return rc; -+} -+#endif - - /* - * Generic code -@@ -567,6 +616,13 @@ usleep(10000); - rc = ras_mc_prepare_stmt(priv, &priv->stmt_arm_record, - &arm_event_tab); - #endif -+#ifdef HAVE_MEMORY_FAILURE -+ rc = ras_mc_create_table(priv, &mf_event_tab); -+ if (rc == SQLITE_OK) { -+ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mf_event, -+ &mf_event_tab); -+ } -+#endif - - ras->db_priv = priv; - return 0; ---- a/ras-record.h 2018-04-25 06:19:03.000000000 -0400 -+++ b/ras-record.h 2021-10-14 16:31:36.840657728 -0400 -@@ -75,12 +75,20 @@ struct ras_arm_event { - int32_t psci_state; - }; - -+struct ras_mf_event { -+ char timestamp[64]; -+ char pfn[30]; -+ const char *page_type; -+ const char *action_result; -+}; -+ - struct ras_mc_event; - struct ras_aer_event; - struct ras_extlog_event; - struct ras_non_standard_event; - struct ras_arm_event; - struct mce_event; -+struct ras_mf_event; - - #ifdef HAVE_SQLITE3 - -@@ -104,6 +112,9 @@ struct sqlite3_priv { - #ifdef HAVE_ARM - sqlite3_stmt *stmt_arm_record; - #endif -+#ifdef HAVE_MEMORY_FAILURE -+ sqlite3_stmt *stmt_mf_event; -+#endif - }; - - int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras); -@@ -113,6 +124,7 @@ int ras_store_mce_record(struct ras_even - int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev); - int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev); - int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); -+int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev); - - #else - static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; -@@ -122,6 +134,7 @@ static inline int ras_store_mce_record(s - static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; }; - static inline int ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; - static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; -+static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; - - #endif - ---- a/ras-report.c 2017-10-14 05:11:34.000000000 -0400 -+++ b/ras-report.c 2021-10-14 16:31:36.840657728 -0400 -@@ -255,6 +255,28 @@ "midr=0x%lx\n" \ - return 0; - } - -+static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev) -+{ -+ char bt_buf[MAX_BACKTRACE_SIZE]; -+ -+ if (!buf || !ev) -+ return -1; -+ -+ sprintf(bt_buf, "BACKTRACE=" \ -+ "timestamp=%s\n" \ -+ "pfn=%s\n" \ -+ "page_type=%s\n" \ -+ "action_result=%s\n", \ -+ ev->timestamp, \ -+ ev->pfn, \ -+ ev->page_type, \ -+ ev->action_result); -+ -+ strcat(buf, bt_buf); -+ -+ return 0; -+} -+ - static int commit_report_backtrace(int sockfd, int type, void *ev){ - char buf[MAX_BACKTRACE_SIZE]; - char *pbuf = buf; -@@ -283,6 +305,9 @@ memset(buf, 0, MAX_BACKTRACE_SIZE); - case ARM_EVENT: - rc = set_arm_event_backtrace(buf, (struct ras_arm_event *)ev); - break; -+ case MF_EVENT: -+ rc = set_mf_event_backtrace(buf, (struct ras_mf_event *)ev); -+ break; - default: - return -1; - } -@@ -549,3 +574,46 @@ return 0; - return -1; - } - } -+ -+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) -+{ -+ char buf[MAX_MESSAGE_SIZE]; -+ int sockfd = 0; -+ int done = 0; -+ int rc = -1; -+ -+ memset(buf, 0, sizeof(buf)); -+ -+ sockfd = setup_report_socket(); -+ if (sockfd < 0) -+ return -1; -+ -+ rc = commit_report_basic(sockfd); -+ if (rc < 0) -+ goto mf_fail; -+ -+ rc = commit_report_backtrace(sockfd, MF_EVENT, ev); -+ if (rc < 0) -+ goto mf_fail; -+ -+ sprintf(buf, "ANALYZER=%s", "rasdaemon-memory_failure"); -+ rc = write(sockfd, buf, strlen(buf) + 1); -+ if (rc < strlen(buf) + 1) -+ goto mf_fail; -+ -+ sprintf(buf, "REASON=%s", "memory failure problem"); -+ rc = write(sockfd, buf, strlen(buf) + 1); -+ if (rc < strlen(buf) + 1) -+ goto mf_fail; -+ -+ done = 1; -+ -+mf_fail: -+ if (sockfd > 0) -+ close(sockfd); -+ -+ if (done) -+ return 0; -+ else -+ return -1; -+} ---- a/ras-report.h 2017-10-14 05:11:34.000000000 -0400 -+++ b/ras-report.h 2021-10-14 16:31:36.840657728 -0400 -@@ -34,7 +34,8 @@ enum { - MCE_EVENT, - AER_EVENT, - NON_STANDARD_EVENT, -- ARM_EVENT -+ ARM_EVENT, -+ MF_EVENT, - }; - - #ifdef HAVE_ABRT_REPORT -@@ -44,6 +45,7 @@ int ras_report_aer_event(struct ras_even - int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev); - int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev); - int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); -+int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev); - - #else - -@@ -52,6 +54,7 @@ static inline int ras_report_aer_event(s - static inline int ras_report_mce_event(struct ras_events *ras, struct mce_event *ev) { return 0; }; - static inline int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standard_event *ev) { return 0; }; - static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; -+static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; - - #endif - ---- a/Makefile.am 2018-04-25 06:21:56.000000000 -0400 -+++ b/Makefile.am 2021-10-14 16:37:42.423639762 -0400 -@@ -41,12 +41,16 @@ endif - if WITH_EXTLOG - rasdaemon_SOURCES += ras-extlog-handler.c - endif -+if WITH_MEMORY_FAILURE -+ rasdaemon_SOURCES += ras-memory-failure-handler.c -+endif - if WITH_ABRT_REPORT - rasdaemon_SOURCES += ras-report.c - endif - if WITH_HISI_NS_DECODE - rasdaemon_SOURCES += non-standard-hisi_hip07.c - endif -+ - rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ ---- a/ras-events.c 2021-10-14 16:31:36.730658636 -0400 -+++ b/ras-events.c 2021-10-14 16:37:11.043898809 -0400 -@@ -33,6 +33,7 @@ * Foundation, Inc., 51 Franklin Street, - #include "ras-arm-handler.h" - #include "ras-mce-handler.h" - #include "ras-extlog-handler.h" -+#include "ras-memory-failure-handler.h" - #include "ras-record.h" - #include "ras-logger.h" - -@@ -218,6 +219,10 @@ if (rc < 0) { - rc |= __toggle_ras_mc_event(ras, "ras", "arm_event", enable); - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); -+#endif -+ - free_ras: - free(ras); - return rc; -@@ -736,6 +741,16 @@ (void)open("/sys/kernel/debug/ras/daemon - "ras", "aer_event"); - #endif - -+#ifdef HAVE_MEMORY_FAILURE -+ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", -+ ras_memory_failure_event_handler); -+ if (!rc) -+ num_events++; -+ else -+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", -+ "ras", "memory_failure_event"); -+#endif -+ - if (!num_events) { - log(ALL, LOG_INFO, - "Failed to trace all supported RAS events. Aborting.\n"); diff --git a/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch b/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch deleted file mode 100644 index fdc509b..0000000 --- a/28ea956acc2dab7c18b4701f9657afb9ab3ddc79.patch +++ /dev/null @@ -1,28 +0,0 @@ -commit 28ea956acc2dab7c18b4701f9657afb9ab3ddc79 -Author: Muralidhara M K -Date: Mon Jul 12 05:18:43 2021 -0500 - - rasdaemon: set SMCA maximum number of banks to 64 - - Newer AMD systems with SMCA banks support up to 64 MCA banks per CPU. - - This patch is based on the commit below upstremed into the kernel: - a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64") - - Signed-off-by: Muralidhara M K - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index e0cf512..3c346f4 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -75,6 +75,9 @@ enum smca_bank_types { - N_SMCA_BANK_TYPES - }; - -+/* Maximum number of MCA banks per CPU. */ -+#define MAX_NR_BANKS 64 -+ - /* SMCA Extended error strings */ - /* Load Store */ - static const char * const smca_ls_mce_desc[] = { diff --git a/2a1d217660351c08eb2f8bccebf939abba2f7e69.patch b/2a1d217660351c08eb2f8bccebf939abba2f7e69.patch deleted file mode 100644 index 1b5844d..0000000 --- a/2a1d217660351c08eb2f8bccebf939abba2f7e69.patch +++ /dev/null @@ -1,66 +0,0 @@ -commit 2a1d217660351c08eb2f8bccebf939abba2f7e69 -Author: Brian WoodsGhannam, Yazen -Date: Fri Nov 1 15:48:13 2019 +0100 - - rasdaemon: rename CPU_NAPLES cputype - - Change CPU_NAPLES to CPU_AMD_SMCA to reflect that it isn't just NAPLES - that is supported, but AMD's Scalable Machine Check Architecture (SMCA). - - [ Yazen: change family check to feature check, and change CPU name. ] - - CC: "mchehab+samsung@kernel.org" , "Namburu, Chandu-babu" # Thread-Topic: [PATCH 1/2] rasdaemon: rename CPU_NAPLES cputype - Signed-off-by: Brian Woods - Signed-off-by: Yazen Ghannam - Cc: Chandu-babu Namburu - Signed-off-by: Mauro Carvalho Chehab - ---- - ras-mce-handler.c | 10 ++++++---- - ras-mce-handler.h | 2 +- - 2 files changed, 7 insertions(+), 5 deletions(-) - ---- rasdaemon-0.6.1.orig/ras-mce-handler.c 2021-05-26 15:16:24.699096556 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.c 2021-05-26 15:18:06.543162745 -0400 -@@ -55,7 +55,7 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series - [CPU_KNIGHTS_LANDING] = "Knights Landing", - [CPU_KNIGHTS_MILL] = "Knights Mill", - [CPU_SKYLAKE_XEON] = "Skylake server", -- [CPU_NAPLES] = "AMD Family 17h Zen1" -+ [CPU_AMD_SMCA] = "AMD Scalable MCA", - }; - - static enum cputype select_intel_cputype(struct ras_events *ras) -@@ -191,8 +191,10 @@ ret = 0; - if (!strcmp(mce->vendor, "AuthenticAMD")) { - if (mce->family == 15) - mce->cputype = CPU_K8; -- if (mce->family == 23) -- mce->cputype = CPU_NAPLES; -+ if (strstr(mce->processor_flags, "smca")) { -+ mce->cputype = CPU_AMD_SMCA; -+ goto ret; -+ } - if (mce->family > 23) { - log(ALL, LOG_INFO, - "Can't parse MCE for this AMD CPU yet %d\n", -@@ -435,7 +437,7 @@ if (pevent_get_field_val(s, event, "ipid - case CPU_K8: - rc = parse_amd_k8_event(ras, &e); - break; -- case CPU_NAPLES: -+ case CPU_AMD_SMCA: - rc = parse_amd_smca_event(ras, &e); - break; - default: /* All other CPU types are Intel */ ---- rasdaemon-0.6.1.orig/ras-mce-handler.h 2021-05-26 15:17:15.409631590 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.h 2021-05-26 15:18:20.102038424 -0400 -@@ -50,7 +50,7 @@ enum cputype { - CPU_KNIGHTS_LANDING, - CPU_KNIGHTS_MILL, - CPU_SKYLAKE_XEON, -- CPU_NAPLES, -+ CPU_AMD_SMCA, - }; - - struct mce_event { diff --git a/546cf713f667437fb6e283cc3dc090679eb47d08.patch b/546cf713f667437fb6e283cc3dc090679eb47d08.patch deleted file mode 100644 index 448b1f6..0000000 --- a/546cf713f667437fb6e283cc3dc090679eb47d08.patch +++ /dev/null @@ -1,372 +0,0 @@ -commit 546cf713f667437fb6e283cc3dc090679eb47d08 -Author: Subhendu Saha -Date: Tue Jan 12 03:29:55 2021 -0500 - - Fix ras-mc-ctl script. - - When rasdaemon is compiled without enabling aer, mce, devlink, - etc., those tables are not created in the database file. Then - ras-mc-ctl script breaks trying to query data from non-existent - tables. - - Signed-off-by: Subhendu Saha subhends@akamai.com - Signed-off-by: Mauro Carvalho Chehab - ---- - util/ras-mc-ctl.in | 310 ++++++++++++++++++++++++++++------------------------- - 1 file changed, 168 insertions(+), 142 deletions(-) - ---- a/util/ras-mc-ctl.in 2021-10-12 13:45:43.260646935 -0400 -+++ b/util/ras-mc-ctl.in 2021-10-12 13:46:38.610158949 -0400 -@@ -41,6 +41,16 @@ my $sysconfdir = "@sysconfdir@"; - my $dmidecode = find_prog ("dmidecode"); - my $modprobe = find_prog ("modprobe") or exit (1); - -+my $has_aer = 0; -+my $has_arm = 0; -+my $has_extlog = 0; -+my $has_mce = 0; -+ -+@WITH_AER_TRUE@$has_aer = 1; -+@WITH_ARM_TRUE@$has_arm = 1; -+@WITH_EXTLOG_TRUE@$has_extlog = 1; -+@WITH_MCE_TRUE@$has_mce = 1; -+ - my %conf = (); - my %bus = (); - my %dimm_size = (); -@@ -1145,70 +1155,78 @@ sub summary - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($err_type, $msg, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count $err_type errors: $msg\n"; -- } -- if ($out ne "") { -- print "PCIe AER events summary:\n$out\n"; -- } else { -- print "No PCIe AER errors.\n\n"; -+ if ($has_aer == 1) { -+ $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($err_type, $msg, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count $err_type errors: $msg\n"; -+ } -+ if ($out ne "") { -+ print "PCIe AER events summary:\n$out\n"; -+ } else { -+ print "No PCIe AER errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # ARM processor arm_event errors -- $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($affinity, $mpidr, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count errors\n"; -- } -- if ($out ne "") { -- print "ARM processor events summary:\n$out\n"; -- } else { -- print "No ARM processor errors.\n\n"; -+ if ($has_arm == 1) { -+ $query = "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($affinity, $mpidr, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count errors\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events summary:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # extlog errors -- $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($etype, $severity, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $etype_string = get_extlog_type($etype); -- $severity_string = get_extlog_severity($severity); -- $out .= "\t$count $etype_string $severity_string errors\n"; -- } -- if ($out ne "") { -- print "Extlog records summary:\n$out"; -- } else { -- print "No Extlog errors.\n"; -+ if ($has_extlog == 1) { -+ $query = "select etype, severity, count(*) from extlog_event group by etype, severity"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($etype, $severity, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $etype_string = get_extlog_type($etype); -+ $severity_string = get_extlog_severity($severity); -+ $out .= "\t$count $etype_string $severity_string errors\n"; -+ } -+ if ($out ne "") { -+ print "Extlog records summary:\n$out"; -+ } else { -+ print "No Extlog errors.\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # MCE mce_record errors -- $query = "select error_msg, count(*) from mce_record group by error_msg"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($msg, $count)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "\t$count $msg errors\n"; -- } -- if ($out ne "") { -- print "MCE records summary:\n$out"; -- } else { -- print "No MCE errors.\n"; -+ if ($has_mce == 1) { -+ $query = "select error_msg, count(*) from mce_record group by error_msg"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($msg, $count)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "\t$count $msg errors\n"; -+ } -+ if ($out ne "") { -+ print "MCE records summary:\n$out"; -+ } else { -+ print "No MCE errors.\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - undef($dbh); - } -@@ -1244,105 +1262,113 @@ sub errors - $query_handle->finish; - - # PCIe AER aer_event errors -- $query = "select id, timestamp, err_type, err_msg from aer_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $type, $msg)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $time $type error: $msg\n"; -- } -- if ($out ne "") { -- print "PCIe AER events:\n$out\n"; -- } else { -- print "No PCIe AER errors.\n\n"; -+ if ($has_aer == 1) { -+ $query = "select id, timestamp, err_type, err_msg from aer_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $time, $type, $msg)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $time $type error: $msg\n"; -+ } -+ if ($out ne "") { -+ print "PCIe AER events:\n$out\n"; -+ } else { -+ print "No PCIe AER errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # ARM processor arm_event errors -- $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $timestamp error: "; -- $out .= "error_count=$error_count, " if ($error_count); -- $out .= "affinity_level=$affinity, "; -- $out .= sprintf "mpidr=0x%x, ", $mpidr; -- $out .= sprintf "running_state=0x%x, ", $r_state; -- $out .= sprintf "psci_state=0x%x", $psci_state; -- $out .= "\n"; -- } -- if ($out ne "") { -- print "ARM processor events:\n$out\n"; -- } else { -- print "No ARM processor errors.\n\n"; -+ if ($has_arm == 1) { -+ $query = "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $timestamp error: "; -+ $out .= "error_count=$error_count, " if ($error_count); -+ $out .= "affinity_level=$affinity, "; -+ $out .= sprintf "mpidr=0x%x, ", $mpidr; -+ $out .= sprintf "running_state=0x%x, ", $r_state; -+ $out .= sprintf "psci_state=0x%x", $psci_state; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "ARM processor events:\n$out\n"; -+ } else { -+ print "No ARM processor errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # Extlog errors -- $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); -- $out = ""; -- while($query_handle->fetch()) { -- $etype_string = get_extlog_type($etype); -- $severity_string = get_extlog_severity($severity); -- $out .= "$id $timestamp error: "; -- $out .= "type=$etype_string, "; -- $out .= "severity=$severity_string, "; -- $out .= sprintf "address=0x%08x, ", $addr; -- $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); -- $out .= "fru_text='$fru_text', "; -- $out .= get_cper_data_text($cper_data) if ($cper_data); -- $out .= "\n"; -- } -- if ($out ne "") { -- print "Extlog events:\n$out\n"; -- } else { -- print "No Extlog errors.\n\n"; -+ if ($has_extlog) { -+ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $etype_string = get_extlog_type($etype); -+ $severity_string = get_extlog_severity($severity); -+ $out .= "$id $timestamp error: "; -+ $out .= "type=$etype_string, "; -+ $out .= "severity=$severity_string, "; -+ $out .= sprintf "address=0x%08x, ", $addr; -+ $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id); -+ $out .= "fru_text='$fru_text', "; -+ $out .= get_cper_data_text($cper_data) if ($cper_data); -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "Extlog events:\n$out\n"; -+ } else { -+ print "No Extlog errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - # MCE mce_record errors -- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; -- $query_handle = $dbh->prepare($query); -- $query_handle->execute(); -- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); -- $out = ""; -- while($query_handle->fetch()) { -- $out .= "$id $time error: $msg"; -- $out .= ", CPU $cpuvendor" if ($cpuvendor); -- $out .= ", bank $bank_name" if ($bank_name); -- $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); -- $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); -- $out .= ", $mc_location" if ($mc_location); -- $out .= ", $user_action" if ($user_action); -- $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); -- $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); -- $out .= sprintf ", status=0x%08x", $status if ($status); -- $out .= sprintf ", addr=0x%08x", $addr if ($addr); -- $out .= sprintf ", misc=0x%08x", $misc if ($misc); -- $out .= sprintf ", ip=0x%08x", $ip if ($ip); -- $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); -- $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); -- $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); -- $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); -- $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); -- $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); -- $out .= sprintf ", cs=0x%08x", $cs if ($cs); -- $out .= sprintf ", bank=0x%08x", $bank if ($bank); -+ if ($has_mce == 1) { -+ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id"; -+ $query_handle = $dbh->prepare($query); -+ $query_handle->execute(); -+ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location)); -+ $out = ""; -+ while($query_handle->fetch()) { -+ $out .= "$id $time error: $msg"; -+ $out .= ", CPU $cpuvendor" if ($cpuvendor); -+ $out .= ", bank $bank_name" if ($bank_name); -+ $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg); -+ $out .= ", mci $mcistatus_msg" if ($mcistatus_msg); -+ $out .= ", $mc_location" if ($mc_location); -+ $out .= ", $user_action" if ($user_action); -+ $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap); -+ $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus); -+ $out .= sprintf ", status=0x%08x", $status if ($status); -+ $out .= sprintf ", addr=0x%08x", $addr if ($addr); -+ $out .= sprintf ", misc=0x%08x", $misc if ($misc); -+ $out .= sprintf ", ip=0x%08x", $ip if ($ip); -+ $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc); -+ $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime); -+ $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu); -+ $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid); -+ $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid); -+ $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid); -+ $out .= sprintf ", cs=0x%08x", $cs if ($cs); -+ $out .= sprintf ", bank=0x%08x", $bank if ($bank); - -- $out .= "\n"; -- } -- if ($out ne "") { -- print "MCE events:\n$out\n"; -- } else { -- print "No MCE errors.\n\n"; -+ $out .= "\n"; -+ } -+ if ($out ne "") { -+ print "MCE events:\n$out\n"; -+ } else { -+ print "No MCE errors.\n\n"; -+ } -+ $query_handle->finish; - } -- $query_handle->finish; - - undef($dbh); - } diff --git a/60a91e4da4f2daf2b10143fc148a8043312b61e5.patch b/60a91e4da4f2daf2b10143fc148a8043312b61e5.patch deleted file mode 100644 index 57a4e46..0000000 --- a/60a91e4da4f2daf2b10143fc148a8043312b61e5.patch +++ /dev/null @@ -1,149 +0,0 @@ -commit 60a91e4da4f2daf2b10143fc148a8043312b61e5 -Author: Aristeu Rozanski -Date: Wed Aug 1 16:29:58 2018 -0400 - - rasdaemon: ras-mc-ctl: add option to show error counts - - In some scenarios it might not be desirable to have a daemon running - to parse and store the errors provided by EDAC and only having the - number of CEs and UEs is enough. This patch implements this feature - as an ras-mc-ctl option. - - Signed-off-by: Aristeu Rozanski - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in -index 38b7824..aee431a 100755 ---- a/util/ras-mc-ctl.in -+++ b/util/ras-mc-ctl.in -@@ -50,6 +50,8 @@ my %dimm_location = (); - my %csrow_size = (); - my %rank_size = (); - my %csrow_ranks = (); -+my %dimm_ce_count = (); -+my %dimm_ue_count = (); - - my @layers; - my @max_pos; -@@ -76,6 +78,7 @@ Usage: $prog [OPTIONS...] - --layout Display the memory layout. - --summary Presents a summary of the logged errors. - --errors Shows the errors stored at the error database. -+ --error-count Shows the corrected and uncorrected error counts using sysfs. - --help This help message. - EOF - -@@ -83,7 +86,7 @@ parse_cmdline(); - - if ( $conf{opt}{mainboard} || $conf{opt}{print_labels} - || $conf{opt}{register_labels} || $conf{opt}{display_memory_layout} -- || $conf{opt}{guess_dimm_label}) { -+ || $conf{opt}{guess_dimm_label} || $conf{opt}{error_count}) { - - get_mainboard_info(); - -@@ -105,6 +108,9 @@ if ( $conf{opt}{mainboard} || $conf{opt}{print_labels} - if ($conf{opt}{guess_dimm_label}) { - guess_dimm_label (); - } -+ if ($conf{opt}{error_count}) { -+ display_error_count (); -+ } - } - - if ($conf{opt}{status}) { -@@ -134,6 +140,7 @@ sub parse_cmdline - $conf{opt}{guess_dimm_label} = 0; - $conf{opt}{summary} = 0; - $conf{opt}{errors} = 0; -+ $conf{opt}{error_count} = 0; - - my $rref = \$conf{opt}{report}; - my $mref = \$conf{opt}{mainboard}; -@@ -150,7 +157,8 @@ sub parse_cmdline - "status" => \$conf{opt}{status}, - "layout" => \$conf{opt}{display_memory_layout}, - "summary" => \$conf{opt}{summary}, -- "errors" => \$conf{opt}{errors} -+ "errors" => \$conf{opt}{errors}, -+ "error-count" => \$conf{opt}{error_count} - ); - - usage(1) if !$rc; -@@ -284,6 +292,30 @@ sub parse_dimm_nodes - $dimm_label_file{$str_loc} = $file; - $dimm_location{$str_loc} = $location; - -+ my $count; -+ -+ $file =~s/dimm_label/dimm_ce_count/; -+ if (-e $file) { -+ open IN, $file; -+ chomp($count = ); -+ close IN; -+ } else { -+ log_error ("dimm_ce_count not found in sysfs. Old kernel?\n"); -+ exit -1; -+ } -+ $dimm_ce_count{$str_loc} = $count; -+ -+ $file =~s/dimm_ce_count/dimm_ue_count/; -+ if (-e $file) { -+ open IN, $file; -+ chomp($count = ); -+ close IN; -+ } else { -+ log_error ("dimm_ue_count not found in sysfs. Old kernel?\n"); -+ exit -1; -+ } -+ $dimm_ue_count{$str_loc} = $count; -+ - return; - } - } -@@ -906,6 +938,45 @@ sub display_memory_layout - dimm_display_mem(); - } - -+sub display_error_count -+{ -+ my $sysfs_dir = "/sys/devices/system/edac/mc"; -+ my $key; -+ my $max_width = 0; -+ my %dimm_labels = (); -+ -+ find ({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir); -+ -+ if (!scalar(keys %dimm_node)) { -+ log_error ("No DIMMs found in /sys or new sysfs EDAC interface not found.\n"); -+ exit -1; -+ } -+ -+ foreach $key (keys %dimm_node) { -+ my $label_width; -+ -+ open IN, $dimm_label_file{$key}; -+ chomp(my $label = ); -+ close IN; -+ $label_width = length $label; -+ -+ if ($label_width > $max_width) { -+ $max_width = $label_width; -+ } -+ $dimm_labels{$key} = $label; -+ } -+ my $string = "Label"; -+ $string .= " " x ($max_width - length $string); -+ print($string . "\tCE\tUE\n"); -+ -+ foreach $key (keys %dimm_node) { -+ my $ce_count = $dimm_ce_count{$key}; -+ my $ue_count = $dimm_ue_count{$key}; -+ -+ print("$dimm_labels{$key}\t$ce_count\t$ue_count\n"); -+ } -+} -+ - sub find_prog - { - my ($file) = @_; diff --git a/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch b/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch deleted file mode 100644 index 76afc8e..0000000 --- a/7937f0d6c2aaaed096f3a3d306416743c0dcb7a4.patch +++ /dev/null @@ -1,24 +0,0 @@ -commit 7937f0d6c2aaaed096f3a3d306416743c0dcb7a4 -Author: Muralidhara M K -Date: Wed Jul 28 01:52:12 2021 -0500 - - rasdaemon: Support MCE for AMD CPU family 19h - - Add support for family 19h x86 CPUs from AMD. - - Signed-off-by: Muralidhara M K - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/ras-mce-handler.c b/ras-mce-handler.c -index 805004a..f2b53d4 100644 ---- a/ras-mce-handler.c -+++ b/ras-mce-handler.c -@@ -208,7 +208,7 @@ static int detect_cpu(struct ras_events *ras) - mce->cputype = CPU_AMD_SMCA; - goto ret; - } -- if (mce->family > 23) { -+ if (mce->family > 25) { - log(ALL, LOG_INFO, - "Can't parse MCE for this AMD CPU yet %d\n", - mce->family); diff --git a/854364ba44aee9bc5646f6537fc744b0b54aff37.patch b/854364ba44aee9bc5646f6537fc744b0b54aff37.patch deleted file mode 100644 index 91bad1b..0000000 --- a/854364ba44aee9bc5646f6537fc744b0b54aff37.patch +++ /dev/null @@ -1,38 +0,0 @@ -commit 854364ba44aee9bc5646f6537fc744b0b54aff37 -Author: Muralidhara M K -Date: Thu Aug 20 21:00:57 2020 +0530 - - rasdaemon: Add 8 channel decoding for SMCA systems - - Current Scalable Machine Check Architecture (SMCA) systems support up - to 8 UMC channels. - - To find the UMC channel represented by a bank, look at the 6th nibble - in the MCA_IPID[InstanceId] field. - - Signed-off-by: Muralidhara M K - [ Adjust commit message. ] - Signed-off-by: Yazen Ghannam - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index d0b6cb6..7c619fd 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -438,15 +438,7 @@ static void amd_decode_errcode(struct mce_event *e) - */ - static int find_umc_channel(struct mce_event *e) - { -- uint32_t umc_instance_id[] = {0x50f00, 0x150f00}; -- uint32_t instance_id = EXTRACT(e->ipid, 0, 31); -- int i, channel = -1; -- -- for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) -- if (umc_instance_id[i] == instance_id) -- channel = i; -- -- return channel; -+ return EXTRACT(e->ipid, 0, 31) >> 20; - } - /* Decode extended errors according to Scalable MCA specification */ - static void decode_smca_error(struct mce_event *e) diff --git a/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch b/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch deleted file mode 100644 index e3617fc..0000000 --- a/8704a85d8dc3483423ec2934fee8132f85f8fdb6.patch +++ /dev/null @@ -1,207 +0,0 @@ -commit 8704a85d8dc3483423ec2934fee8132f85f8fdb6 -Author: Brian WoodsGhannam, Yazen -Date: Fri Nov 1 15:48:14 2019 +0100 - - rasdaemon: add support for new AMD SMCA bank types - - Going forward, the Scalable Machine Check Architecture (SMCA) has some - updated and additional bank types which show up in Zen2. The differing - bank types include: CS_V2, PSP_V2, SMU_V2, MP5, NBIO, and PCIE. The V2 - bank types replace the original bank types but have unique HWID/MCAtype - IDs from the originals so there's no conflicts between different - versions or other bank types. All of the differing bank types have new - MCE descriptions which have been added as well. - - CC: "mchehab+samsung@kernel.org" , "Namburu, Chandu-babu" # Thread-Topic: [PATCH 2/2] rasdaemon: add support for new AMD SMCA bank types - Signed-off-by: Brian Woods - Signed-off-by: Yazen Ghannam - Cc: Chandu-babu Namburu - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index 6c3e8a5..114e786 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -49,11 +49,17 @@ enum smca_bank_types { - SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 Cache */ - SMCA_CS, /* Coherent Slave */ -+ SMCA_CS_V2, /* Coherent Slave V2 */ - SMCA_PIE, /* Power, Interrupts, etc. */ - SMCA_UMC, /* Unified Memory Controller */ - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ -+ SMCA_PSP_V2, /* Platform Security Processor V2 */ - SMCA_SMU, /* System Management Unit */ -+ SMCA_SMU_V2, /* System Management Unit V2 */ -+ SMCA_MP5, /* Microprocessor 5 Unit */ -+ SMCA_NBIO, /* Northbridge IO Unit */ -+ SMCA_PCIE, /* PCI Express Unit */ - N_SMCA_BANK_TYPES - }; - -@@ -165,6 +171,23 @@ static const char * const smca_cs_mce_desc[] = { - "Atomic request parity", - "ECC error on probe filter access", - }; -+/* Coherent Slave Unit V2 */ -+static const char * const smca_cs2_mce_desc[] = { -+ "Illegal Request", -+ "Address Violation", -+ "Security Violation", -+ "Illegal Response", -+ "Unexpected Response", -+ "Request or Probe Parity Error", -+ "Read Response Parity Error", -+ "Atomic Request Parity Error", -+ "SDP read response had no match in the CS queue", -+ "Probe Filter Protocol Error", -+ "Probe Filter ECC Error", -+ "SDP read response had an unexpected RETRY error", -+ "Counter overflow error", -+ "Counter underflow error", -+}; - /* Power, Interrupt, etc.. */ - static const char * const smca_pie_mce_desc[] = { - "HW assert", -@@ -189,10 +212,75 @@ static const char * const smca_pb_mce_desc[] = { - static const char * const smca_psp_mce_desc[] = { - "PSP RAM ECC or parity error", - }; -+/* Platform Security Processor V2 */ -+static const char * const smca_psp2_mce_desc[] = { -+ "High SRAM ECC or parity error", -+ "Low SRAM ECC or parity error", -+ "Instruction Cache Bank 0 ECC or parity error", -+ "Instruction Cache Bank 1 ECC or parity error", -+ "Instruction Tag Ram 0 parity error", -+ "Instruction Tag Ram 1 parity error", -+ "Data Cache Bank 0 ECC or parity error", -+ "Data Cache Bank 1 ECC or parity error", -+ "Data Cache Bank 2 ECC or parity error", -+ "Data Cache Bank 3 ECC or parity error", -+ "Data Tag Bank 0 parity error", -+ "Data Tag Bank 1 parity error", -+ "Data Tag Bank 2 parity error", -+ "Data Tag Bank 3 parity error", -+ "Dirty Data Ram parity error", -+ "TLB Bank 0 parity error", -+ "TLB Bank 1 parity error", -+ "System Hub Read Buffer ECC or parity error", -+}; - /* System Management Unit */ - static const char * const smca_smu_mce_desc[] = { - "SMU RAM ECC or parity error", - }; -+/* System Management Unit V2 */ -+static const char * const smca_smu2_mce_desc[] = { -+ "High SRAM ECC or parity error", -+ "Low SRAM ECC or parity error", -+ "Data Cache Bank A ECC or parity error", -+ "Data Cache Bank B ECC or parity error", -+ "Data Tag Cache Bank A ECC or parity error", -+ "Data Tag Cache Bank B ECC or parity error", -+ "Instruction Cache Bank A ECC or parity error", -+ "Instruction Cache Bank B ECC or parity error", -+ "Instruction Tag Cache Bank A ECC or parity error", -+ "Instruction Tag Cache Bank B ECC or parity error", -+ "System Hub Read Buffer ECC or parity error", -+}; -+/* Microprocessor 5 Unit */ -+static const char * const smca_mp5_mce_desc[] = { -+ "High SRAM ECC or parity error", -+ "Low SRAM ECC or parity error", -+ "Data Cache Bank A ECC or parity error", -+ "Data Cache Bank B ECC or parity error", -+ "Data Tag Cache Bank A ECC or parity error", -+ "Data Tag Cache Bank B ECC or parity error", -+ "Instruction Cache Bank A ECC or parity error", -+ "Instruction Cache Bank B ECC or parity error", -+ "Instruction Tag Cache Bank A ECC or parity error", -+ "Instruction Tag Cache Bank B ECC or parity error", -+}; -+/* Northbridge IO Unit */ -+static const char * const smca_nbio_mce_desc[] = { -+ "ECC or Parity error", -+ "PCIE error", -+ "SDP ErrEvent error", -+ "SDP Egress Poison Error", -+ "IOHC Internal Poison Error", -+}; -+/* PCI Express Unit */ -+static const char * const smca_pcie_mce_desc[] = { -+ "CCIX PER Message logging", -+ "CCIX Read Response with Status: Non-Data Error", -+ "CCIX Write Response with Status: Non-Data Error", -+ "CCIX Read Response with Status: Data Error", -+ "CCIX Non-okay write response with data error", -+}; -+ - - struct smca_mce_desc { - const char * const *descs; -@@ -208,11 +296,17 @@ static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, - [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, - [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, -+ [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, - [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, - [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, - [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, - [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, -+ [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, - [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, -+ [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)}, -+ [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, -+ [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, -+ [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, - }; - - struct smca_hwid { -@@ -235,6 +329,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* Data Fabric MCA types */ - { SMCA_CS, 0x0000002E }, -+ { SMCA_CS_V2, 0x0002002E }, - { SMCA_PIE, 0x0001002E }, - - /* Unified Memory Controller MCA type */ -@@ -245,9 +340,20 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* Platform Security Processor MCA type */ - { SMCA_PSP, 0x000000FF }, -+ { SMCA_PSP_V2, 0x000100FF }, - - /* System Management Unit MCA type */ - { SMCA_SMU, 0x00000001 }, -+ { SMCA_SMU_V2, 0x00010001 }, -+ -+ /* Microprocessor 5 Unit MCA type */ -+ { SMCA_MP5, 0x00020001 }, -+ -+ /* Northbridge IO Unit MCA type */ -+ { SMCA_NBIO, 0x00000018 }, -+ -+ /* PCI Express Unit MCA type */ -+ { SMCA_PCIE, 0x00000046 }, - }; - - struct smca_bank_name { -@@ -264,11 +370,17 @@ static struct smca_bank_name smca_names[] = { - [SMCA_FP] = { "Floating Point Unit" }, - [SMCA_L3_CACHE] = { "L3 Cache" }, - [SMCA_CS] = { "Coherent Slave" }, -+ [SMCA_CS_V2] = { "Coherent Slave" }, - [SMCA_PIE] = { "Power, Interrupts, etc." }, - [SMCA_UMC] = { "Unified Memory Controller" }, - [SMCA_PB] = { "Parameter Block" }, - [SMCA_PSP] = { "Platform Security Processor" }, -+ [SMCA_PSP_V2] = { "Platform Security Processor" }, - [SMCA_SMU] = { "System Management Unit" }, -+ [SMCA_SMU_V2] = { "System Management Unit" }, -+ [SMCA_MP5] = { "Microprocessor 5 Unit" }, -+ [SMCA_NBIO] = { "Northbridge IO Unit" }, -+ [SMCA_PCIE] = { "PCI Express Unit" }, - }; - - static void amd_decode_errcode(struct mce_event *e) diff --git a/9acef39f13833f7d53ef96abc5a72e79384260f4.patch b/9acef39f13833f7d53ef96abc5a72e79384260f4.patch deleted file mode 100644 index c4c8af1..0000000 --- a/9acef39f13833f7d53ef96abc5a72e79384260f4.patch +++ /dev/null @@ -1,230 +0,0 @@ -commit 9acef39f13833f7d53ef96abc5a72e79384260f4 -Author: Naveen Krishna Chatradhi -Date: Tue Jun 1 11:01:17 2021 +0530 - - rasdaemon: Add new SMCA bank types with error decoding - - Upcoming systems with Scalable Machine Check Architecture (SMCA) have - new MCA banks added. - - This patch adds the (HWID, MCATYPE) tuple, name and error decoding for - those new SMCA banks. - While at it, optimize the string names in smca_bank_name[]. - - Signed-off-by: Muralidhara M K - Signed-off-by: Naveen Krishna Chatradhi - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/mce-amd-smca.c b/mce-amd-smca.c -index 7c619fd..e0cf512 100644 ---- a/mce-amd-smca.c -+++ b/mce-amd-smca.c -@@ -47,7 +47,7 @@ - /* These may be used by multiple smca_hwid_mcatypes */ - enum smca_bank_types { - SMCA_LS = 0, /* Load Store */ -- SMCA_LS_V2, /* Load Store */ -+ SMCA_LS_V2, - SMCA_IF, /* Instruction Fetch */ - SMCA_L2_CACHE, /* L2 Cache */ - SMCA_DE, /* Decoder Unit */ -@@ -56,17 +56,22 @@ enum smca_bank_types { - SMCA_FP, /* Floating Point */ - SMCA_L3_CACHE, /* L3 Cache */ - SMCA_CS, /* Coherent Slave */ -- SMCA_CS_V2, /* Coherent Slave V2 */ -+ SMCA_CS_V2, - SMCA_PIE, /* Power, Interrupts, etc. */ - SMCA_UMC, /* Unified Memory Controller */ -+ SMCA_UMC_V2, - SMCA_PB, /* Parameter Block */ - SMCA_PSP, /* Platform Security Processor */ -- SMCA_PSP_V2, /* Platform Security Processor V2 */ -+ SMCA_PSP_V2, - SMCA_SMU, /* System Management Unit */ -- SMCA_SMU_V2, /* System Management Unit V2 */ -+ SMCA_SMU_V2, - SMCA_MP5, /* Microprocessor 5 Unit */ - SMCA_NBIO, /* Northbridge IO Unit */ - SMCA_PCIE, /* PCI Express Unit */ -+ SMCA_PCIE_V2, -+ SMCA_XGMI_PCS, /* xGMI PCS Unit */ -+ SMCA_XGMI_PHY, /* xGMI PHY Unit */ -+ SMCA_WAFL_PHY, /* WAFL PHY Unit */ - N_SMCA_BANK_TYPES - }; - -@@ -237,6 +242,22 @@ static const char * const smca_umc_mce_desc[] = { - "Command/address parity error", - "Write data CRC error", - }; -+ -+static const char * const smca_umc2_mce_desc[] = { -+ "DRAM ECC error", -+ "Data poison error", -+ "SDP parity error", -+ "Reserved", -+ "Address/Command parity error", -+ "Write data parity error", -+ "DCQ SRAM ECC error", -+ "Reserved", -+ "Read data parity error", -+ "Rdb SRAM ECC error", -+ "RdRsp SRAM ECC error", -+ "LM32 MP errors", -+}; -+ - /* Parameter Block */ - static const char * const smca_pb_mce_desc[] = { - "Parameter Block RAM ECC error", -@@ -314,6 +335,55 @@ static const char * const smca_pcie_mce_desc[] = { - "CCIX Non-okay write response with data error", - }; - -+static const char * const smca_pcie2_mce_desc[] = { -+ "SDP Parity Error logging", -+}; -+ -+static const char * const smca_xgmipcs_mce_desc[] = { -+ "Data Loss Error", -+ "Training Error", -+ "Flow Control Acknowledge Error", -+ "Rx Fifo Underflow Error", -+ "Rx Fifo Overflow Error", -+ "CRC Error", -+ "BER Exceeded Error", -+ "Tx Vcid Data Error", -+ "Replay Buffer Parity Error", -+ "Data Parity Error", -+ "Replay Fifo Overflow Error", -+ "Replay Fifo Underflow Error", -+ "Elastic Fifo Overflow Error", -+ "Deskew Error", -+ "Flow Control CRC Error", -+ "Data Startup Limit Error", -+ "FC Init Timeout Error", -+ "Recovery Timeout Error", -+ "Ready Serial Timeout Error", -+ "Ready Serial Attempt Error", -+ "Recovery Attempt Error", -+ "Recovery Relock Attempt Error", -+ "Replay Attempt Error", -+ "Sync Header Error", -+ "Tx Replay Timeout Error", -+ "Rx Replay Timeout Error", -+ "LinkSub Tx Timeout Error", -+ "LinkSub Rx Timeout Error", -+ "Rx CMD Pocket Error", -+}; -+ -+static const char * const smca_xgmiphy_mce_desc[] = { -+ "RAM ECC Error", -+ "ARC instruction buffer parity error", -+ "ARC data buffer parity error", -+ "PHY APB error", -+}; -+ -+static const char * const smca_waflphy_mce_desc[] = { -+ "RAM ECC Error", -+ "ARC instruction buffer parity error", -+ "ARC data buffer parity error", -+ "PHY APB error", -+}; - - struct smca_mce_desc { - const char * const *descs; -@@ -333,6 +403,7 @@ static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, - [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, - [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, -+ [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, - [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, - [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, - [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, -@@ -341,6 +412,10 @@ static struct smca_mce_desc smca_mce_descs[] = { - [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, - [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, - [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, -+ [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, -+ [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, -+ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, -+ [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, - }; - - struct smca_hwid { -@@ -369,6 +444,8 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* Unified Memory Controller MCA type */ - { SMCA_UMC, 0x00000096 }, -+ /* Heterogeneous systems may have both UMC and UMC_v2 types on the same node. */ -+ { SMCA_UMC_V2, 0x00010096 }, - - /* Parameter Block MCA type */ - { SMCA_PB, 0x00000005 }, -@@ -389,6 +466,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = { - - /* PCI Express Unit MCA type */ - { SMCA_PCIE, 0x00000046 }, -+ { SMCA_PCIE_V2, 0x00010046 }, -+ -+ /* Ext Global Memory Interconnect PCS MCA type */ -+ { SMCA_XGMI_PCS, 0x00000050 }, -+ -+ /* Ext Global Memory Interconnect PHY MCA type */ -+ { SMCA_XGMI_PHY, 0x00000259 }, -+ -+ /* WAFL PHY MCA type */ -+ { SMCA_WAFL_PHY, 0x00000267 }, - }; - - struct smca_bank_name { -@@ -396,27 +483,28 @@ struct smca_bank_name { - }; - - static struct smca_bank_name smca_names[] = { -- [SMCA_LS] = { "Load Store Unit" }, -- [SMCA_LS_V2] = { "Load Store Unit" }, -- [SMCA_IF] = { "Instruction Fetch Unit" }, -- [SMCA_L2_CACHE] = { "L2 Cache" }, -- [SMCA_DE] = { "Decode Unit" }, -- [SMCA_RESERVED] = { "Reserved" }, -- [SMCA_EX] = { "Execution Unit" }, -- [SMCA_FP] = { "Floating Point Unit" }, -- [SMCA_L3_CACHE] = { "L3 Cache" }, -- [SMCA_CS] = { "Coherent Slave" }, -- [SMCA_CS_V2] = { "Coherent Slave" }, -- [SMCA_PIE] = { "Power, Interrupts, etc." }, -- [SMCA_UMC] = { "Unified Memory Controller" }, -- [SMCA_PB] = { "Parameter Block" }, -- [SMCA_PSP] = { "Platform Security Processor" }, -- [SMCA_PSP_V2] = { "Platform Security Processor" }, -- [SMCA_SMU] = { "System Management Unit" }, -- [SMCA_SMU_V2] = { "System Management Unit" }, -- [SMCA_MP5] = { "Microprocessor 5 Unit" }, -- [SMCA_NBIO] = { "Northbridge IO Unit" }, -- [SMCA_PCIE] = { "PCI Express Unit" }, -+ [SMCA_LS ... SMCA_LS_V2] = { "Load Store Unit" }, -+ [SMCA_IF] = { "Instruction Fetch Unit" }, -+ [SMCA_L2_CACHE] = { "L2 Cache" }, -+ [SMCA_DE] = { "Decode Unit" }, -+ [SMCA_RESERVED] = { "Reserved" }, -+ [SMCA_EX] = { "Execution Unit" }, -+ [SMCA_FP] = { "Floating Point Unit" }, -+ [SMCA_L3_CACHE] = { "L3 Cache" }, -+ [SMCA_CS ... SMCA_CS_V2] = { "Coherent Slave" }, -+ [SMCA_PIE] = { "Power, Interrupts, etc." }, -+ [SMCA_UMC] = { "Unified Memory Controller" }, -+ [SMCA_UMC_V2] = { "Unified Memory Controller V2" }, -+ [SMCA_PB] = { "Parameter Block" }, -+ [SMCA_PSP ... SMCA_PSP_V2] = { "Platform Security Processor" }, -+ [SMCA_SMU ... SMCA_SMU_V2] = { "System Management Unit" }, -+ [SMCA_MP5] = { "Microprocessor 5 Unit" }, -+ [SMCA_NBIO] = { "Northbridge IO Unit" }, -+ [SMCA_PCIE ... SMCA_PCIE_V2] = { "PCI Express Unit" }, -+ [SMCA_XGMI_PCS] = { "Ext Global Memory Interconnect PCS Unit" }, -+ [SMCA_XGMI_PHY] = { "Ext Global Memory Interconnect PHY Unit" }, -+ [SMCA_WAFL_PHY] = { "WAFL PHY Unit" }, -+ - }; - - static void amd_decode_errcode(struct mce_event *e) diff --git a/a16ca0711001957ee98f2c124abce0fa1f801529.patch b/a16ca0711001957ee98f2c124abce0fa1f801529.patch deleted file mode 100644 index 3a96263..0000000 --- a/a16ca0711001957ee98f2c124abce0fa1f801529.patch +++ /dev/null @@ -1,670 +0,0 @@ -commit a16ca0711001957ee98f2c124abce0fa1f801529 -Author: Chandu-babu Namburu -Date: Wed Jan 30 20:36:45 2019 +0530 - - rasdaemon: add support for AMD Scalable MCA - - Add logic here to decode errors from all known IP blocks for - AMD Scalable MCA supported processors - - Reviewed-by: Yazen Ghannam - Signed-off-by: Chandu-babu Namburu - ---- - mce-amd-smca.c | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ - mce-amd.c | 122 +++++++++++++++++ - ras-mce-handler.c | 24 +++ - ras-mce-handler.h | 15 ++ - 4 files changed, 530 insertions(+), 2 deletions(-) - ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ rasdaemon-0.6.1/mce-amd-smca.c 2019-07-12 11:35:04.836470461 -0400 -@@ -0,0 +1,371 @@ -+/* -+ * Copyright (c) 2018, AMD, Inc. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include -+#include -+ -+#include "ras-mce-handler.h" -+#include "bitfield.h" -+ -+/* MCA_STATUS REGISTER FOR FAMILY 17H -+ *********************** Higher 32-bits ***************************** -+ * 63: VALIDERROR, 62: OVERFLOW, 61: UC, 60: Err ENABLE, -+ * 59: Misc Valid, 58: Addr Valid, 57: PCC, 56: ErrCoreID Valid, -+ * 55: TCC, 54: RES, 53: Syndrom Valid, 52: Transparanet, -+ * 51: RES, 50: RES, 49: RES, 48: RES, -+ * 47: RES, 46: CECC, 45: UECC, 44: Deferred, -+ * 43: Poison, 42: RES, 41: RES, 40: RES, -+ * 39: RES, 38: RES, 37: ErrCoreID[5], 36: ErrCoreID[4], -+ * 35: ErrCoreID[3], 34: ErrCoreID[2] 33: ErrCoreID[1] 32: ErrCoreID[0] -+ *********************** Lower 32-bits ****************************** -+ * 31: RES, 30: RES, 29: RES, 28: RES, -+ * 27: RES, 26: RES, 25: RES, 24: RES -+ * 23: RES, 22: RES, 21: XEC[5], 20: XEC[4], -+ * 19: XEC[3], 18: XEC[2], 17: XEC[1], 16: XEC[0] -+ * 15: EC[15], 14: EC[14], 13: EC[13], 12: EC[12], -+ * 11: EC[11], 10: EC[10], 09: EC[9], 08: EC[8], -+ * 07: EC[7], 06: EC[6], 05: EC[5], 04: EC[4], -+ * 03: EC[3], 02: EC[2], 01: EC[1], 00: EC[0] -+ */ -+ -+/* These may be used by multiple smca_hwid_mcatypes */ -+enum smca_bank_types { -+ SMCA_LS = 0, /* Load Store */ -+ SMCA_IF, /* Instruction Fetch */ -+ SMCA_L2_CACHE, /* L2 Cache */ -+ SMCA_DE, /* Decoder Unit */ -+ SMCA_RESERVED, /* Reserved */ -+ SMCA_EX, /* Execution Unit */ -+ SMCA_FP, /* Floating Point */ -+ SMCA_L3_CACHE, /* L3 Cache */ -+ SMCA_CS, /* Coherent Slave */ -+ SMCA_PIE, /* Power, Interrupts, etc. */ -+ SMCA_UMC, /* Unified Memory Controller */ -+ SMCA_PB, /* Parameter Block */ -+ SMCA_PSP, /* Platform Security Processor */ -+ SMCA_SMU, /* System Management Unit */ -+ N_SMCA_BANK_TYPES -+}; -+ -+/* SMCA Extended error strings */ -+/* Load Store */ -+static const char * const smca_ls_mce_desc[] = { -+ "Load queue parity", -+ "Store queue parity", -+ "Miss address buffer payload parity", -+ "L1 TLB parity", -+ "Reserved", -+ "DC tag error type 6", -+ "DC tag error type 1", -+ "Internal error type 1", -+ "Internal error type 2", -+ "Sys Read data error thread 0", -+ "Sys read data error thread 1", -+ "DC tag error type 2", -+ "DC data error type 1 (poison consumption)", -+ "DC data error type 2", -+ "DC data error type 3", -+ "DC tag error type 4", -+ "L2 TLB parity", -+ "PDC parity error", -+ "DC tag error type 3", -+ "DC tag error type 5", -+ "L2 fill data error", -+}; -+/* Instruction Fetch */ -+static const char * const smca_if_mce_desc[] = { -+ "microtag probe port parity error", -+ "IC microtag or full tag multi-hit error", -+ "IC full tag parity", -+ "IC data array parity", -+ "Decoupling queue phys addr parity error", -+ "L0 ITLB parity error", -+ "L1 ITLB parity error", -+ "L2 ITLB parity error", -+ "BPQ snoop parity on Thread 0", -+ "BPQ snoop parity on Thread 1", -+ "L1 BTB multi-match error", -+ "L2 BTB multi-match error", -+ "L2 Cache Response Poison error", -+ "System Read Data error", -+}; -+/* L2 Cache */ -+static const char * const smca_l2_mce_desc[] = { -+ "L2M tag multi-way-hit error", -+ "L2M tag ECC error", -+ "L2M data ECC error", -+ "HW assert", -+}; -+/* Decoder Unit */ -+static const char * const smca_de_mce_desc[] = { -+ "uop cache tag parity error", -+ "uop cache data parity error", -+ "Insn buffer parity error", -+ "uop queue parity error", -+ "Insn dispatch queue parity error", -+ "Fetch address FIFO parity", -+ "Patch RAM data parity", -+ "Patch RAM sequencer parity", -+ "uop buffer parity" -+}; -+/* Execution Unit */ -+static const char * const smca_ex_mce_desc[] = { -+ "Watchdog timeout error", -+ "Phy register file parity", -+ "Flag register file parity", -+ "Immediate displacement register file parity", -+ "Address generator payload parity", -+ "EX payload parity", -+ "Checkpoint queue parity", -+ "Retire dispatch queue parity", -+ "Retire status queue parity error", -+ "Scheduling queue parity error", -+ "Branch buffer queue parity error", -+}; -+/* Floating Point Unit */ -+static const char * const smca_fp_mce_desc[] = { -+ "Physical register file parity", -+ "Freelist parity error", -+ "Schedule queue parity", -+ "NSQ parity error", -+ "Retire queue parity", -+ "Status register file parity", -+ "Hardware assertion", -+}; -+/* L3 Cache */ -+static const char * const smca_l3_mce_desc[] = { -+ "Shadow tag macro ECC error", -+ "Shadow tag macro multi-way-hit error", -+ "L3M tag ECC error", -+ "L3M tag multi-way-hit error", -+ "L3M data ECC error", -+ "XI parity, L3 fill done channel error", -+ "L3 victim queue parity", -+ "L3 HW assert", -+}; -+/* Coherent Slave Unit */ -+static const char * const smca_cs_mce_desc[] = { -+ "Illegal request from transport layer", -+ "Address violation", -+ "Security violation", -+ "Illegal response from transport layer", -+ "Unexpected response", -+ "Parity error on incoming request or probe response data", -+ "Parity error on incoming read response data", -+ "Atomic request parity", -+ "ECC error on probe filter access", -+}; -+/* Power, Interrupt, etc.. */ -+static const char * const smca_pie_mce_desc[] = { -+ "HW assert", -+ "Internal PIE register security violation", -+ "Error on GMI link", -+ "Poison data written to internal PIE register", -+}; -+/* Unified Memory Controller */ -+static const char * const smca_umc_mce_desc[] = { -+ "DRAM ECC error", -+ "Data poison error on DRAM", -+ "SDP parity error", -+ "Advanced peripheral bus error", -+ "Command/address parity error", -+ "Write data CRC error", -+}; -+/* Parameter Block */ -+static const char * const smca_pb_mce_desc[] = { -+ "Parameter Block RAM ECC error", -+}; -+/* Platform Security Processor */ -+static const char * const smca_psp_mce_desc[] = { -+ "PSP RAM ECC or parity error", -+}; -+/* System Management Unit */ -+static const char * const smca_smu_mce_desc[] = { -+ "SMU RAM ECC or parity error", -+}; -+ -+struct smca_mce_desc { -+ const char * const *descs; -+ unsigned int num_descs; -+}; -+ -+static struct smca_mce_desc smca_mce_descs[] = { -+ [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, -+ [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, -+ [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, -+ [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, -+ [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) }, -+ [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, -+ [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, -+ [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, -+ [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, -+ [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, -+ [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, -+ [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, -+ [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, -+}; -+ -+struct smca_hwid { -+ unsigned int bank_type; /* Use with smca_bank_types for easy indexing.*/ -+ uint32_t mcatype_hwid; /* mcatype,hwid bit 63-32 in MCx_IPID Register*/ -+}; -+ -+static struct smca_hwid smca_hwid_mcatypes[] = { -+ /* { bank_type, mcatype_hwid } */ -+ -+ /* ZN Core (HWID=0xB0) MCA types */ -+ { SMCA_LS, 0x000000B0 }, -+ { SMCA_IF, 0x000100B0 }, -+ { SMCA_L2_CACHE, 0x000200B0 }, -+ { SMCA_DE, 0x000300B0 }, -+ /* HWID 0xB0 MCATYPE 0x4 is Reserved */ -+ { SMCA_EX, 0x000500B0 }, -+ { SMCA_FP, 0x000600B0 }, -+ { SMCA_L3_CACHE, 0x000700B0 }, -+ -+ /* Data Fabric MCA types */ -+ { SMCA_CS, 0x0000002E }, -+ { SMCA_PIE, 0x0001002E }, -+ -+ /* Unified Memory Controller MCA type */ -+ { SMCA_UMC, 0x00000096 }, -+ -+ /* Parameter Block MCA type */ -+ { SMCA_PB, 0x00000005 }, -+ -+ /* Platform Security Processor MCA type */ -+ { SMCA_PSP, 0x000000FF }, -+ -+ /* System Management Unit MCA type */ -+ { SMCA_SMU, 0x00000001 }, -+}; -+ -+struct smca_bank_name { -+ const char *name; -+}; -+ -+static struct smca_bank_name smca_names[] = { -+ [SMCA_LS] = { "Load Store Unit" }, -+ [SMCA_IF] = { "Instruction Fetch Unit" }, -+ [SMCA_L2_CACHE] = { "L2 Cache" }, -+ [SMCA_DE] = { "Decode Unit" }, -+ [SMCA_RESERVED] = { "Reserved" }, -+ [SMCA_EX] = { "Execution Unit" }, -+ [SMCA_FP] = { "Floating Point Unit" }, -+ [SMCA_L3_CACHE] = { "L3 Cache" }, -+ [SMCA_CS] = { "Coherent Slave" }, -+ [SMCA_PIE] = { "Power, Interrupts, etc." }, -+ [SMCA_UMC] = { "Unified Memory Controller" }, -+ [SMCA_PB] = { "Parameter Block" }, -+ [SMCA_PSP] = { "Platform Security Processor" }, -+ [SMCA_SMU] = { "System Management Unit" }, -+}; -+ -+static void amd_decode_errcode(struct mce_event *e) -+{ -+ -+ decode_amd_errcode(e); -+ -+ if (e->status & MCI_STATUS_POISON) -+ mce_snprintf(e->mcistatus_msg, "Poison consumed"); -+ -+ if (e->status & MCI_STATUS_TCC) -+ mce_snprintf(e->mcistatus_msg, "Task_context_corrupt"); -+ -+} -+/* -+ * To find the UMC channel represented by this bank we need to match on its -+ * instance_id. The instance_id of a bank is held in the lower 32 bits of its -+ * IPID. -+ */ -+static int find_umc_channel(struct mce_event *e) -+{ -+ uint32_t umc_instance_id[] = {0x50f00, 0x150f00}; -+ uint32_t instance_id = EXTRACT(e->ipid, 0, 31); -+ int i, channel = -1; -+ -+ for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++) -+ if (umc_instance_id[i] == instance_id) -+ channel = i; -+ -+ return channel; -+} -+/* Decode extended errors according to Scalable MCA specification */ -+static void decode_smca_error(struct mce_event *e) -+{ -+ enum smca_bank_types bank_type; -+ const char *ip_name; -+ unsigned short xec = (e->status >> 16) & 0x3f; -+ const struct smca_hwid *s_hwid; -+ uint32_t mcatype_hwid = EXTRACT(e->ipid, 32, 63); -+ unsigned int csrow = -1, channel = -1; -+ unsigned int i; -+ -+ for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { -+ s_hwid = &smca_hwid_mcatypes[i]; -+ if (mcatype_hwid == s_hwid->mcatype_hwid) { -+ bank_type = s_hwid->bank_type; -+ break; -+ } -+ } -+ -+ if (i >= ARRAY_SIZE(smca_hwid_mcatypes)) { -+ strcpy(e->mcastatus_msg, "Couldn't find bank type with IPID"); -+ return; -+ } -+ -+ if (bank_type >= N_SMCA_BANK_TYPES) { -+ strcpy(e->mcastatus_msg, "Don't know how to decode this bank"); -+ return; -+ } -+ -+ if (bank_type == SMCA_RESERVED) { -+ strcpy(e->mcastatus_msg, "Bank 4 is reserved.\n"); -+ return; -+ } -+ -+ ip_name = smca_names[bank_type].name; -+ -+ mce_snprintf(e->bank_name, "%s (bank=%d)", ip_name, e->bank); -+ -+ /* Only print the descriptor of valid extended error code */ -+ if (xec < smca_mce_descs[bank_type].num_descs) -+ mce_snprintf(e->mcastatus_msg, -+ " %s.\n", smca_mce_descs[bank_type].descs[xec]); -+ -+ if (bank_type == SMCA_UMC && xec == 0) { -+ channel = find_umc_channel(e); -+ csrow = e->synd & 0x7; /* Bit 0, 1 ,2 */ -+ mce_snprintf(e->mc_location, "memory_channel=%d,csrow=%d", -+ channel, csrow); -+ } -+} -+ -+int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e) -+{ -+ uint64_t mcgstatus = e->mcgstatus; -+ -+ mce_snprintf(e->mcgstatus_msg, "mcgstatus=%lld", -+ (long long)e->mcgstatus); -+ -+ if (mcgstatus & MCG_STATUS_RIPV) -+ mce_snprintf(e->mcgstatus_msg, "RIPV"); -+ if (mcgstatus & MCG_STATUS_EIPV) -+ mce_snprintf(e->mcgstatus_msg, "EIPV"); -+ if (mcgstatus & MCG_STATUS_MCIP) -+ mce_snprintf(e->mcgstatus_msg, "MCIP"); -+ -+ decode_smca_error(e); -+ amd_decode_errcode(e); -+ return 0; -+} ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ rasdaemon-0.6.1/mce-amd.c 2019-07-12 11:35:04.836470461 -0400 -@@ -0,0 +1,122 @@ -+/* -+ * Copyright (c) 2018, The AMD, Inc. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 and -+ * only version 2 as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ */ -+ -+#include -+#include -+ -+#include "ras-mce-handler.h" -+ -+/* Error Code Types */ -+#define TLB_ERROR(x) (((x) & 0xFFF0) == 0x0010) -+#define MEM_ERROR(x) (((x) & 0xFF00) == 0x0100) -+#define BUS_ERROR(x) (((x) & 0xF800) == 0x0800) -+#define INT_ERROR(x) (((x) & 0xF4FF) == 0x0400) -+ -+/* Error code: transaction type (TT) */ -+static char *transaction[] = { -+ "instruction", "data", "generic", "reserved" -+}; -+/* Error codes: cache level (LL) */ -+static char *cachelevel[] = { -+ "reserved", "L1", "L2", "L3/generic" -+}; -+/* Error codes: memory transaction type (RRRR) */ -+static char *memtrans[] = { -+ "generic", "generic read", "generic write", "data read", -+ "data write", "instruction fetch", "prefetch", "evict", "snoop", -+ "?", "?", "?", "?", "?", "?", "?" -+}; -+/* Participation Processor */ -+static char *partproc[] = { -+ "local node origin", "local node response", -+ "local node observed", "generic participation" -+}; -+/* Timeout */ -+static char *timeout[] = { -+ "request didn't time out", -+ "request timed out" -+}; -+/* internal unclassified error code */ -+static char *internal[] = { "reserved", -+ "reserved", -+ "hardware assert", -+ "reserved" }; -+ -+#define TT(x) (((x) >> 2) & 0x3) /*bit 2, bit 3*/ -+#define TT_MSG(x) transaction[TT(x)] -+#define LL(x) ((x) & 0x3) /*bit 0, bit 1*/ -+#define LL_MSG(x) cachelevel[LL(x)] -+ -+#define R4(x) (((x) >> 4) & 0xF) /*bit 4, bit 5, bit 6, bit 7 */ -+#define R4_MSG(x) ((R4(x) < 9) ? memtrans[R4(x)] : "Wrong R4!") -+ -+#define TO(x) (((x) >> 8) & 0x1) /*bit 8*/ -+#define TO_MSG(x) timeout[TO(x)] -+#define PP(x) (((x) >> 9) & 0x3) /*bit 9, bit 10*/ -+#define PP_MSG(x) partproc[PP(x)] -+ -+#define UU(x) (((x) >> 8) & 0x3) /*bit 8, bit 9*/ -+#define UU_MSG(x) internal[UU(x)] -+ -+void decode_amd_errcode(struct mce_event *e) -+{ -+ uint16_t ec = e->status & 0xffff; -+ uint16_t ecc = (e->status >> 45) & 0x3; -+ -+ if (e->status & MCI_STATUS_UC) { -+ if (e->status & MCI_STATUS_PCC) -+ strcpy(e->error_msg, "System Fatal error."); -+ if (e->mcgstatus & MCG_STATUS_RIPV) -+ strcpy(e->error_msg, -+ "Uncorrected, software restartable error."); -+ strcpy(e->error_msg, -+ "Uncorrected, software containable error."); -+ } else if (e->status & MCI_STATUS_DEFERRED) -+ strcpy(e->error_msg, "Deferred error, no action required."); -+ else -+ strcpy(e->error_msg, "Corrected error, no action required."); -+ -+ if (!(e->status & MCI_STATUS_VAL)) -+ mce_snprintf(e->mcistatus_msg, "MCE_INVALID"); -+ -+ if (e->status & MCI_STATUS_OVER) -+ mce_snprintf(e->mcistatus_msg, "Error_overflow"); -+ -+ if (e->status & MCI_STATUS_PCC) -+ mce_snprintf(e->mcistatus_msg, "Processor_context_corrupt"); -+ -+ if (ecc) -+ mce_snprintf(e->mcistatus_msg, -+ "%sECC", ((ecc == 2) ? "C" : "U")); -+ -+ if (INT_ERROR(ec)) { -+ mce_snprintf(e->mcastatus_msg, "Internal '%s'", UU_MSG(ec)); -+ return; -+ } -+ -+ if (TLB_ERROR(ec)) -+ mce_snprintf(e->mcastatus_msg, -+ "TLB Error 'tx: %s, level: %s'", -+ TT_MSG(ec), LL_MSG(ec)); -+ else if (MEM_ERROR(ec)) -+ mce_snprintf(e->mcastatus_msg, -+ "Memory Error 'mem-tx: %s, tx: %s, level: %s'", -+ R4_MSG(ec), TT_MSG(ec), LL_MSG(ec)); -+ else if (BUS_ERROR(ec)) -+ mce_snprintf(e->mcastatus_msg, -+ "Bus Error '%s, %s, mem-tx: %s, level: %s'", -+ PP_MSG(ec), TO_MSG(ec), -+ R4_MSG(ec), LL_MSG(ec)); -+ return; -+ -+} ---- rasdaemon-0.6.1.orig/ras-mce-handler.c 2019-07-12 11:35:01.585502811 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.c 2019-07-12 11:35:04.836470461 -0400 -@@ -55,6 +55,7 @@ [CPU_XEON75XX] = "Intel Xeon 7500 series - [CPU_KNIGHTS_LANDING] = "Knights Landing", - [CPU_KNIGHTS_MILL] = "Knights Mill", - [CPU_SKYLAKE_XEON] = "Skylake server", -+ [CPU_NAPLES] = "AMD Family 17h Zen1" - }; - - static enum cputype select_intel_cputype(struct ras_events *ras) -@@ -190,9 +191,12 @@ ret = 0; - if (!strcmp(mce->vendor, "AuthenticAMD")) { - if (mce->family == 15) - mce->cputype = CPU_K8; -- if (mce->family > 15) { -+ if (mce->family == 23) -+ mce->cputype = CPU_NAPLES; -+ if (mce->family > 23) { - log(ALL, LOG_INFO, -- "Can't parse MCE for this AMD CPU yet\n"); -+ "Can't parse MCE for this AMD CPU yet %d\n", -+ mce->family); - ret = EINVAL; - } - goto ret; -@@ -331,6 +335,12 @@ #if 0 - if (e->status & MCI_STATUS_ADDRV) - trace_seq_printf(s, ", addr= %llx", (long long)e->addr); - -+ if (e->status & MCI_STATUS_SYNDV) -+ trace_seq_printf(s, ", synd= %llx", (long long)e->synd); -+ -+ if (e->ipid) -+ trace_seq_printf(s, ", ipid= %llx", (long long)e->ipid); -+ - if (e->mcgstatus_msg) - trace_seq_printf(s, ", %s", e->mcgstatus_msg); - else -@@ -411,6 +421,13 @@ if (pevent_get_field_val(s, event, "bank - if (pevent_get_field_val(s, event, "cpuvendor", record, &val, 1) < 0) - return -1; - e.cpuvendor = val; -+ /* Get New entries */ -+ if (pevent_get_field_val(s, event, "synd", record, &val, 1) < 0) -+ return -1; -+ e.synd = val; -+ if (pevent_get_field_val(s, event, "ipid", record, &val, 1) < 0) -+ return -1; -+ e.ipid = val; - - switch (mce->cputype) { - case CPU_GENERIC: -@@ -418,6 +435,9 @@ if (pevent_get_field_val(s, event, "cpuv - case CPU_K8: - rc = parse_amd_k8_event(ras, &e); - break; -+ case CPU_NAPLES: -+ rc = parse_amd_smca_event(ras, &e); -+ break; - default: /* All other CPU types are Intel */ - rc = parse_intel_event(ras, &e); - } ---- rasdaemon-0.6.1.orig/ras-mce-handler.h 2019-07-12 11:35:01.585502811 -0400 -+++ rasdaemon-0.6.1/ras-mce-handler.h 2019-07-12 11:35:04.836470461 -0400 -@@ -50,6 +50,7 @@ enum cputype { - CPU_KNIGHTS_LANDING, - CPU_KNIGHTS_MILL, - CPU_SKYLAKE_XEON, -+ CPU_NAPLES, - }; - - struct mce_event { -@@ -69,6 +70,8 @@ struct mce_event { - uint8_t cs; - uint8_t bank; - uint8_t cpuvendor; -+ uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */ -+ uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */ - - /* Parsed data */ - char timestamp[64]; -@@ -129,6 +132,9 @@ void broadwell_de_decode_model(struct ra - void broadwell_epex_decode_model(struct ras_events *ras, struct mce_event *e); - void skylake_s_decode_model(struct ras_events *ras, struct mce_event *e); - -+/* AMD error code decode function */ -+void decode_amd_errcode(struct mce_event *e); -+ - /* Software defined banks */ - #define MCE_EXTENDED_BANK 128 - -@@ -144,6 +150,13 @@ #define MCI_STATUS_EN (1ULL<<60) /* - #define MCI_STATUS_S (1ULL<<56) /* signalled */ - #define MCI_STATUS_AR (1ULL<<55) /* action-required */ - -+/* AMD-specific bits */ -+#define MCI_STATUS_TCC (1ULL<<55) /* Task context corrupt */ -+#define MCI_STATUS_SYNDV (1ULL<<53) /* synd reg. valid */ -+/* uncorrected error,deferred exception */ -+#define MCI_STATUS_DEFERRED (1ULL<<44) -+#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ -+ - #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ - #define MCG_STATUS_EIPV (1ULL<<1) /* eip points to correct instruction */ - #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ -@@ -154,4 +167,6 @@ int parse_intel_event(struct ras_events - - int parse_amd_k8_event(struct ras_events *ras, struct mce_event *e); - -+int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e); -+ - #endif ---- rasdaemon-0.6.1.orig/Makefile.in 2018-04-25 06:29:05.000000000 -0400 -+++ rasdaemon-0.6.1/Makefile.in 2019-07-15 14:41:22.308278851 -0400 -@@ -100,7 +100,7 @@ sbin_PROGRAMS = rasdaemon$(EXEEXT) - @WITH_MCE_TRUE@ mce-intel-dunnington.c mce-intel-tulsa.c \ - @WITH_MCE_TRUE@ mce-intel-sb.c mce-intel-ivb.c mce-intel-haswell.c \ - @WITH_MCE_TRUE@ mce-intel-knl.c mce-intel-broadwell-de.c \ --@WITH_MCE_TRUE@ mce-intel-broadwell-epex.c mce-intel-skylake-xeon.c -+@WITH_MCE_TRUE@ mce-intel-broadwell-epex.c mce-intel-skylake-xeon.c mce-amd.c mce-amd-smca.c - - @WITH_EXTLOG_TRUE@am__append_6 = ras-extlog-handler.c - @WITH_ABRT_REPORT_TRUE@am__append_7 = ras-report.c -@@ -132,7 +132,7 @@ am__rasdaemon_SOURCES_DIST = rasdaemon.c - mce-intel-ivb.c mce-intel-haswell.c mce-intel-knl.c \ - mce-intel-broadwell-de.c mce-intel-broadwell-epex.c \ - mce-intel-skylake-xeon.c ras-extlog-handler.c ras-report.c \ -- non-standard-hisi_hip07.c -+ non-standard-hisi_hip07.c mce-amd-smca.c mce-amd.c - @WITH_SQLITE3_TRUE@am__objects_1 = ras-record.$(OBJEXT) - @WITH_AER_TRUE@am__objects_2 = ras-aer-handler.$(OBJEXT) - @WITH_NON_STANDARD_TRUE@am__objects_3 = \ -@@ -149,7 +149,9 @@ non-standard-hisi_hip07.c - @WITH_MCE_TRUE@ mce-intel-knl.$(OBJEXT) \ - @WITH_MCE_TRUE@ mce-intel-broadwell-de.$(OBJEXT) \ - @WITH_MCE_TRUE@ mce-intel-broadwell-epex.$(OBJEXT) \ --@WITH_MCE_TRUE@ mce-intel-skylake-xeon.$(OBJEXT) -+@WITH_MCE_TRUE@ mce-intel-skylake-xeon.$(OBJEXT) \ -+@WITH_MCE_TRUE@ mce-amd-smca.$(OBJEXT) \ -+@WITH_MCE_TRUE@ mce-amd.$(OBJEXT) - @WITH_EXTLOG_TRUE@am__objects_6 = ras-extlog-handler.$(OBJEXT) - @WITH_ABRT_REPORT_TRUE@am__objects_7 = ras-report.$(OBJEXT) - @WITH_HISI_NS_DECODE_TRUE@am__objects_8 = \ -@@ -595,6 +597,8 @@ distclean-compile: - - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bitfield.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-amd-k8.Po@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-amd.Po@am__quote@ -+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-amd-scma.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-intel-broadwell-de.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-intel-broadwell-epex.Po@am__quote@ - @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mce-intel-dunnington.Po@am__quote@ diff --git a/a8c776ed94f68ae31d7b5f74e19545698898c13c.patch b/a8c776ed94f68ae31d7b5f74e19545698898c13c.patch deleted file mode 100644 index 38657d4..0000000 --- a/a8c776ed94f68ae31d7b5f74e19545698898c13c.patch +++ /dev/null @@ -1,138 +0,0 @@ -commit a8c776ed94f68ae31d7b5f74e19545698898c13c -Author: Mauro Carvalho Chehab -Date: Tue Aug 14 13:06:27 2018 -0300 - - mce-intel-*: fix a warning when using FIELD(, NULL) - - Internally, FIELD() macro checks the size of an array, by - using ARRAY_SIZE. Well, this macro causes a division by zero - if NULL is used, as its type is void, as warned: - - mce-intel-dunnington.c:30:2: note: in expansion of macro ‘FIELD’ - FIELD(17, NULL), - ^~~~~ - ras-mce-handler.h:28:33: warning: division ‘sizeof (void *) / sizeof (void)’ does not compute the number of array elements [-Wsizeof-pointer-div] - #define ARRAY_SIZE(x) (sizeof(x)/sizeof(*(x))) - ^ - bitfield.h:37:51: note: in expansion of macro ‘ARRAY_SIZE’ - #define FIELD(start_bit, name) { start_bit, name, ARRAY_SIZE(name) } - ^~~~~~~~~~ - - While this warning is harmless, it may prevent seeing more serios - warnings. So, add a FIELD_NULL() macro to avoid that. - - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/bitfield.h b/bitfield.h -index c7dfeb1..fccbb36 100644 ---- a/bitfield.h -+++ b/bitfield.h -@@ -35,6 +35,7 @@ struct numfield { - }; - - #define FIELD(start_bit, name) { start_bit, name, ARRAY_SIZE(name) } -+#define FIELD_NULL(start_bit) { start_bit, NULL, 0 } - #define SBITFIELD(start_bit, string) { start_bit, ((char * [2]) { NULL, string }), 2 } - - #define NUMBER(start, end, name) { start, end, name, "%Lu", 0 } -diff --git a/mce-intel-dunnington.c b/mce-intel-dunnington.c -index 4b1c7e3..c695c62 100644 ---- a/mce-intel-dunnington.c -+++ b/mce-intel-dunnington.c -@@ -27,14 +27,14 @@ - - static struct field dunnington_bus_status[] = { - SBITFIELD(16, "Parity error detected during FSB request phase"), -- FIELD(17, NULL), -+ FIELD_NULL(17), - SBITFIELD(20, "Hard Failure response received for a local transaction"), - SBITFIELD(21, "Parity error on FSB response field detected"), - SBITFIELD(22, "Parity data error on inbound data detected"), -- FIELD(23, NULL), -- FIELD(25, NULL), -- FIELD(28, NULL), -- FIELD(31, NULL), -+ FIELD_NULL(23), -+ FIELD_NULL(25), -+ FIELD_NULL(28), -+ FIELD_NULL(31), - {} - }; - -diff --git a/mce-intel-p4-p6.c b/mce-intel-p4-p6.c -index 4615e1a..5c6c3ff 100644 ---- a/mce-intel-p4-p6.c -+++ b/mce-intel-p4-p6.c -@@ -60,7 +60,7 @@ static char *bus_queue_error_type[] = { - }; - - static struct field p6_shared_status[] = { -- FIELD(16, NULL), -+ FIELD_NULL(16), - FIELD(19, bus_queue_req_type), - FIELD(25, bus_queue_error_type), - FIELD(25, bus_queue_error_type), -@@ -68,7 +68,7 @@ static struct field p6_shared_status[] = { - SBITFIELD(36, "received parity error on response transaction"), - SBITFIELD(38, "timeout BINIT (ROB timeout)." - " No micro-instruction retired for some time"), -- FIELD(39, NULL), -+ FIELD_NULL(39), - SBITFIELD(42, "bus transaction received hard error response"), - SBITFIELD(43, "failure that caused IERR"), - /* The following are reserved for Core in the SDM. Let's keep them here anyways*/ -@@ -76,15 +76,15 @@ static struct field p6_shared_status[] = { - SBITFIELD(45, "uncorrectable ECC error"), - SBITFIELD(46, "correctable ECC error"), - /* [47..54]: ECC syndrome */ -- FIELD(55, NULL), -+ FIELD_NULL(55), - {}, - }; - - static struct field p6old_status[] = { - SBITFIELD(28, "FRC error"), - SBITFIELD(29, "BERR on this CPU"), -- FIELD(31, NULL), -- FIELD(32, NULL), -+ FIELD_NULL(31), -+ FIELD_NULL(32), - SBITFIELD(35, "BINIT received from external bus"), - SBITFIELD(37, "Received hard error reponse on split transaction (Bus BINIT)"), - {} -@@ -94,9 +94,9 @@ static struct field core2_status[] = { - SBITFIELD(28, "MCE driven"), - SBITFIELD(29, "MCE is observed"), - SBITFIELD(31, "BINIT observed"), -- FIELD(32, NULL), -+ FIELD_NULL(32), - SBITFIELD(34, "PIC or FSB data parity error"), -- FIELD(35, NULL), -+ FIELD_NULL(35), - SBITFIELD(37, "FSB address parity error detected"), - {} - }; -diff --git a/mce-intel-tulsa.c b/mce-intel-tulsa.c -index 6cea421..e59bf06 100644 ---- a/mce-intel-tulsa.c -+++ b/mce-intel-tulsa.c -@@ -39,7 +39,7 @@ static struct field tls_bus_status[] = { - SBITFIELD(16, "Parity error detected during FSB request phase"), - SBITFIELD(17, "Partity error detected on Core 0 request's address field"), - SBITFIELD(18, "Partity error detected on Core 1 request's address field"), -- FIELD(19, NULL), -+ FIELD_NULL(19), - SBITFIELD(20, "Parity error on FSB response field detected"), - SBITFIELD(21, "FSB data parity error on inbound date detected"), - SBITFIELD(22, "Data parity error on data received from Core 0 detected"), -@@ -48,8 +48,8 @@ static struct field tls_bus_status[] = { - SBITFIELD(25, "Data ECC event to error on inbound data correctable or uncorrectable"), - SBITFIELD(26, "Pad logic detected a data strobe glitch or sequencing error"), - SBITFIELD(27, "Pad logic detected a request strobe glitch or sequencing error"), -- FIELD(28, NULL), -- FIELD(31, NULL), -+ FIELD_NULL(28), -+ FIELD_NULL(31), - {} - }; - diff --git a/add_upstream_labels.patch b/add_upstream_labels.patch deleted file mode 100644 index 70a04df..0000000 --- a/add_upstream_labels.patch +++ /dev/null @@ -1,159 +0,0 @@ ---- - labels/dell | 152 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 152 insertions(+) - ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ rasdaemon-0.6.1/labels/dell 2020-02-20 11:53:39.574579258 -0500 -@@ -0,0 +1,152 @@ -+# RASDAEMON Motherboard DIMM labels Database file. -+# -+# Vendor-name and model-name are found from the program 'dmidecode' -+# labels are found from the silk screen on the motherboard. -+# -+#Vendor: -+# Product: -+# Model: -+#