From 139fc9b676da0c385b8b5830d82dd0762a5fa651 Mon Sep 17 00:00:00 2001 From: anolis-bot Date: Tue, 16 May 2023 16:35:40 +0800 Subject: [PATCH] update to rasdaemon-0.6.1-13.el8 Signed-off-by: anolis-bot --- ...n-add-rbtree-support-for-page-record.patch | 584 ---------------- ...pport-for-memory-Corrected-Error-pre.patch | 646 ------------------ ...tification-support-when-page-goes-of.patch | 259 ------- ...rasdaemon-avoid-multiple-definitions.patch | 24 - ...cc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch | 71 ++ ...041e0abfa20054ff5d6874ffbd1ab592558d.patch | 28 + dist | 1 + ...7ec14a11764fedfea50bd4d96ddda43c7fc1.patch | 24 + rasdaemon.spec | 35 +- 9 files changed, 131 insertions(+), 1541 deletions(-) delete mode 100644 0021-rasdaemon-add-rbtree-support-for-page-record.patch delete mode 100644 0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch delete mode 100644 0023-rasdaemon-Add-notification-support-when-page-goes-of.patch delete mode 100644 1001-rasdaemon-avoid-multiple-definitions.patch create mode 100644 899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch create mode 100644 ce33041e0abfa20054ff5d6874ffbd1ab592558d.patch create mode 100644 dist create mode 100644 e8b97ec14a11764fedfea50bd4d96ddda43c7fc1.patch diff --git a/0021-rasdaemon-add-rbtree-support-for-page-record.patch b/0021-rasdaemon-add-rbtree-support-for-page-record.patch deleted file mode 100644 index 4a76a89..0000000 --- a/0021-rasdaemon-add-rbtree-support-for-page-record.patch +++ /dev/null @@ -1,584 +0,0 @@ -From 27794f4a5ff1453490bbcd805ad8e5b54516f015 Mon Sep 17 00:00:00 2001 -From: wuyun -Date: Sat, 20 Jun 2020 20:26:21 +0800 -Subject: [PATCH] rasdaemon: add rbtree support for page record - -commit 5fd96f457262052f7d06435af8a49689ffb6ffcf upstream - -The rbtree is very efficient for recording and querying fault page info. - -Signed-off-by: wuyun -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab -Signed-off-by: Bixuan Cui ---- - rbtree.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - rbtree.h | 165 +++++++++++++++++++++++++++ - 2 files changed, 549 insertions(+) - create mode 100644 rbtree.c - create mode 100644 rbtree.h - -diff --git a/rbtree.c b/rbtree.c -new file mode 100644 -index 0000000..d9b1bd4 ---- /dev/null -+++ b/rbtree.c -@@ -0,0 +1,384 @@ -+/* -+ Red Black Trees -+ (C) 1999 Andrea Arcangeli -+ (C) 2002 David Woodhouse -+ Taken from the Linux 2.6.30 source with some minor modificatons. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ linux/lib/rbtree.c -+*/ -+ -+#include "rbtree.h" -+ -+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *right = node->rb_right; -+ struct rb_node *parent = rb_parent(node); -+ -+ if ((node->rb_right = right->rb_left)) -+ rb_set_parent(right->rb_left, node); -+ right->rb_left = node; -+ -+ rb_set_parent(right, parent); -+ -+ if (parent) -+ { -+ if (node == parent->rb_left) -+ parent->rb_left = right; -+ else -+ parent->rb_right = right; -+ } -+ else -+ root->rb_node = right; -+ rb_set_parent(node, right); -+} -+ -+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *left = node->rb_left; -+ struct rb_node *parent = rb_parent(node); -+ -+ if ((node->rb_left = left->rb_right)) -+ rb_set_parent(left->rb_right, node); -+ left->rb_right = node; -+ -+ rb_set_parent(left, parent); -+ -+ if (parent) -+ { -+ if (node == parent->rb_right) -+ parent->rb_right = left; -+ else -+ parent->rb_left = left; -+ } -+ else -+ root->rb_node = left; -+ rb_set_parent(node, left); -+} -+ -+void rb_insert_color(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *parent, *gparent; -+ -+ while ((parent = rb_parent(node)) && rb_is_red(parent)) -+ { -+ gparent = rb_parent(parent); -+ -+ if (parent == gparent->rb_left) -+ { -+ { -+ register struct rb_node *uncle = gparent->rb_right; -+ if (uncle && rb_is_red(uncle)) -+ { -+ rb_set_black(uncle); -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ node = gparent; -+ continue; -+ } -+ } -+ -+ if (parent->rb_right == node) -+ { -+ struct rb_node *tmp; -+ __rb_rotate_left(parent, root); -+ tmp = parent; -+ parent = node; -+ node = tmp; -+ } -+ -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ __rb_rotate_right(gparent, root); -+ } else { -+ { -+ struct rb_node *uncle = gparent->rb_left; -+ if (uncle && rb_is_red(uncle)) -+ { -+ rb_set_black(uncle); -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ node = gparent; -+ continue; -+ } -+ } -+ -+ if (parent->rb_left == node) -+ { -+ struct rb_node *tmp; -+ __rb_rotate_right(parent, root); -+ tmp = parent; -+ parent = node; -+ node = tmp; -+ } -+ -+ rb_set_black(parent); -+ rb_set_red(gparent); -+ __rb_rotate_left(gparent, root); -+ } -+ } -+ -+ rb_set_black(root->rb_node); -+} -+ -+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, -+ struct rb_root *root) -+{ -+ struct rb_node *other; -+ -+ while ((!node || rb_is_black(node)) && node != root->rb_node) -+ { -+ if (parent->rb_left == node) -+ { -+ other = parent->rb_right; -+ if (rb_is_red(other)) -+ { -+ rb_set_black(other); -+ rb_set_red(parent); -+ __rb_rotate_left(parent, root); -+ other = parent->rb_right; -+ } -+ if ((!other->rb_left || rb_is_black(other->rb_left)) && -+ (!other->rb_right || rb_is_black(other->rb_right))) -+ { -+ rb_set_red(other); -+ node = parent; -+ parent = rb_parent(node); -+ } -+ else -+ { -+ if (!other->rb_right || rb_is_black(other->rb_right)) -+ { -+ rb_set_black(other->rb_left); -+ rb_set_red(other); -+ __rb_rotate_right(other, root); -+ other = parent->rb_right; -+ } -+ rb_set_color(other, rb_color(parent)); -+ rb_set_black(parent); -+ rb_set_black(other->rb_right); -+ __rb_rotate_left(parent, root); -+ node = root->rb_node; -+ break; -+ } -+ } -+ else -+ { -+ other = parent->rb_left; -+ if (rb_is_red(other)) -+ { -+ rb_set_black(other); -+ rb_set_red(parent); -+ __rb_rotate_right(parent, root); -+ other = parent->rb_left; -+ } -+ if ((!other->rb_left || rb_is_black(other->rb_left)) && -+ (!other->rb_right || rb_is_black(other->rb_right))) -+ { -+ rb_set_red(other); -+ node = parent; -+ parent = rb_parent(node); -+ } -+ else -+ { -+ if (!other->rb_left || rb_is_black(other->rb_left)) -+ { -+ rb_set_black(other->rb_right); -+ rb_set_red(other); -+ __rb_rotate_left(other, root); -+ other = parent->rb_left; -+ } -+ rb_set_color(other, rb_color(parent)); -+ rb_set_black(parent); -+ rb_set_black(other->rb_left); -+ __rb_rotate_right(parent, root); -+ node = root->rb_node; -+ break; -+ } -+ } -+ } -+ if (node) -+ rb_set_black(node); -+} -+ -+void rb_erase(struct rb_node *node, struct rb_root *root) -+{ -+ struct rb_node *child, *parent; -+ int color; -+ -+ if (!node->rb_left) -+ child = node->rb_right; -+ else if (!node->rb_right) -+ child = node->rb_left; -+ else -+ { -+ struct rb_node *old = node, *left; -+ -+ node = node->rb_right; -+ while ((left = node->rb_left) != NULL) -+ node = left; -+ child = node->rb_right; -+ parent = rb_parent(node); -+ color = rb_color(node); -+ -+ if (child) -+ rb_set_parent(child, parent); -+ if (parent == old) { -+ parent->rb_right = child; -+ parent = node; -+ } else -+ parent->rb_left = child; -+ -+ node->rb_parent_color = old->rb_parent_color; -+ node->rb_right = old->rb_right; -+ node->rb_left = old->rb_left; -+ -+ if (rb_parent(old)) -+ { -+ if (rb_parent(old)->rb_left == old) -+ rb_parent(old)->rb_left = node; -+ else -+ rb_parent(old)->rb_right = node; -+ } else -+ root->rb_node = node; -+ -+ rb_set_parent(old->rb_left, node); -+ if (old->rb_right) -+ rb_set_parent(old->rb_right, node); -+ goto color; -+ } -+ -+ parent = rb_parent(node); -+ color = rb_color(node); -+ -+ if (child) -+ rb_set_parent(child, parent); -+ if (parent) -+ { -+ if (parent->rb_left == node) -+ parent->rb_left = child; -+ else -+ parent->rb_right = child; -+ } -+ else -+ root->rb_node = child; -+ -+ color: -+ if (color == RB_BLACK) -+ __rb_erase_color(child, parent, root); -+} -+ -+/* -+ * This function returns the first node (in sort order) of the tree. -+ */ -+struct rb_node *rb_first(const struct rb_root *root) -+{ -+ struct rb_node *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return NULL; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+ -+struct rb_node *rb_last(const struct rb_root *root) -+{ -+ struct rb_node *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return NULL; -+ while (n->rb_right) -+ n = n->rb_right; -+ return n; -+} -+ -+struct rb_node *rb_next(const struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+ if (rb_parent(node) == node) -+ return NULL; -+ -+ /* If we have a right-hand child, go down and then left as far -+ as we can. */ -+ if (node->rb_right) { -+ node = node->rb_right; -+ while (node->rb_left) -+ node=node->rb_left; -+ return (struct rb_node *)node; -+ } -+ -+ /* No right-hand children. Everything down and left is -+ smaller than us, so any 'next' node must be in the general -+ direction of our parent. Go up the tree; any time the -+ ancestor is a right-hand child of its parent, keep going -+ up. First time it's a left-hand child of its parent, said -+ parent is our 'next' node. */ -+ while ((parent = rb_parent(node)) && node == parent->rb_right) -+ node = parent; -+ -+ return parent; -+} -+ -+struct rb_node *rb_prev(const struct rb_node *node) -+{ -+ struct rb_node *parent; -+ -+ if (rb_parent(node) == node) -+ return NULL; -+ -+ /* If we have a left-hand child, go down and then right as far -+ as we can. */ -+ if (node->rb_left) { -+ node = node->rb_left; -+ while (node->rb_right) -+ node=node->rb_right; -+ return (struct rb_node *)node; -+ } -+ -+ /* No left-hand children. Go up till we find an ancestor which -+ is a right-hand child of its parent */ -+ while ((parent = rb_parent(node)) && node == parent->rb_left) -+ node = parent; -+ -+ return parent; -+} -+ -+void rb_replace_node(struct rb_node *victim, struct rb_node *new, -+ struct rb_root *root) -+{ -+ struct rb_node *parent = rb_parent(victim); -+ -+ /* Set the surrounding nodes to point to the replacement */ -+ if (parent) { -+ if (victim == parent->rb_left) -+ parent->rb_left = new; -+ else -+ parent->rb_right = new; -+ } else { -+ root->rb_node = new; -+ } -+ if (victim->rb_left) -+ rb_set_parent(victim->rb_left, new); -+ if (victim->rb_right) -+ rb_set_parent(victim->rb_right, new); -+ -+ /* Copy the pointers/colour from the victim to the replacement */ -+ *new = *victim; -+} -diff --git a/rbtree.h b/rbtree.h -new file mode 100644 -index 0000000..a8a0459 ---- /dev/null -+++ b/rbtree.h -@@ -0,0 +1,165 @@ -+/* -+ Red Black Trees -+ (C) 1999 Andrea Arcangeli -+ Taken from the Linux 2.6.30 source. -+ -+ This program is free software; you can redistribute it and/or modify -+ it under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 2 of the License, or -+ (at your option) any later version. -+ -+ This program is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ GNU General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with this program; if not, write to the Free Software -+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+ linux/include/linux/rbtree.h -+ -+ To use rbtrees you'll have to implement your own insert and search cores. -+ This will avoid us to use callbacks and to drop drammatically performances. -+ I know it's not the cleaner way, but in C (not in C++) to get -+ performances and genericity... -+ -+ Some example of insert and search follows here. The search is a plain -+ normal search over an ordered tree. The insert instead must be implemented -+ int two steps: as first thing the code must insert the element in -+ order as a red leaf in the tree, then the support library function -+ rb_insert_color() must be called. Such function will do the -+ not trivial work to rebalance the rbtree if necessary. -+ -+----------------------------------------------------------------------- -+static inline struct page * rb_search_page_cache(struct inode * inode, -+ unsigned long offset) -+{ -+ struct rb_node * n = inode->i_rb_page_cache.rb_node; -+ struct page * page; -+ -+ while (n) -+ { -+ page = rb_entry(n, struct page, rb_page_cache); -+ -+ if (offset < page->offset) -+ n = n->rb_left; -+ else if (offset > page->offset) -+ n = n->rb_right; -+ else -+ return page; -+ } -+ return NULL; -+} -+ -+static inline struct page * __rb_insert_page_cache(struct inode * inode, -+ unsigned long offset, -+ struct rb_node * node) -+{ -+ struct rb_node ** p = &inode->i_rb_page_cache.rb_node; -+ struct rb_node * parent = NULL; -+ struct page * page; -+ -+ while (*p) -+ { -+ parent = *p; -+ page = rb_entry(parent, struct page, rb_page_cache); -+ -+ if (offset < page->offset) -+ p = &(*p)->rb_left; -+ else if (offset > page->offset) -+ p = &(*p)->rb_right; -+ else -+ return page; -+ } -+ -+ rb_link_node(node, parent, p); -+ -+ return NULL; -+} -+ -+static inline struct page * rb_insert_page_cache(struct inode * inode, -+ unsigned long offset, -+ struct rb_node * node) -+{ -+ struct page * ret; -+ if ((ret = __rb_insert_page_cache(inode, offset, node))) -+ goto out; -+ rb_insert_color(node, &inode->i_rb_page_cache); -+ out: -+ return ret; -+} -+----------------------------------------------------------------------- -+*/ -+ -+#ifndef _LINUX_RBTREE_H -+#define _LINUX_RBTREE_H -+ -+#include -+ -+#define container_of(ptr, type, member) ({ \ -+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ -+ (type *)( (char *)__mptr - offsetof(type,member) );}) -+ -+struct rb_node -+{ -+ unsigned long rb_parent_color; -+#define RB_RED 0 -+#define RB_BLACK 1 -+ struct rb_node *rb_right; -+ struct rb_node *rb_left; -+} __attribute__((aligned(sizeof(long)))); -+ /* The alignment might seem pointless, but allegedly CRIS needs it */ -+ -+struct rb_root -+{ -+ struct rb_node *rb_node; -+}; -+ -+ -+#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -+#define rb_color(r) ((r)->rb_parent_color & 1) -+#define rb_is_red(r) (!rb_color(r)) -+#define rb_is_black(r) rb_color(r) -+#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -+#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) -+ -+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -+{ -+ rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -+} -+static inline void rb_set_color(struct rb_node *rb, int color) -+{ -+ rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -+} -+ -+#define RB_ROOT (struct rb_root) { NULL, } -+#define rb_entry(ptr, type, member) container_of(ptr, type, member) -+ -+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -+#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -+#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) -+ -+extern void rb_insert_color(struct rb_node *, struct rb_root *); -+extern void rb_erase(struct rb_node *, struct rb_root *); -+ -+/* Find logical next and previous nodes in a tree */ -+extern struct rb_node *rb_next(const struct rb_node *); -+extern struct rb_node *rb_prev(const struct rb_node *); -+extern struct rb_node *rb_first(const struct rb_root *); -+extern struct rb_node *rb_last(const struct rb_root *); -+ -+/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, -+ struct rb_root *root); -+ -+static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, -+ struct rb_node ** rb_link) -+{ -+ node->rb_parent_color = (unsigned long )parent; -+ node->rb_left = node->rb_right = NULL; -+ -+ *rb_link = node; -+} -+ -+#endif /* _LINUX_RBTREE_H */ --- -1.8.3.1 - diff --git a/0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch b/0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch deleted file mode 100644 index 531865f..0000000 --- a/0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch +++ /dev/null @@ -1,646 +0,0 @@ -From c62d0466b0e69ac8c724c9d917000f18aa147aae Mon Sep 17 00:00:00 2001 -From: wuyun -Date: Sat, 20 Jun 2020 20:26:22 +0800 -Subject: [PATCH] rasdaemon: add support for memory Corrected Error predictive failure analysis - -commit 9ae6b70effb8adc9572debc800b8e16173f74bb8 upstream - -Memory Corrected Error was corrected by hardware. These errors do not -require immediate software actions, but are still reported for -accounting and predictive failure analysis. - -Based on statistical results, some actions can be taken to prevent -Corrected Error from evoluting to Uncorrected Error. - -Signed-off-by: wuyun -Signed-off-by: lvying6 -Signed-off-by: Mauro Carvalho Chehab -Signed-off-by: Bixuan Cui ---- - Makefile.am | 7 +- - configure.ac | 12 ++ - man/rasdaemon.1.in | 7 + - misc/rasdaemon.env | 29 ++++ - misc/rasdaemon.service.in | 1 + - misc/rasdaemon.spec.in | 4 +- - ras-events.c | 6 + - ras-mc-handler.c | 7 + - ras-page-isolation.c | 332 ++++++++++++++++++++++++++++++++++++++ - ras-page-isolation.h | 66 ++++++++ - 10 files changed, 468 insertions(+), 3 deletions(-) - create mode 100644 misc/rasdaemon.env - create mode 100644 ras-page-isolation.c - create mode 100644 ras-page-isolation.h - -diff --git a/Makefile.am b/Makefile.am -index fccdeba..dc30ae7 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -1,6 +1,6 @@ - ACLOCAL_AMFLAGS=-I m4 - SUBDIRS = libtrace util man --SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in -+SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in misc/rasdaemon.env - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) - EXTRA_DIST = $(SYSTEMD_SERVICES_IN) - -@@ -51,13 +51,16 @@ endif - if WITH_HISI_NS_DECODE - rasdaemon_SOURCES += non-standard-hisi_hip07.c - endif -+if WITH_MEMORY_CE_PFA -+ rasdaemon_SOURCES += rbtree.c ras-page-isolation.c -+endif - - rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a - - include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ - ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ - ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ -- ras-memory-failure-handler.h -+ ras-memory-failure-handler.h rbtree.h ras-page-isolation.h - - # This rule can't be called with more than one Makefile job (like make -j8) - # I can't figure out a way to fix that -diff --git a/configure.ac b/configure.ac -index 8be33d9..1f95459 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -108,6 +108,17 @@ AS_IF([test "x$enable_hisi_ns_decode" = "xyes"], [ - ]) - AM_CONDITIONAL([WITH_HISI_NS_DECODE], [test x$enable_hisi_ns_decode = xyes]) - -+AC_ARG_ENABLE([memory_ce_pfa], -+ AS_HELP_STRING([--enable-memory-ce-pfa], [enable memory Corrected Error predictive failure analysis])) -+ -+AS_IF([test "x$enable_memory_ce_pfa" = "xyes"], [ -+ AC_DEFINE(HAVE_MEMORY_CE_PFA,1,"have memory corrected error predictive failure analysis") -+ AC_SUBST([WITH_MEMORY_CE_PFA]) -+]) -+AM_CONDITIONAL([WITH_MEMORY_CE_PFA], [test x$enable_memory_ce_pfa = xyes]) -+AM_COND_IF([WITH_MEMORY_CE_PFA], [USE_MEMORY_CE_PFA="yes"], [USE_MEMORY_CE_PFA="no"]) -+ -+ - test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc - - CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes" -@@ -138,4 +149,5 @@ compile time options summary - HIP07 SAS HW errors : $enable_hisi_ns_decode - ARM events : $enable_arm - Memory Failure : $USE_MEMORY_FAILURE -+ Memory CE PFA : $enable_memory_ce_pfa - EOF -diff --git a/man/rasdaemon.1.in b/man/rasdaemon.1.in -index 834df16..833c8e1 100644 ---- a/man/rasdaemon.1.in -+++ b/man/rasdaemon.1.in -@@ -62,6 +62,13 @@ feature. - .BI "--version" - Print the program version and exit. - -+.SH CONFIG FILE -+ -+The \fBrasdaemon\fR program supports a config file to set rasdaemon systemd service -+environment variables. By default the config file is read from /etc/sysconfig/rasdaemon. -+ -+The general format is environmentname=value. -+ - .SH SEE ALSO - \fBras-mc-ctl\fR(8) - -diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env -new file mode 100644 -index 0000000..12fd766 ---- /dev/null -+++ b/misc/rasdaemon.env -@@ -0,0 +1,29 @@ -+# Page Isolation -+# Note: Run-time configuration is unsupported, service restart needed. -+# Note: this file should be installed at /etc/sysconfig/rasdaemon -+ -+# Specify the threshold of isolating buggy pages. -+# -+# Format: -+# [0-9]+[unit] -+# Notice: please make sure match this format, rasdaemon will use default value for exception input cases. -+# -+# Supported units: -+# PAGE_CE_REFRESH_CYCLE: D|d (day), H|h (hour), M|m (min), default is in hour -+# PAGE_CE_THRESHOLD: K|k (x1000), M|m (x1000k), default is none -+# -+# The two configs will only take no effect when PAGE_CE_ACTION is "off". -+PAGE_CE_REFRESH_CYCLE="24h" -+PAGE_CE_THRESHOLD="50" -+ -+# Specify the internal action in rasdaemon to exceeding a page error threshold. -+# -+# off no action -+# account only account errors -+# soft try to soft-offline page without killing any processes -+# This requires an uptodate kernel. Might not be successfull. -+# hard try to hard-offline page by killing processes -+# Requires an uptodate kernel. Might not be successfull. -+# soft-then-hard First try to soft offline, then try hard offlining. -+# Note: default offline choice is "soft". -+PAGE_CE_ACTION="soft" -diff --git a/misc/rasdaemon.service.in b/misc/rasdaemon.service.in -index be9ad5a..e73a08a 100644 ---- a/misc/rasdaemon.service.in -+++ b/misc/rasdaemon.service.in -@@ -3,6 +3,7 @@ Description=RAS daemon to log the RAS events - After=syslog.target - - [Service] -+EnvironmentFile=/etc/sysconfig/rasdaemon - ExecStart=@sbindir@/rasdaemon -f -r - ExecStartPost=@sbindir@/rasdaemon --enable - ExecStop=@sbindir@/rasdaemon --disable -diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in -index 82fae30..f5faffe 100644 ---- a/misc/rasdaemon.spec.in -+++ b/misc/rasdaemon.spec.in -@@ -36,12 +36,13 @@ an utility for reporting current error counts from the EDAC sysfs files. - %setup -q - - %build --%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-non-standard --enable-hisi-ns-decode --enable-arm -+%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-ce-pfa - - make %{?_smp_mflags} - - %install - make install DESTDIR=%{buildroot} -+install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service - rm INSTALL %{buildroot}/usr/include/*.h -@@ -54,6 +55,7 @@ rm INSTALL %{buildroot}/usr/include/*.h - %{_unitdir}/*.service - %{_sharedstatedir}/rasdaemon - %{_sysconfdir}/ras/dimm_labels.d -+%config(noreplace) %{_sysconfdir}/sysconfig/%{name} - - %changelog - -diff --git a/ras-events.c b/ras-events.c -index 27ac1ab..5113c32 100644 ---- a/ras-events.c -+++ b/ras-events.c -@@ -36,6 +36,7 @@ - #include "ras-memory-failure-handler.h" - #include "ras-record.h" - #include "ras-logger.h" -+#include "ras-page-isolation.h" - - /* - * Polling time, if read() doesn't block. Currently, trace_pipe_raw never -@@ -673,6 +674,11 @@ int handle_ras_events(int record_events) - ras->page_size = page_size; - ras->record_events = record_events; - -+#ifdef HAVE_MEMORY_CE_PFA -+ /* FIXME: enable memory isolation unconditionally */ -+ ras_page_account_init(); -+#endif -+ - rc = add_event_handler(ras, pevent, page_size, "ras", "mc_event", - ras_mc_event_handler); - if (!rc) -diff --git a/ras-mc-handler.c b/ras-mc-handler.c -index deb7e05..42b05cd 100644 ---- a/ras-mc-handler.c -+++ b/ras-mc-handler.c -@@ -23,6 +23,7 @@ - #include "ras-mc-handler.h" - #include "ras-record.h" - #include "ras-logger.h" -+#include "ras-page-isolation.h" - #include "ras-report.h" - - int ras_mc_event_handler(struct trace_seq *s, -@@ -183,6 +184,12 @@ int ras_mc_event_handler(struct trace_seq *s, - - ras_store_mc_event(ras, &ev); - -+#ifdef HAVE_MEMORY_CE_PFA -+ /* Account page corrected errors */ -+ if (!strcmp(ev.error_type, "Corrected")) -+ ras_record_page_error(ev.address, ev.error_count, now); -+#endif -+ - #ifdef HAVE_ABRT_REPORT - /* Report event to ABRT */ - ras_report_mc_event(ras, &ev); -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -new file mode 100644 -index 0000000..50e4406 ---- /dev/null -+++ b/ras-page-isolation.c -@@ -0,0 +1,332 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include "ras-logger.h" -+#include "ras-page-isolation.h" -+ -+#define PARSED_ENV_LEN 50 -+static const struct config threshold_units[] = { -+ { "m", 1000 }, -+ { "k", 1000 }, -+ { "", 1 }, -+ {} -+}; -+ -+static const struct config cycle_units[] = { -+ { "d", 24 }, -+ { "h", 60 }, -+ { "m", 60 }, -+ { "s", 1 }, -+ {} -+}; -+ -+static struct isolation threshold = { -+ .name = "PAGE_CE_THRESHOLD", -+ .units = threshold_units, -+ .env = "50", -+ .unit = "", -+}; -+ -+static struct isolation cycle = { -+ .name = "PAGE_CE_REFRESH_CYCLE", -+ .units = cycle_units, -+ .env = "24h", -+ .unit = "h", -+}; -+ -+static const char *kernel_offline[] = { -+ [OFFLINE_SOFT] = "/sys/devices/system/memory/soft_offline_page", -+ [OFFLINE_HARD] = "/sys/devices/system/memory/hard_offline_page", -+ [OFFLINE_SOFT_THEN_HARD] = "/sys/devices/system/memory/soft_offline_page", -+}; -+ -+static const struct config offline_choice[] = { -+ { "off", OFFLINE_OFF }, -+ { "account", OFFLINE_ACCOUNT }, -+ { "soft", OFFLINE_SOFT }, -+ { "hard", OFFLINE_HARD }, -+ { "soft-then-hard", OFFLINE_SOFT_THEN_HARD }, -+ {} -+}; -+ -+static const char *page_state[] = { -+ [PAGE_ONLINE] = "online", -+ [PAGE_OFFLINE] = "offlined", -+ [PAGE_OFFLINE_FAILED] = "offline-failed", -+}; -+ -+static enum otype offline = OFFLINE_SOFT; -+static struct rb_root page_records; -+ -+static void page_offline_init(void) -+{ -+ const char *env = "PAGE_CE_ACTION"; -+ char *choice = getenv(env); -+ const struct config *c = NULL; -+ int matched = 0; -+ -+ if (choice) { -+ for (c = offline_choice; c->name; c++) { -+ if (!strcasecmp(choice, c->name)) { -+ offline = c->val; -+ matched = 1; -+ break; -+ } -+ } -+ } -+ -+ if (!matched) -+ log(TERM, LOG_INFO, "Improper %s, set to default soft\n", env); -+ -+ if (offline > OFFLINE_ACCOUNT && access(kernel_offline[offline], W_OK)) { -+ log(TERM, LOG_INFO, "Kernel does not support page offline interface\n"); -+ offline = OFFLINE_ACCOUNT; -+ } -+ -+ log(TERM, LOG_INFO, "Page offline choice on Corrected Errors is %s\n", -+ offline_choice[offline].name); -+} -+ -+static void parse_isolation_env(struct isolation *config) -+{ -+ char *env = getenv(config->name); -+ char *unit = NULL; -+ const struct config *units = NULL; -+ int i, no_unit; -+ int valid = 0; -+ int unit_matched = 0; -+ unsigned long value, tmp; -+ -+ /* check if env is vaild */ -+ if (env && strlen(env)) { -+ /* All the character before unit must be digit */ -+ for (i = 0; i < strlen(env) - 1; i++) { -+ if (!isdigit(env[i])) -+ goto parse; -+ } -+ if (sscanf(env, "%lu", &value) < 1 || !value) -+ goto parse; -+ /* check if the unit is vaild */ -+ unit = env + strlen(env) - 1; -+ /* no unit, all the character are value character */ -+ if (isdigit(*unit)) { -+ valid = 1; -+ no_unit = 1; -+ goto parse; -+ } -+ for (units = config->units; units->name; units++) { -+ /* value character and unit character are both valid */ -+ if (!strcasecmp(unit, units->name)) { -+ valid = 1; -+ no_unit = 0; -+ break; -+ } -+ } -+ } -+ -+parse: -+ /* if invalid, use default env */ -+ if (valid) { -+ config->env = env; -+ if (!no_unit) -+ config->unit = unit; -+ } else { -+ log(TERM, LOG_INFO, "Improper %s, set to default %s.\n", -+ config->name, config->env); -+ } -+ -+ /* if env value string is greater than ulong_max, truncate the last digit */ -+ sscanf(config->env, "%lu", &value); -+ for (units = config->units; units->name; units++) { -+ if (!strcasecmp(config->unit, units->name)) -+ unit_matched = 1; -+ if (unit_matched) { -+ tmp = value; -+ value *= units->val; -+ if (tmp != 0 && value / tmp != units->val) -+ config->overflow = true; -+ } -+ } -+ config->val = value; -+ /* In order to output value and unit perfectly */ -+ config->unit = no_unit ? config->unit : ""; -+} -+ -+static void parse_env_string(struct isolation *config, char *str) -+{ -+ int i; -+ -+ if (config->overflow) { -+ /* when overflow, use basic unit */ -+ for (i = 0; config->units[i].name; i++) ; -+ sprintf(str, "%lu%s", config->val, config->units[i-1].name); -+ log(TERM, LOG_INFO, "%s is set overflow(%s), truncate it\n", -+ config->name, config->env); -+ } else { -+ sprintf(str, "%s%s", config->env, config->unit); -+ } -+} -+ -+static void page_isolation_init(void) -+{ -+ char threshold_string[PARSED_ENV_LEN]; -+ char cycle_string[PARSED_ENV_LEN]; -+ /** -+ * It's unnecessary to parse threshold configuration when offline -+ * choice is off. -+ */ -+ if (offline == OFFLINE_OFF) -+ return; -+ -+ parse_isolation_env(&threshold); -+ parse_isolation_env(&cycle); -+ parse_env_string(&threshold, threshold_string); -+ parse_env_string(&cycle, cycle_string); -+ log(TERM, LOG_INFO, "Threshold of memory Corrected Errors is %s / %s\n", -+ threshold_string, cycle_string); -+} -+ -+void ras_page_account_init(void) -+{ -+ page_offline_init(); -+ page_isolation_init(); -+} -+ -+static int do_page_offline(unsigned long long addr, enum otype type) -+{ -+ FILE *offline_file; -+ int err; -+ -+ offline_file = fopen(kernel_offline[type], "w"); -+ if (!offline_file) -+ return -1; -+ -+ fprintf(offline_file, "%#llx", addr); -+ err = ferror(offline_file) ? -1 : 0; -+ fclose(offline_file); -+ -+ return err; -+} -+ -+static void page_offline(struct page_record *pr) -+{ -+ unsigned long long addr = pr->addr; -+ int ret; -+ -+ /* Offlining page is not required */ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ /* Ignore offlined pages */ -+ if (pr->offlined != PAGE_ONLINE) -+ return; -+ -+ /* Time to silence this noisy page */ -+ if (offline == OFFLINE_SOFT_THEN_HARD) { -+ ret = do_page_offline(addr, OFFLINE_SOFT); -+ if (ret < 0) -+ ret = do_page_offline(addr, OFFLINE_HARD); -+ } else { -+ ret = do_page_offline(addr, offline); -+ } -+ -+ pr->offlined = ret < 0 ? PAGE_OFFLINE_FAILED : PAGE_OFFLINE; -+ -+ log(TERM, LOG_INFO, "Result of offlining page at %#llx: %s\n", -+ addr, page_state[pr->offlined]); -+} -+ -+static void page_record(struct page_record *pr, unsigned count, time_t time) -+{ -+ unsigned long period = time - pr->start; -+ unsigned long tolerate; -+ -+ if (period >= cycle.val) { -+ /** -+ * Since we don't refresh automatically, it is possible that the period -+ * between two occurences will be longer than the pre-configured refresh cycle. -+ * In this case, we tolerate the frequency of the whole period up to -+ * the pre-configured threshold. -+ */ -+ tolerate = (period / (double)cycle.val) * threshold.val; -+ pr->count -= (tolerate > pr->count) ? pr->count : tolerate; -+ pr->start = time; -+ pr->excess = 0; -+ } -+ -+ pr->count += count; -+ if (pr->count >= threshold.val) { -+ log(TERM, LOG_INFO, "Corrected Errors at %#llx exceeded threshold\n", pr->addr); -+ -+ /** -+ * Backup ce count of current cycle to enable next round, which actually -+ * should never happen if we can disable overflow completely in the same -+ * time unit (but sadly we can't). -+ */ -+ pr->excess += pr->count; -+ pr->count = 0; -+ page_offline(pr); -+ } -+} -+ -+static struct page_record *page_lookup_insert(unsigned long long addr) -+{ -+ struct rb_node **entry = &page_records.rb_node; -+ struct rb_node *parent = NULL; -+ struct page_record *pr = NULL, *find = NULL; -+ -+ while (*entry) { -+ parent = *entry; -+ pr = rb_entry(parent, struct page_record, entry); -+ if (addr == pr->addr) { -+ return pr; -+ } else if (addr < pr->addr) { -+ entry = &(*entry)->rb_left; -+ } else { -+ entry = &(*entry)->rb_right; -+ } -+ } -+ -+ find = calloc(1, sizeof(struct page_record)); -+ if (!find) { -+ log(TERM, LOG_ERR, "No memory for page records\n"); -+ return NULL; -+ } -+ -+ find->addr = addr; -+ rb_link_node(&find->entry, parent, entry); -+ rb_insert_color(&find->entry, &page_records); -+ -+ return find; -+} -+ -+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) -+{ -+ struct page_record *pr = NULL; -+ -+ if (offline == OFFLINE_OFF) -+ return; -+ -+ pr = page_lookup_insert(addr & PAGE_MASK); -+ if (pr) { -+ if (!pr->start) -+ pr->start = time; -+ page_record(pr, count, time); -+ } -+} -diff --git a/ras-page-isolation.h b/ras-page-isolation.h -new file mode 100644 -index 0000000..3d03cef ---- /dev/null -+++ b/ras-page-isolation.h -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+*/ -+ -+#ifndef __RAS_PAGE_ISOLATION_H -+#define __RAS_PAGE_ISOLATION_H -+ -+#include -+#include -+#include "rbtree.h" -+ -+#define PAGE_SHIFT 12 -+#define PAGE_SIZE (1 << PAGE_SHIFT) -+#define PAGE_MASK (~(PAGE_SIZE-1)) -+ -+struct config { -+ char *name; -+ unsigned long val; -+}; -+ -+enum otype { -+ OFFLINE_OFF, -+ OFFLINE_ACCOUNT, -+ OFFLINE_SOFT, -+ OFFLINE_HARD, -+ OFFLINE_SOFT_THEN_HARD, -+}; -+ -+enum pstate { -+ PAGE_ONLINE, -+ PAGE_OFFLINE, -+ PAGE_OFFLINE_FAILED, -+}; -+ -+struct page_record { -+ struct rb_node entry; -+ unsigned long long addr; -+ time_t start; -+ enum pstate offlined; -+ unsigned long count; -+ unsigned long excess; -+}; -+ -+struct isolation { -+ char *name; -+ char *env; -+ const struct config *units; -+ unsigned long val; -+ bool overflow; -+ char *unit; -+}; -+ -+void ras_page_account_init(void); -+void ras_record_page_error(unsigned long long addr, unsigned count, time_t time); -+ -+#endif --- -2.27.0 - diff --git a/0023-rasdaemon-Add-notification-support-when-page-goes-of.patch b/0023-rasdaemon-Add-notification-support-when-page-goes-of.patch deleted file mode 100644 index 9132d67..0000000 --- a/0023-rasdaemon-Add-notification-support-when-page-goes-of.patch +++ /dev/null @@ -1,259 +0,0 @@ -From 07c3c72d18e5c7da2109b5afa918966733039f13 Mon Sep 17 00:00:00 2001 -From: Bixuan Cui -Date: Sun, 5 Jun 2022 02:10:24 +0800 -Subject: [PATCH] rasdaemon: Add notification support when page goes offline for Memory Corrected Error - -When the page goes offline, it may affect the user's processes. -The user needs to do some special actions (such as restarting the -process) before or after going offline. - -So add page-ce-offline-pre-notice and page-ce-offline-post-notice -to env file of rasdaemon for notifying the user when doing page -offline. - -Signed-off-by: Bixuan Cui ---- - Makefile.am | 2 +- - misc/notices/page-ce-offline-post-notice | 17 +++++ - misc/notices/page-ce-offline-pre-notice | 17 +++++ - misc/rasdaemon.env | 4 ++ - misc/rasdaemon.spec.in | 3 + - ras-page-isolation.c | 90 ++++++++++++++++++++++++ - 6 files changed, 132 insertions(+), 1 deletion(-) - create mode 100755 misc/notices/page-ce-offline-post-notice - create mode 100755 misc/notices/page-ce-offline-pre-notice - -diff --git a/Makefile.am b/Makefile.am -index de76301..701b120 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -1,6 +1,6 @@ - ACLOCAL_AMFLAGS=-I m4 - SUBDIRS = libtrace util man --SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in misc/rasdaemon.env -+SYSTEMD_SERVICES_IN = misc/rasdaemon.service.in misc/ras-mc-ctl.service.in misc/rasdaemon.env misc/notices - SYSTEMD_SERVICES = $(SYSTEMD_SERVICES_IN:.service.in=.service) - EXTRA_DIST = $(SYSTEMD_SERVICES_IN) - -diff --git a/misc/notices/page-ce-offline-post-notice b/misc/notices/page-ce-offline-post-notice -new file mode 100755 -index 0000000..d78b1b0 ---- /dev/null -+++ b/misc/notices/page-ce-offline-post-notice -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon after a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-post-notice.local ] && . ./page-ce-offline-post-notice.local $1 -+ -+if [ -d page-ce-offline-post-notice.extern ] -+then -+ ls page-ce-offline-post-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-post-notice.extern/$item ] && . ./page-ce-offline-post-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff --git a/misc/notices/page-ce-offline-pre-notice b/misc/notices/page-ce-offline-pre-notice -new file mode 100755 -index 0000000..d1038a3 ---- /dev/null -+++ b/misc/notices/page-ce-offline-pre-notice -@@ -0,0 +1,17 @@ -+#!/bin/sh -+# This shell script can be executed by rasdaemon before a page goes offline. -+ -+cd `dirname $0` -+ -+[ -x ./page-ce-offline-pre-notice.local ] && . ./page-ce-offline-pre-notice.local $1 -+ -+if [ -d page-ce-offline-pre-notice.extern ] -+then -+ ls page-ce-offline-pre-notice.extern | -+ while read item -+ do -+ [ -x ./page-ce-offline-pre-notice.extern/$item ] && . ./page-ce-offline-pre-notice.extern/$item $1 -+ done -+fi -+ -+exit 0 -diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env -index 12fd766..713875a 100644 ---- a/misc/rasdaemon.env -+++ b/misc/rasdaemon.env -@@ -27,3 +27,7 @@ PAGE_CE_THRESHOLD="50" - # soft-then-hard First try to soft offline, then try hard offlining. - # Note: default offline choice is "soft". - PAGE_CE_ACTION="soft" -+ -+# Notices script when doing memory offline -+PAGE_CE_OFFLINE_PRE_NOTICE="page-ce-offline-pre-notice" -+PAGE_CE_OFFLINE_POST_NOTICE="page-ce-offline-post-notice" -diff --git a/misc/rasdaemon.spec.in b/misc/rasdaemon.spec.in -index eff9794..f690575 100644 ---- a/misc/rasdaemon.spec.in -+++ b/misc/rasdaemon.spec.in -@@ -45,6 +45,8 @@ make install DESTDIR=%{buildroot} - install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} - install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service - install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service -+install -d %{buildroot}%{_sysconfdir}/rasdaemon_notices/ -+install -D -p -m 0755 misc/notices/* %{buildroot}%{_sysconfdir}/rasdaemon_notices/ - rm INSTALL %{buildroot}/usr/include/*.h - - %files -@@ -56,6 +58,7 @@ rm INSTALL %{buildroot}/usr/include/*.h - %{_sharedstatedir}/rasdaemon - %{_sysconfdir}/ras/dimm_labels.d - %config(noreplace) %{_sysconfdir}/sysconfig/%{name} -+%config(noreplace) %{_sysconfdir}/rasdaemon_notices/* - - %changelog - -diff --git a/ras-page-isolation.c b/ras-page-isolation.c -index 50e4406..f4f3bc1 100644 ---- a/ras-page-isolation.c -+++ b/ras-page-isolation.c -@@ -17,9 +17,13 @@ - #include - #include - #include -+#include -+#include -+#include - #include "ras-logger.h" - #include "ras-page-isolation.h" - -+#define MAX_PATH_LEN 64 - #define PARSED_ENV_LEN 50 - static const struct config threshold_units[] = { - { "m", 1000 }, -@@ -73,6 +77,8 @@ static const char *page_state[] = { - - static enum otype offline = OFFLINE_SOFT; - static struct rb_root page_records; -+static char pre_notice[MAX_PATH_LEN]; -+static char post_notice[MAX_PATH_LEN]; - - static void page_offline_init(void) - { -@@ -202,16 +208,94 @@ static void page_isolation_init(void) - threshold_string, cycle_string); - } - -+static void page_notice_init(void) -+{ -+ char *notice_root = "/etc/rasdaemon_notices"; -+ char *pre_re = getenv("PAGE_CE_OFFLINE_PRE_NOTICE"); -+ char *post_re = getenv("PAGE_CE_OFFLINE_POST_NOTICE"); -+ -+ if (offline <= OFFLINE_ACCOUNT) -+ return; -+ -+ snprintf(pre_notice, sizeof(pre_notice), "%s/%s", notice_root, pre_re); -+ if (access(pre_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", pre_notice); -+ -+ snprintf(post_notice, sizeof(post_notice), "%s/%s", notice_root, post_re); -+ if (access(post_notice, R_OK|X_OK) < 0) -+ log(TERM, LOG_ERR, "cannot access page notice '%s'\n", post_notice); -+} -+ - void ras_page_account_init(void) - { - page_offline_init(); - page_isolation_init(); -+ page_notice_init(); -+} -+ -+static void finish_child(pid_t child, int status) -+{ -+ if (WIFEXITED(status) && WEXITSTATUS(status)) { -+ log(TERM, LOG_INFO, "notice exited with status %d\n", WEXITSTATUS(status)); -+ } else if (WIFSIGNALED(status)) { -+ log(TERM, LOG_INFO,"notice died with signal %s\n", strsignal(WTERMSIG(status))); -+ } -+ -+ return; -+} -+ -+static void __run_notice(char *argv[], char **env) -+{ -+ pid_t child; -+ int status; -+ -+ child = fork(); -+ if (child < 0) { -+ log(TERM, LOG_ERR, "Cannot create process for offline notice"); -+ return; -+ } -+ if (child == 0) { -+ execve(argv[0], argv, env); -+ _exit(127); -+ } -+ else { -+ waitpid(child, &status, 0); -+ finish_child(child, status); -+ } -+} -+ -+static void run_notice(char *argv[]) -+{ -+ int MAX_ENV = 20; -+ char *env[MAX_ENV]; -+ int ei = 0; -+ int i; -+ -+ asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin"); -+ env[ei] = NULL; -+ assert(ei < MAX_ENV); -+ -+ __run_notice(argv, env); -+ -+ for (i = 0; i < ei; i++) -+ free(env[i]); - } - - static int do_page_offline(unsigned long long addr, enum otype type) - { - FILE *offline_file; - int err; -+ char *args; -+ char *argv[] = { -+ NULL, -+ NULL, -+ NULL, -+ }; -+ -+ asprintf(&args, "%llu", addr); -+ argv[0] = (char*)&pre_notice; -+ argv[1] = args; -+ run_notice(argv); - - offline_file = fopen(kernel_offline[type], "w"); - if (!offline_file) -@@ -221,6 +305,11 @@ static int do_page_offline(unsigned long long addr, enum otype type) - err = ferror(offline_file) ? -1 : 0; - fclose(offline_file); - -+ argv[0] = (char*)&post_notice; -+ run_notice(argv); -+ -+ free(args); -+ - return err; - } - -@@ -329,4 +418,5 @@ void ras_record_page_error(unsigned long long addr, unsigned count, time_t time) - pr->start = time; - page_record(pr, count, time); - } -+ - } --- -2.27.0 - diff --git a/1001-rasdaemon-avoid-multiple-definitions.patch b/1001-rasdaemon-avoid-multiple-definitions.patch deleted file mode 100644 index 109587b..0000000 --- a/1001-rasdaemon-avoid-multiple-definitions.patch +++ /dev/null @@ -1,24 +0,0 @@ -commit fd982af0a307edc5d3e56011d2e045015b1efd4b -Author: Mauro Carvalho Chehab -Date: Mon Mar 30 01:22:24 2020 +0200 - - ras-record.h: define an external var as such - - Otherwise, newer versions of gcc will produce multiple symbols, - causing link breakages. - - Signed-off-by: Mauro Carvalho Chehab - -diff --git a/ras-record.h b/ras-record.h -index 5311c67caf44..0d2a481c23dd 100644 ---- a/ras-record.h -+++ b/ras-record.h -@@ -25,7 +25,7 @@ - - extern long user_hz; - --struct ras_events *ras; -+extern struct ras_events *ras; - - struct ras_mc_event { - char timestamp[64]; diff --git a/899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch b/899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch new file mode 100644 index 0000000..8f26b51 --- /dev/null +++ b/899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch @@ -0,0 +1,71 @@ +commit 899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d +Author: Aristeu Rozanski +Date: Thu Jan 19 08:45:57 2023 -0500 + + rasdaemon: ras-report: fix possible but unlikely file descriptor leak + + Found with covscan. + + Signed-off-by: Aristeu Rozanski + Signed-off-by: Mauro Carvalho Chehab + +--- + ras-report.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- rasdaemon-0.6.1.orig/ras-report.c 2023-01-23 11:36:20.972368760 -0500 ++++ rasdaemon-0.6.1/ras-report.c 2023-01-23 11:36:23.236343267 -0500 +@@ -374,7 +374,7 @@ if(rc < 0){ + + mc_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -424,7 +424,7 @@ if(rc < 0){ + + aer_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -473,7 +473,7 @@ rc = 0; + + non_standard_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -518,7 +518,7 @@ rc = 0; + + arm_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -564,7 +564,7 @@ if(rc < 0){ + + mce_fail: + +- if(sockfd > 0){ ++ if(sockfd >= 0){ + close(sockfd); + } + +@@ -609,7 +609,7 @@ if (rc < 0) + done = 1; + + mf_fail: +- if (sockfd > 0) ++ if (sockfd >= 0) + close(sockfd); + + if (done) diff --git a/ce33041e0abfa20054ff5d6874ffbd1ab592558d.patch b/ce33041e0abfa20054ff5d6874ffbd1ab592558d.patch new file mode 100644 index 0000000..49b4ee1 --- /dev/null +++ b/ce33041e0abfa20054ff5d6874ffbd1ab592558d.patch @@ -0,0 +1,28 @@ +commit ce33041e0abfa20054ff5d6874ffbd1ab592558d +Author: Aristeu Rozanski +Date: Thu Jan 19 08:45:57 2023 -0500 + + rasdaemon: ras-memory-failure-handler: handle localtime() failure correctly + + We could just have an empty string but keeping the format could prevent + issues if someone is actually parsing this. + Found with covscan. + + v2: fixed the timestamp as pointed by Robert Elliott + + Signed-off-by: Aristeu Rozanski + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +index 9941e68..1951456 100644 +--- a/ras-memory-failure-handler.c ++++ b/ras-memory-failure-handler.c +@@ -148,6 +148,8 @@ int ras_memory_failure_event_handler(struct trace_seq *s, + if (tm) + strftime(ev.timestamp, sizeof(ev.timestamp), + "%Y-%m-%d %H:%M:%S %z", tm); ++ else ++ strncpy(ev.timestamp, "1970-01-01 00:00:00 +0000", sizeof(ev.timestamp)); + trace_seq_printf(s, "%s ", ev.timestamp); + + if (pevent_get_field_val(s, event, "pfn", record, &val, 1) < 0) diff --git a/dist b/dist new file mode 100644 index 0000000..9c0e36e --- /dev/null +++ b/dist @@ -0,0 +1 @@ +an8 diff --git a/e8b97ec14a11764fedfea50bd4d96ddda43c7fc1.patch b/e8b97ec14a11764fedfea50bd4d96ddda43c7fc1.patch new file mode 100644 index 0000000..9135e08 --- /dev/null +++ b/e8b97ec14a11764fedfea50bd4d96ddda43c7fc1.patch @@ -0,0 +1,24 @@ +commit e8b97ec14a11764fedfea50bd4d96ddda43c7fc1 +Author: Aristeu Rozanski +Date: Thu Jan 19 08:45:57 2023 -0500 + + rasdaemon: mce-amd-smca: properly limit bank types + + Found with covscan. + + Signed-off-by: Aristeu Rozanski + Signed-off-by: Mauro Carvalho Chehab + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index f3379fc..27ca8aa 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -584,7 +584,7 @@ static void decode_smca_error(struct mce_event *e) + return; + } + +- if (bank_type >= MAX_NR_BANKS) { ++ if (bank_type >= N_SMCA_BANK_TYPES) { + strcpy(e->mcastatus_msg, "Don't know how to decode this bank"); + return; + } diff --git a/rasdaemon.spec b/rasdaemon.spec index 30ed82d..0875add 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,7 +1,6 @@ -%define anolis_release .0.2 Name: rasdaemon Version: 0.6.1 -Release: 12%{anolis_release}%{?dist} +Release: 13%{?dist} Summary: Utility to receive RAS error tracings Group: Applications/System License: GPLv2 @@ -45,14 +44,9 @@ Patch17: 16d929b024c31d54a7f8a72eab094376c7be27f5.patch Patch18: b497a3d6a39d402c41065e9284d49114b97e3bfe.patch Patch19: ce6e7864f11f709c4f803828fbc8e507d115d03b.patch Patch20: a8c776ed94f68ae31d7b5f74e19545698898c13c.patch -Patch21: 0021-rasdaemon-add-rbtree-support-for-page-record.patch -Patch22: 0022-rasdaemon-add-support-for-memory-Corrected-Error-pre.patch -Patch23: 0023-rasdaemon-Add-notification-support-when-page-goes-of.patch - -# Begin: Anolis customized patches -# Backport from fc32 to fix FTBFS on gcc10 -Patch1001: 1001-rasdaemon-avoid-multiple-definitions.patch -# End: Anolis customized patches +Patch21: 899fcc2cf21c86b5462c8f4441cd9c92b3d75f7d.patch +Patch22: e8b97ec14a11764fedfea50bd4d96ddda43c7fc1.patch +Patch23: ce33041e0abfa20054ff5d6874ffbd1ab592558d.patch %description %{name} is a RAS (Reliability, Availability and Serviceability) logging tool. @@ -90,15 +84,13 @@ an utility for reporting current error counts from the EDAC sysfs files. %patch22 -p1 %patch23 -p1 -%patch1001 -p1 - # The tarball is locked in time the first time aclocal was ran and will keep # requiring an older version of automake autoreconf -vfi %build %ifarch %{arm} aarch64 -%configure --enable-aer --enable-sqlite3 --enable-abrt-report --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-ce-pfa +%configure --enable-aer --enable-sqlite3 --enable-abrt-report --enable-non-standard --enable-hisi-ns-decode --enable-arm %else %configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-memory-failure %endif @@ -109,11 +101,6 @@ make install DESTDIR=%{buildroot} install -D -p -m 0644 misc/rasdaemon.service %{buildroot}/%{_unitdir}/rasdaemon.service install -D -p -m 0644 misc/ras-mc-ctl.service %{buildroot}%{_unitdir}/ras-mc-ctl.service install -D -p -m 0655 labels/* %{buildroot}%{_sysconfdir}/ras/dimm_labels.d -%ifarch %{arm} aarch64 -install -D -p -m 0644 misc/rasdaemon.env %{buildroot}%{_sysconfdir}/sysconfig/%{name} -install -d %{buildroot}%{_sysconfdir}/rasdaemon_notices/ -install -D -p -m 0755 misc/notices/* %{buildroot}%{_sysconfdir}/rasdaemon_notices/ -%endif rm INSTALL %{buildroot}/usr/include/*.h %files @@ -124,18 +111,10 @@ rm INSTALL %{buildroot}/usr/include/*.h %{_unitdir}/*.service %{_sharedstatedir}/rasdaemon %{_sysconfdir}/ras/dimm_labels.d -%ifarch %{arm} aarch64 -%config(noreplace) %{_sysconfdir}/sysconfig/%{name} -%config(noreplace) %{_sysconfdir}/rasdaemon_notices/* -%endif %changelog -* Tue Jun 28 2022 Bixuan Cui - 0.6.1-12.0.2 -- rasdaemon: add support for memory Corrected Error predictive failure analysis -- rasdaemon: add notification support when page goes offline for Memory Corrected Error - -* Fri Apr 22 2022 Weitao Zhou - 0.6.1-12.0.1 -- use extern in header files when declaring global variables for compatible gcc10 build +* Mon Jan 23 2023 Aristeu Rozanski 0.6.1-13 +- Fixing covscan issues [2073516] * Tue Oct 12 2021 Aristeu Rozanski 0.6.1-12 - Adding missing bits from b497a3d6a39d402c41065e9284d49114b97e3bfe [1923254] -- Gitee