From 463e8e4d56289bb3160e0693f725e689c94bcfa1 Mon Sep 17 00:00:00 2001 From: Xiaofei Tan Date: Fri, 3 Dec 2021 11:26:06 +0800 Subject: [PATCH] Backport the feature of memory failure events support Only one patch: rasdaemon: add support for memory_failure events Signed-off-by: Xiaofei Tan --- ...dd-support-for-memory_failure-events.patch | 634 ++++++++++++++++++ rasdaemon.spec | 11 +- 2 files changed, 643 insertions(+), 2 deletions(-) create mode 100644 backport-rasdaemon-add-support-for-memory_failure-events.patch diff --git a/backport-rasdaemon-add-support-for-memory_failure-events.patch b/backport-rasdaemon-add-support-for-memory_failure-events.patch new file mode 100644 index 0000000..c67cd43 --- /dev/null +++ b/backport-rasdaemon-add-support-for-memory_failure-events.patch @@ -0,0 +1,634 @@ +From efb2a994b1e24c1e6645ec0dee27d8b3a7deae92 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: Tue, 30 Nov 2021 19:50:06 +0800 +Subject: [PATCH] rasdaemon: add support for memory_failure events + +Add support to log the memory_failure kernel trace +events. 
+ +Example rasdaemon log and SQLite DB output for the +memory_failure event, +================================================= +rasdaemon: memory_failure_event store: 0x126ce8f8 +rasdaemon: register inserted at db +<...>-785 [000] 0.000024: memory_failure_event: 2020-10-02 +13:27:13 -0400 pfn=0x204000000 page_type=free buddy page +action_result=Delayed + +CREATE TABLE memory_failure_event (id INTEGER PRIMARY KEY, timestamp +TEXT, pfn TEXT, page_type TEXT, action_result TEXT); +INSERT INTO memory_failure_event VALUES(1,'2020-10-02 13:27:13 +-0400','0x204000000','free buddy page','Delayed'); +================================================== + +Signed-off-by: Shiju Jose +Signed-off-by: Mauro Carvalho Chehab +--- + .travis.yml | 2 +- + Makefile.am | 5 +- + configure.ac | 11 +++ + ras-events.c | 15 +++ + ras-events.h | 1 + + ras-memory-failure-handler.c | 179 +++++++++++++++++++++++++++++++++++ + ras-memory-failure-handler.h | 25 +++++ + ras-record.c | 70 ++++++++++++++ + ras-record.h | 13 +++ + ras-report.c | 68 +++++++++++++ + ras-report.h | 2 + + 11 files changed, 389 insertions(+), 2 deletions(-) + create mode 100644 ras-memory-failure-handler.c + create mode 100644 ras-memory-failure-handler.h + +diff --git a/.travis.yml b/.travis.yml +index 5ab3957..b865e1d 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -20,7 +20,7 @@ before_install: + - sudo apt-get install -y sqlite3 + install: + - autoreconf -vfi +-- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa --enable-cpu-fault-isolation ++- ./configure --enable-sqlite3 --enable-aer --enable-non-standard --enable-arm --enable-mce --enable-extlog --enable-devlink --enable-diskerror --enable-abrt-report --enable-hisi-ns-decode --enable-memory-ce-pfa --enable-cpu-fault-isolation --enable-memory-failure + + script: + - make && sudo make install +diff --git 
a/Makefile.am b/Makefile.am +index 61dc2cc..a032352 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -48,6 +48,9 @@ endif + if WITH_DISKERROR + rasdaemon_SOURCES += ras-diskerror-handler.c + endif ++if WITH_MEMORY_FAILURE ++ rasdaemon_SOURCES += ras-memory-failure-handler.c ++endif + if WITH_ABRT_REPORT + rasdaemon_SOURCES += ras-report.c + endif +@@ -66,7 +69,7 @@ include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \ + ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \ + ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \ + ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \ +- ras-cpu-isolation.h queue.h ++ ras-cpu-isolation.h queue.h non-standard-hisilicon.h ras-memory-failure-handler.h + + # This rule can't be called with more than one Makefile job (like make -j8) + # I can't figure out a way to fix that +diff --git a/configure.ac b/configure.ac +index a682bb9..fd67be8 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -111,6 +111,16 @@ AS_IF([test "x$enable_diskerror" = "xyes" || test "x$enable_all" == "xyes"], [ + AM_CONDITIONAL([WITH_DISKERROR], [test x$enable_diskerror = xyes || test x$enable_all == xyes]) + AM_COND_IF([WITH_DISKERROR], [USE_DISKERROR="yes"], [USE_DISKERROR="no"]) + ++AC_ARG_ENABLE([memory_failure], ++ AS_HELP_STRING([--enable-memory-failure], [enable memory failure events (currently experimental)])) ++ ++AS_IF([test "x$enable_memory_failure" = "xyes" || test "x$enable_all" == "xyes"], [ ++ AC_DEFINE(HAVE_MEMORY_FAILURE,1,"have memory failure events collect") ++ AC_SUBST([WITH_MEMORY_FAILURE]) ++]) ++AM_CONDITIONAL([WITH_MEMORY_FAILURE], [test x$enable_memory_failure = xyes || test x$enable_all == xyes]) ++AM_COND_IF([WITH_MEMORY_FAILURE], [USE_MEMORY_FAILURE="yes"], [USE_MEMORY_FAILURE="no"]) ++ + AC_ARG_ENABLE([abrt_report], + AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)])) + +@@ -182,6 +192,7 @@ compile time 
options summary + ARM events : $USE_ARM + DEVLINK : $USE_DEVLINK + Disk I/O errors : $USE_DISKERROR ++ Memory Failure : $USE_MEMORY_FAILURE + Memory CE PFA : $USE_MEMORY_CE_PFA + CPU fault isolation : $USE_CPU_FAULT_ISOLATION + EOF +diff --git a/ras-events.c b/ras-events.c +index 31c4170..92ae2c8 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -37,6 +37,7 @@ + #include "ras-extlog-handler.h" + #include "ras-devlink-handler.h" + #include "ras-diskerror-handler.h" ++#include "ras-memory-failure-handler.h" + #include "ras-record.h" + #include "ras-logger.h" + #include "ras-page-isolation.h" +@@ -256,6 +257,10 @@ int toggle_ras_mc_event(int enable) + rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable); + #endif + ++#ifdef HAVE_MEMORY_FAILURE ++ rc |= __toggle_ras_mc_event(ras, "ras", "memory_failure_event", enable); ++#endif ++ + free_ras: + free(ras); + return rc; +@@ -938,6 +943,16 @@ int handle_ras_events(int record_events) + } + #endif + ++#ifdef HAVE_MEMORY_FAILURE ++ rc = add_event_handler(ras, pevent, page_size, "ras", "memory_failure_event", ++ ras_memory_failure_event_handler, NULL, MF_EVENT); ++ if (!rc) ++ num_events++; ++ else ++ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n", ++ "ras", "memory_failure_event"); ++#endif ++ + if (!num_events) { + log(ALL, LOG_INFO, + "Failed to trace all supported RAS events. Aborting.\n"); +diff --git a/ras-events.h b/ras-events.h +index f028741..dfd690c 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -38,6 +38,7 @@ enum { + EXTLOG_EVENT, + DEVLINK_EVENT, + DISKERROR_EVENT, ++ MF_EVENT, + NR_EVENTS + }; + +diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c +new file mode 100644 +index 0000000..9941e68 +--- /dev/null ++++ b/ras-memory-failure-handler.c +@@ -0,0 +1,179 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ */ ++ ++#include ++#include ++#include ++#include "libtrace/kbuffer.h" ++#include "ras-memory-failure-handler.h" ++#include "ras-record.h" ++#include "ras-logger.h" ++#include "ras-report.h" ++ ++/* Memory failure - various types of pages */ ++enum mf_action_page_type { ++ MF_MSG_KERNEL, ++ MF_MSG_KERNEL_HIGH_ORDER, ++ MF_MSG_SLAB, ++ MF_MSG_DIFFERENT_COMPOUND, ++ MF_MSG_POISONED_HUGE, ++ MF_MSG_HUGE, ++ MF_MSG_FREE_HUGE, ++ MF_MSG_NON_PMD_HUGE, ++ MF_MSG_UNMAP_FAILED, ++ MF_MSG_DIRTY_SWAPCACHE, ++ MF_MSG_CLEAN_SWAPCACHE, ++ MF_MSG_DIRTY_MLOCKED_LRU, ++ MF_MSG_CLEAN_MLOCKED_LRU, ++ MF_MSG_DIRTY_UNEVICTABLE_LRU, ++ MF_MSG_CLEAN_UNEVICTABLE_LRU, ++ MF_MSG_DIRTY_LRU, ++ MF_MSG_CLEAN_LRU, ++ MF_MSG_TRUNCATED_LRU, ++ MF_MSG_BUDDY, ++ MF_MSG_BUDDY_2ND, ++ MF_MSG_DAX, ++ MF_MSG_UNSPLIT_THP, ++ MF_MSG_UNKNOWN, ++}; ++ ++/* Action results for various types of pages */ ++enum mf_action_result { ++ MF_IGNORED, /* Error: cannot be handled */ ++ MF_FAILED, /* Error: handling failed */ ++ MF_DELAYED, /* Will be handled later */ ++ MF_RECOVERED, /* Successfully recovered */ ++}; ++ ++/* memory failure page types */ ++static const struct { ++ int type; ++ const char *page_type; ++} mf_page_type[] = { ++ { MF_MSG_KERNEL, "reserved kernel page" }, ++ { MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page"}, ++ { MF_MSG_SLAB, "kernel slab page"}, ++ { MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking"}, ++ { MF_MSG_POISONED_HUGE, "huge page already 
hardware poisoned"}, ++ { MF_MSG_HUGE, "huge page"}, ++ { MF_MSG_FREE_HUGE, "free huge page"}, ++ { MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page"}, ++ { MF_MSG_UNMAP_FAILED, "unmapping failed page"}, ++ { MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page"}, ++ { MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page"}, ++ { MF_MSG_DIRTY_MLOCKED_LRU, "dirty mlocked LRU page"}, ++ { MF_MSG_CLEAN_MLOCKED_LRU, "clean mlocked LRU page"}, ++ { MF_MSG_DIRTY_UNEVICTABLE_LRU, "dirty unevictable LRU page"}, ++ { MF_MSG_CLEAN_UNEVICTABLE_LRU, "clean unevictable LRU page"}, ++ { MF_MSG_DIRTY_LRU, "dirty LRU page"}, ++ { MF_MSG_CLEAN_LRU, "clean LRU page"}, ++ { MF_MSG_TRUNCATED_LRU, "already truncated LRU page"}, ++ { MF_MSG_BUDDY, "free buddy page"}, ++ { MF_MSG_BUDDY_2ND, "free buddy page (2nd try)"}, ++ { MF_MSG_DAX, "dax page"}, ++ { MF_MSG_UNSPLIT_THP, "unsplit thp"}, ++ { MF_MSG_UNKNOWN, "unknown page"}, ++}; ++ ++/* memory failure action results */ ++static const struct { ++ int result; ++ const char *action_result; ++} mf_action_result[] = { ++ { MF_IGNORED, "Ignored" }, ++ { MF_FAILED, "Failed" }, ++ { MF_DELAYED, "Delayed" }, ++ { MF_RECOVERED, "Recovered" }, ++}; ++ ++static const char *get_page_type(int page_type) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(mf_page_type); i++) ++ if (mf_page_type[i].type == page_type) ++ return mf_page_type[i].page_type; ++ ++ return "unknown page"; ++} ++ ++static const char *get_action_result(int result) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(mf_action_result); i++) ++ if (mf_action_result[i].result == result) ++ return mf_action_result[i].action_result; ++ ++ return "unknown"; ++} ++ ++ ++int ras_memory_failure_event_handler(struct trace_seq *s, ++ struct pevent_record *record, ++ struct event_format *event, void *context) ++{ ++ unsigned long long val; ++ struct ras_events *ras = context; ++ time_t now; ++ struct tm *tm; ++ struct ras_mf_event ev; ++ ++ /* ++ * Newer kernels (3.10-rc1 or upper) provide an uptime clock. 
++ * On previous kernels, the way to properly generate an event would ++ * be to inject a fake one, measure its timestamp and diff it against ++ * gettimeofday. We won't do it here. Instead, let's use uptime, ++ * falling-back to the event report's time, if "uptime" clock is ++ * not available (legacy kernels). ++ */ ++ ++ if (ras->use_uptime) ++ now = record->ts/user_hz + ras->uptime_diff; ++ else ++ now = time(NULL); ++ ++ tm = localtime(&now); ++ if (tm) ++ strftime(ev.timestamp, sizeof(ev.timestamp), ++ "%Y-%m-%d %H:%M:%S %z", tm); ++ trace_seq_printf(s, "%s ", ev.timestamp); ++ ++ if (pevent_get_field_val(s, event, "pfn", record, &val, 1) < 0) ++ return -1; ++ sprintf(ev.pfn, "0x%llx", val); ++ trace_seq_printf(s, "pfn=0x%llx ", val); ++ ++ if (pevent_get_field_val(s, event, "type", record, &val, 1) < 0) ++ return -1; ++ ev.page_type = get_page_type(val); ++ trace_seq_printf(s, "page_type=%s ", ev.page_type); ++ ++ if (pevent_get_field_val(s, event, "result", record, &val, 1) < 0) ++ return -1; ++ ev.action_result = get_action_result(val); ++ trace_seq_printf(s, "action_result=%s ", ev.action_result); ++ ++ /* Store data into the SQLite DB */ ++#ifdef HAVE_SQLITE3 ++ ras_store_mf_event(ras, &ev); ++#endif ++ ++#ifdef HAVE_ABRT_REPORT ++ /* Report event to ABRT */ ++ ras_report_mf_event(ras, &ev); ++#endif ++ ++ return 0; ++} +diff --git a/ras-memory-failure-handler.h b/ras-memory-failure-handler.h +new file mode 100644 +index 0000000..b9e9971 +--- /dev/null ++++ b/ras-memory-failure-handler.h +@@ -0,0 +1,25 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2020. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++*/ ++ ++#ifndef __RAS_MEMORY_FAILURE_HANDLER_H ++#define __RAS_MEMORY_FAILURE_HANDLER_H ++ ++#include "ras-events.h" ++#include "libtrace/event-parse.h" ++ ++int ras_memory_failure_event_handler(struct trace_seq *s, ++ struct pevent_record *record, ++ struct event_format *event, void *context); ++ ++#endif +diff --git a/ras-record.c b/ras-record.c +index 33d4741..27863c7 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -506,6 +506,56 @@ int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev + } + #endif + ++/* ++ * Table and functions to handle ras:memory_failure ++ */ ++ ++#ifdef HAVE_MEMORY_FAILURE ++static const struct db_fields mf_event_fields[] = { ++ { .name="id", .type="INTEGER PRIMARY KEY" }, ++ { .name="timestamp", .type="TEXT" }, ++ { .name="pfn", .type="TEXT" }, ++ { .name="page_type", .type="TEXT" }, ++ { .name="action_result", .type="TEXT" }, ++}; ++ ++static const struct db_table_descriptor mf_event_tab = { ++ .name = "memory_failure_event", ++ .fields = mf_event_fields, ++ .num_fields = ARRAY_SIZE(mf_event_fields), ++}; ++ ++int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) ++{ ++ int rc; ++ struct sqlite3_priv *priv = ras->db_priv; ++ ++ if (!priv || !priv->stmt_mf_event) ++ return 0; ++ log(TERM, LOG_INFO, "memory_failure_event store: %p\n", priv->stmt_mf_event); ++ ++ sqlite3_bind_text(priv->stmt_mf_event, 1, ev->timestamp, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mf_event, 2, ev->pfn, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mf_event, 3, ev->page_type, -1, NULL); ++ sqlite3_bind_text(priv->stmt_mf_event, 4, ev->action_result, -1, NULL); ++ ++ rc = sqlite3_step(priv->stmt_mf_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, 
LOG_ERR, ++ "Failed to do memory_failure_event step on sqlite: error = %d\n", rc); ++ ++ rc = sqlite3_reset(priv->stmt_mf_event); ++ if (rc != SQLITE_OK && rc != SQLITE_DONE) ++ log(TERM, LOG_ERR, ++ "Failed reset memory_failure_event on sqlite: error = %d\n", ++ rc); ++ ++ log(TERM, LOG_INFO, "register inserted at db\n"); ++ ++ return rc; ++} ++#endif ++ + /* + * Generic code + */ +@@ -818,6 +868,16 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + } + #endif + ++#ifdef HAVE_MEMORY_FAILURE ++ rc = ras_mc_create_table(priv, &mf_event_tab); ++ if (rc == SQLITE_OK) { ++ rc = ras_mc_prepare_stmt(priv, &priv->stmt_mf_event, ++ &mf_event_tab); ++ if (rc != SQLITE_OK) ++ goto error; ++ } ++#endif ++ + ras->db_priv = priv; + return 0; + +@@ -920,6 +980,16 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + } + #endif + ++#ifdef HAVE_MEMORY_FAILURE ++ if (priv->stmt_mf_event) { ++ rc = sqlite3_finalize(priv->stmt_mf_event); ++ if (rc != SQLITE_OK) ++ log(TERM, LOG_ERR, ++ "cpu %u: Failed to finalize mf_event sqlite: error = %d\n", ++ cpu, rc); ++ } ++#endif ++ + rc = sqlite3_close_v2(db); + if (rc != SQLITE_OK) + log(TERM, LOG_ERR, +diff --git a/ras-record.h b/ras-record.h +index b453f83..830202f 100644 +--- a/ras-record.h ++++ b/ras-record.h +@@ -103,6 +103,13 @@ struct diskerror_event { + const char *cmd; + }; + ++struct ras_mf_event { ++ char timestamp[64]; ++ char pfn[30]; ++ const char *page_type; ++ const char *action_result; ++}; ++ + struct ras_mc_event; + struct ras_aer_event; + struct ras_extlog_event; +@@ -111,6 +118,7 @@ struct ras_arm_event; + struct mce_event; + struct devlink_event; + struct diskerror_event; ++struct ras_mf_event; + + #ifdef HAVE_SQLITE3 + +@@ -140,6 +148,9 @@ struct sqlite3_priv { + #ifdef HAVE_DISKERROR + sqlite3_stmt *stmt_diskerror_event; + #endif ++#ifdef HAVE_MEMORY_FAILURE ++ sqlite3_stmt *stmt_mf_event; ++#endif + }; + + struct db_fields { +@@ -166,6 +177,7 @@ int 
ras_store_non_standard_record(struct ras_events *ras, struct ras_non_standar + int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev); + int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev); + int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); ++int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev); + + #else + static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; }; +@@ -178,6 +190,7 @@ static inline int ras_store_non_standard_record(struct ras_events *ras, struct r + static inline int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; + static inline int ras_store_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; + static inline int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { return 0; }; ++static inline int ras_store_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; + + #endif + +diff --git a/ras-report.c b/ras-report.c +index 2710eac..ea3a9b6 100644 +--- a/ras-report.c ++++ b/ras-report.c +@@ -309,6 +309,28 @@ static int set_diskerror_event_backtrace(char *buf, struct diskerror_event *ev) + return 0; + } + ++static int set_mf_event_backtrace(char *buf, struct ras_mf_event *ev) ++{ ++ char bt_buf[MAX_BACKTRACE_SIZE]; ++ ++ if (!buf || !ev) ++ return -1; ++ ++ sprintf(bt_buf, "BACKTRACE=" \ ++ "timestamp=%s\n" \ ++ "pfn=%s\n" \ ++ "page_type=%s\n" \ ++ "action_result=%s\n", \ ++ ev->timestamp, \ ++ ev->pfn, \ ++ ev->page_type, \ ++ ev->action_result); ++ ++ strcat(buf, bt_buf); ++ ++ return 0; ++} ++ + static int commit_report_backtrace(int sockfd, int type, void *ev){ + char buf[MAX_BACKTRACE_SIZE]; + char *pbuf = buf; +@@ -343,6 +365,9 @@ static int commit_report_backtrace(int sockfd, int type, void *ev){ + case DISKERROR_EVENT: + rc = set_diskerror_event_backtrace(buf, (struct diskerror_event *)ev); + break; ++ 
case MF_EVENT: ++ rc = set_mf_event_backtrace(buf, (struct ras_mf_event *)ev); ++ break; + default: + return -1; + } +@@ -708,3 +733,46 @@ diskerror_fail: + return -1; + } + } ++ ++int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) ++{ ++ char buf[MAX_MESSAGE_SIZE]; ++ int sockfd = 0; ++ int done = 0; ++ int rc = -1; ++ ++ memset(buf, 0, sizeof(buf)); ++ ++ sockfd = setup_report_socket(); ++ if (sockfd < 0) ++ return -1; ++ ++ rc = commit_report_basic(sockfd); ++ if (rc < 0) ++ goto mf_fail; ++ ++ rc = commit_report_backtrace(sockfd, MF_EVENT, ev); ++ if (rc < 0) ++ goto mf_fail; ++ ++ sprintf(buf, "ANALYZER=%s", "rasdaemon-memory_failure"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto mf_fail; ++ ++ sprintf(buf, "REASON=%s", "memory failure problem"); ++ rc = write(sockfd, buf, strlen(buf) + 1); ++ if (rc < strlen(buf) + 1) ++ goto mf_fail; ++ ++ done = 1; ++ ++mf_fail: ++ if (sockfd > 0) ++ close(sockfd); ++ ++ if (done) ++ return 0; ++ else ++ return -1; ++} +diff --git a/ras-report.h b/ras-report.h +index 1d911de..e605eb1 100644 +--- a/ras-report.h ++++ b/ras-report.h +@@ -38,6 +38,7 @@ int ras_report_non_standard_event(struct ras_events *ras, struct ras_non_standar + int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev); + int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev); + int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev); ++int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev); + + #else + +@@ -48,6 +49,7 @@ static inline int ras_report_non_standard_event(struct ras_events *ras, struct r + static inline int ras_report_arm_event(struct ras_events *ras, struct ras_arm_event *ev) { return 0; }; + static inline int ras_report_devlink_event(struct ras_events *ras, struct devlink_event *ev) { return 0; }; + static inline int ras_report_diskerror_event(struct ras_events *ras, struct diskerror_event *ev) { 
return 0; }; ++static inline int ras_report_mf_event(struct ras_events *ras, struct ras_mf_event *ev) { return 0; }; + + #endif + +-- +2.33.0 + diff --git a/rasdaemon.spec b/rasdaemon.spec index 205d8a5..8e62c0d 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.6 -Release: 8 +Release: 9 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -37,6 +37,7 @@ Patch15: 0004-rasdaemon-Add-some-modules-supported-by-hisi-common-.patch Patch16: 0006-add-cpu-online-fault-isolation.patch Patch17: 0007-add-trace-print-and-add-sqlite-store.patch Patch18: 0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch +Patch19: backport-rasdaemon-add-support-for-memory_failure-events.patch %description The rasdaemon program is a daemon which monitors the platform @@ -55,7 +56,7 @@ autoheader libtoolize --automake --copy --debug --force automake --add-missing %ifarch %{arm} aarch64 -%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror --enable-non-standard --enable-hisi-ns-decode --enable-arm +%configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror --enable-non-standard --enable-hisi-ns-decode --enable-arm --enable-memory-failure %else %configure --enable-mce --enable-aer --enable-sqlite3 --enable-extlog --enable-abrt-report --enable-devlink --enable-diskerror %endif @@ -83,6 +84,12 @@ rm INSTALL %{buildroot}/usr/include/*.h /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Thu Dec 2 2021 tanxiaofei - 0.6.6-9 +- Type:feature +- ID:NA +- SUG:NA +- DESC: Backport memory failure feature, one patch. + * Wed Oct 27 2021 luoshengwei - 0.6.6-8 - Type:feature - ID:NA -- Gitee