From 09eae3860b99c27ce41d7247b058ee7654e00e9a Mon Sep 17 00:00:00 2001 From: huangfangrun Date: Tue, 4 Apr 2023 10:24:15 +0800 Subject: [PATCH] rasdaemon: Fix for regression in ras_mc_create_table() if some cpus are offline at the system start and Fix poll() on per_cpu trace_pipe_raw blocks indefinitely --- ...r-regression-in-ras_mc_create_table-.patch | 165 ++++++++++++++++++ ...ll-on-per_cpu-trace_pipe_raw-blocks-.patch | 85 +++++++++ rasdaemon.spec | 12 +- 3 files changed, 261 insertions(+), 1 deletion(-) create mode 100644 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch create mode 100644 0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch diff --git a/0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch b/0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch new file mode 100644 index 0000000..64f60f6 --- /dev/null +++ b/0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch @@ -0,0 +1,165 @@ +From e53389e7d7bd805900386b979fb3d48f1e79a7bc Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sun, 5 Mar 2023 23:14:42 +0000 +Subject: [PATCH] rasdaemon: Fix for regression in ras_mc_create_table() if + some cpus are offline at the system start + +Issues: +Regression in the ras_mc_create_table() if some of the cpus are offline +at the system start when run the rasdaemon. This issue is +reproducible in ras_mc_create_table() with decode and record +non-standard events and reproducible sometimes with +ras_mc_create_table() for the standard events. +Also in the multi thread way, there is memory leak in ras_mc_event_opendb() +as struct sqlite3_priv *priv and sqlite3 *db allocated/initialized per +thread, but stored in the common struct ras_events ras in pthread data, +which is shared across the threads. + +Reason: +when the system start with some of the cpus are offline and then run +the rasdaemon, read_ras_event_all_cpus() exit with error and switch to +the multi thread way. However read() in read_ras_event() return error in +threads for each of the offline CPUs and does clean up including calling +ras_mc_event_closedb(). +Since the 'struct ras_events ras' passed in the pthread_data to each of the +threads is common, struct sqlite3_priv *priv and sqlite3 *db allocated/ +initialized per thread and stored in the common 'struct ras_events ras', +are getting overwritten in each ras_mc_event_opendb()(which called from +pthread per cpu), result memory leak. Also when ras_mc_event_closedb() +is called in the above error case from the threads corresponding to the +offline cpus, close the sqlite3 *db and free sqlite3_priv *priv stored +in the common 'struct ras_events ras', result regression when accessing +priv->db in the ras_mc_create_table() from another context later. + +Proposed solution: +In ras_mc_event_opendb(), allocate struct sqlite3_priv *priv, +init sqlite3 *db and create tables common for the threads with shared +'struct ras_events ras' based on a reference count and free them in the +same way. +Also protect critical code ras_mc_event_opendb() and ras_mc_event_closedb() +using mutex in the multi thread case from any regression caused by the +thread pre-emption. + +Reported-by: Lei Feng +Signed-off-by: Shiju Jose +--- + ras-events.c | 16 +++++++++++++++- + ras-events.h | 4 +++- + ras-record.c | 12 ++++++++++++ + 3 files changed, 30 insertions(+), 2 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index 49e4f9a..5fe8e19 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -625,19 +625,25 @@ static void *handle_ras_events_cpu(void *priv) + + log(TERM, LOG_INFO, "Listening to events on cpu %d\n", pdata->cpu); + if (pdata->ras->record_events) { ++ pthread_mutex_lock(&pdata->ras->db_lock); + if (ras_mc_event_opendb(pdata->cpu, pdata->ras)) { ++ pthread_mutex_unlock(&pdata->ras->db_lock); + log(TERM, LOG_ERR, "Can't open database\n"); + close(fd); + kbuffer_free(kbuf); + free(page); + return 0; + } ++ pthread_mutex_unlock(&pdata->ras->db_lock); + } + + read_ras_event(fd, pdata, kbuf, page); + +- if (pdata->ras->record_events) ++ if (pdata->ras->record_events) { ++ pthread_mutex_lock(&pdata->ras->db_lock); + ras_mc_event_closedb(pdata->cpu, pdata->ras); ++ pthread_mutex_unlock(&pdata->ras->db_lock); ++ } + + close(fd); + kbuffer_free(kbuf); +@@ -993,6 +999,11 @@ int handle_ras_events(int record_events) + + /* Poll doesn't work on this kernel. Fallback to pthread way */ + if (rc == -255) { ++ if (pthread_mutex_init(&ras->db_lock, NULL) != 0) { ++ log(SYSLOG, LOG_INFO, "sqlite db lock init has failed\n"); ++ goto err; ++ } ++ + log(SYSLOG, LOG_INFO, + "Opening one thread per cpu (%d threads)\n", cpus); + for (i = 0; i < cpus; i++) { +@@ -1005,6 +1016,8 @@ int handle_ras_events(int record_events) + i); + while (--i) + pthread_cancel(data[i].thread); ++ ++ pthread_mutex_destroy(&ras->db_lock); + goto err; + } + } +@@ -1012,6 +1025,7 @@ int handle_ras_events(int record_events) + /* Wait for all threads to complete */ + for (i = 0; i < cpus; i++) + pthread_join(data[i].thread, NULL); ++ pthread_mutex_destroy(&ras->db_lock); + } + + log(SYSLOG, LOG_INFO, "Huh! something got wrong. Aborting.\n"); +diff --git a/ras-events.h b/ras-events.h +index 6c9f507..649b0c0 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -56,7 +56,9 @@ struct ras_events { + time_t uptime_diff; + + /* For ras-record */ +- void *db_priv; ++ void *db_priv; ++ int db_ref_count; ++ pthread_mutex_t db_lock; + + /* For the mce handler */ + struct mce_priv *mce_priv; +diff --git a/ras-record.c b/ras-record.c +index a367939..adc97a4 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -763,6 +763,10 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + + printf("Calling %s()\n", __FUNCTION__); + ++ ras->db_ref_count++; ++ if (ras->db_ref_count > 1) ++ return 0; ++ + ras->db_priv = NULL; + + priv = calloc(1, sizeof(*priv)); +@@ -912,6 +916,13 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + + printf("Calling %s()\n", __func__); + ++ if (ras->db_ref_count > 0) ++ ras->db_ref_count--; ++ else ++ return -1; ++ if (ras->db_ref_count > 0) ++ return 0; ++ + if (!priv) + return -1; + +@@ -1018,6 +1029,7 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + log(TERM, LOG_ERR, + "cpu %u: Failed to shutdown sqlite: error = %d\n", cpu, rc); + free(priv); ++ ras->db_priv = NULL; + + return 0; + } +-- +2.25.1 + diff --git a/0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch b/0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch new file mode 100644 index 0000000..85ae07d --- /dev/null +++ b/0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch @@ -0,0 +1,85 @@ +From 6986d818e6d2c846c001fc7211b5a4153e5ecd11 Mon Sep 17 00:00:00 2001 +From: Shiju Jose +Date: Sat, 4 Feb 2023 19:15:55 +0000 +Subject: [PATCH] rasdaemon: Fix poll() on per_cpu trace_pipe_raw blocks + indefinitely + +The error events are not received in the rasdaemon since kernel 6.1-rc6. +This issue is firstly detected and reported, when testing the CXL error +events in the rasdaemon. + +Debugging showed, poll() on trace_pipe_raw in the ras-events.c do not +return and this issue is seen after the commit +42fb0a1e84ff525ebe560e2baf9451ab69127e2b ("tracing/ring-buffer: Have +polling block on watermark"). + +This issue is also verified using a test application for poll() +and select() on per_cpu trace_pipe_raw. + +There is also a bug reported on this issue, +https://lore.kernel.org/all/31eb3b12-3350-90a4-a0d9-d1494db7cf74@oracle.com/ + +This issue occurs for the per_cpu case, which calls the ring_buffer_poll_wait(), +in kernel/trace/ring_buffer.c, with the buffer_percent > 0 and then wait until +the percentage of pages are available. The default value set for the +buffer_percent is 50 in the kernel/trace/trace.c. However poll() does not return +even met the percentage of pages condition. + +As a fix, rasdaemon set buffer_percent as 0 through the +/sys/kernel/debug/tracing/instances/rasdaemon/buffer_percent, then the +task will wake up as soon as data is added to any of the specific cpu +buffer and poll() on per_cpu/cpuX/trace_pipe_raw does not block +indefinitely. + +Dependency on the kernel fix commit +3e46d910d8acf94e5360126593b68bf4fee4c4a1("tracing: Fix poll() and select() +do not work on per_cpu trace_pipe and trace_pipe_raw") + +Signed-off-by: Shiju Jose +--- + ras-events.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/ras-events.c b/ras-events.c +index 39f9ce2..49e4f9a 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -376,6 +376,8 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + int warnonce[n_cpus]; + char pipe_raw[PATH_MAX]; + int legacy_kernel = 0; ++ int fd; ++ char buf[16]; + #if 0 + int need_sleep = 0; + #endif +@@ -395,6 +397,26 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata, + return -ENOMEM; + } + ++ /* Fix for poll() on the per_cpu trace_pipe and trace_pipe_raw blocks ++ * indefinitely with the default buffer_percent in the kernel trace system, ++ * which is introduced by the following change in the kernel. ++ * https://lore.kernel.org/all/20221020231427.41be3f26@gandalf.local.home/T/#u. ++ * Set buffer_percent to 0 so that poll() will return immediately ++ * when the trace data is available in the ras per_cpu trace pipe_raw ++ */ ++ fd = open_trace(pdata[0].ras, "buffer_percent", O_WRONLY); ++ if (fd >= 0) { ++ /* For the backward compatibility to the old kernels, do not return ++ * if fail to set the buffer_percent. ++ */ ++ snprintf(buf, sizeof(buf), "0"); ++ size = write(fd, buf, strlen(buf)); ++ if (size <= 0) ++ log(TERM, LOG_WARNING, "can't write to buffer_percent\n"); ++ close(fd); ++ } else ++ log(TERM, LOG_WARNING, "Can't open buffer_percent\n"); ++ + for (i = 0; i < (n_cpus + 1); i++) + fds[i].fd = -1; + +-- +2.25.1 + diff --git a/rasdaemon.spec b/rasdaemon.spec index 992bc80..c0725c4 100644 --- a/rasdaemon.spec +++ b/rasdaemon.spec @@ -1,6 +1,6 @@ Name: rasdaemon Version: 0.6.8 -Release: 2 +Release: 3 License: GPLv2 Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events URL: https://github.com/mchehab/rasdaemon.git @@ -36,6 +36,8 @@ Patch9010: 0008-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch Patch9011: 0009-rasdaemon-ras-mc-ctl-Updated-HiSilicon-platform-name.patch Patch9012: 0010-rasdaemon-Fix-for-a-memory-out-of-bounds-issue-and-o.patch Patch9013: 0001-rasdaemon-use-standard-length-PATH_MAX-for-path-name.patch +Patch9014: 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch +Patch9015: 0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch %description The rasdaemon program is a daemon which monitors the platform @@ -81,6 +83,14 @@ rm INSTALL %{buildroot}/usr/include/*.h /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || : %changelog +* Fri Mar 31 2023 huangfangrun - 0.6.8-3 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC: + 1.Fix for regression in ras_mc_create_table() if some cpus are offline at the system start. + 2.Fix poll() on per_cpu trace_pipe_raw blocks indefinitely. + * Fri Mar 24 2023 renhongxun - 0.6.8-2 - backport patches from upstream -- Gitee