From 09eae3860b99c27ce41d7247b058ee7654e00e9a Mon Sep 17 00:00:00 2001
From: huangfangrun <huangfangrun1@h-partners.com>
Date: Tue, 4 Apr 2023 10:24:15 +0800
Subject: [PATCH] rasdaemon: Fix for regression in ras_mc_create_table() if
 some cpus are offline at the system start and Fix poll() on per_cpu
 trace_pipe_raw blocks indefinitely

---
 ...r-regression-in-ras_mc_create_table-.patch | 165 ++++++++++++++++++
 ...ll-on-per_cpu-trace_pipe_raw-blocks-.patch |  85 +++++++++
 rasdaemon.spec                                |  12 +-
 3 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch
 create mode 100644 0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch

diff --git a/0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch b/0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch
new file mode 100644
index 0000000..64f60f6
--- /dev/null
+++ b/0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch
@@ -0,0 +1,165 @@
+From e53389e7d7bd805900386b979fb3d48f1e79a7bc Mon Sep 17 00:00:00 2001
+From: Shiju Jose <shiju.jose@huawei.com>
+Date: Sun, 5 Mar 2023 23:14:42 +0000
+Subject: [PATCH] rasdaemon: Fix for regression in ras_mc_create_table() if
+ some cpus are offline at the system start
+
+Issues:
+Regression in the ras_mc_create_table() if some of the cpus are offline
+at the system start when run the rasdaemon. This issue is
+reproducible in ras_mc_create_table() with decode and record
+non-standard events and reproducible sometimes with
+ras_mc_create_table() for the standard events.
+Also in the multi thread way, there is memory leak in ras_mc_event_opendb()
+as struct sqlite3_priv *priv and sqlite3 *db allocated/initialized per
+thread, but stored in the common struct ras_events ras in pthread data,
+which is shared across the threads.
+
+Reason:
+when the system start with some of the cpus are offline and then run
+the rasdaemon, read_ras_event_all_cpus() exit with error and switch to
+the multi thread way. However read() in read_ras_event() return error in
+threads for each of the offline CPUs and does clean up including calling
+ras_mc_event_closedb().
+Since the 'struct ras_events ras' passed in the pthread_data to each of the
+threads is common, struct sqlite3_priv *priv and sqlite3 *db allocated/
+initialized per thread and stored in the common 'struct ras_events ras',
+are getting overwritten in each ras_mc_event_opendb()(which called from
+pthread per cpu), result memory leak. Also when ras_mc_event_closedb()
+is called in the above error case from the threads corresponding to the
+offline cpus, close the sqlite3 *db and free sqlite3_priv *priv stored
+in the common 'struct ras_events ras', result regression when accessing
+priv->db in the ras_mc_create_table() from another context later.
+
+Proposed solution:
+In ras_mc_event_opendb(), allocate struct sqlite3_priv *priv,
+init sqlite3 *db and create tables common for the threads with shared
+'struct ras_events ras' based on a reference count and free them in the
+same way.
+Also protect critical code ras_mc_event_opendb() and ras_mc_event_closedb()
+using mutex in the multi thread case from any regression caused by the
+thread pre-emption.
+
+Reported-by: Lei Feng <fenglei47@h-partners.com>
+Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
+---
+ ras-events.c | 16 +++++++++++++++-
+ ras-events.h |  4 +++-
+ ras-record.c | 12 ++++++++++++
+ 3 files changed, 30 insertions(+), 2 deletions(-)
+
+diff --git a/ras-events.c b/ras-events.c
+index 49e4f9a..5fe8e19 100644
+--- a/ras-events.c
++++ b/ras-events.c
+@@ -625,19 +625,25 @@ static void *handle_ras_events_cpu(void *priv)
+ 
+ 	log(TERM, LOG_INFO, "Listening to events on cpu %d\n", pdata->cpu);
+ 	if (pdata->ras->record_events) {
++		pthread_mutex_lock(&pdata->ras->db_lock);
+ 		if (ras_mc_event_opendb(pdata->cpu, pdata->ras)) {
++			pthread_mutex_unlock(&pdata->ras->db_lock);
+ 			log(TERM, LOG_ERR, "Can't open database\n");
+ 			close(fd);
+ 			kbuffer_free(kbuf);
+ 			free(page);
+ 			return 0;
+ 		}
++		pthread_mutex_unlock(&pdata->ras->db_lock);
+ 	}
+ 
+ 	read_ras_event(fd, pdata, kbuf, page);
+ 
+-	if (pdata->ras->record_events)
++	if (pdata->ras->record_events) {
++		pthread_mutex_lock(&pdata->ras->db_lock);
+ 		ras_mc_event_closedb(pdata->cpu, pdata->ras);
++		pthread_mutex_unlock(&pdata->ras->db_lock);
++	}
+ 
+ 	close(fd);
+ 	kbuffer_free(kbuf);
+@@ -993,6 +999,11 @@ int handle_ras_events(int record_events)
+ 
+ 	/* Poll doesn't work on this kernel. Fallback to pthread way */
+ 	if (rc == -255) {
++		if (pthread_mutex_init(&ras->db_lock, NULL) != 0) {
++			log(SYSLOG, LOG_INFO, "sqlite db lock init has failed\n");
++			goto err;
++		}
++
+ 		log(SYSLOG, LOG_INFO,
+ 		"Opening one thread per cpu (%d threads)\n", cpus);
+ 		for (i = 0; i < cpus; i++) {
+@@ -1005,6 +1016,8 @@ int handle_ras_events(int record_events)
+ 				i);
+ 				while (--i)
+ 					pthread_cancel(data[i].thread);
++
++				pthread_mutex_destroy(&ras->db_lock);
+ 				goto err;
+ 			}
+ 		}
+@@ -1012,6 +1025,7 @@ int handle_ras_events(int record_events)
+ 		/* Wait for all threads to complete */
+ 		for (i = 0; i < cpus; i++)
+ 			pthread_join(data[i].thread, NULL);
++		pthread_mutex_destroy(&ras->db_lock);
+ 	}
+ 
+ 	log(SYSLOG, LOG_INFO, "Huh! something got wrong. Aborting.\n");
+diff --git a/ras-events.h b/ras-events.h
+index 6c9f507..649b0c0 100644
+--- a/ras-events.h
++++ b/ras-events.h
+@@ -56,7 +56,9 @@ struct ras_events {
+ 	time_t		uptime_diff;
+ 
+ 	/* For ras-record */
+-	void		*db_priv;
++	void	*db_priv;
++	int	db_ref_count;
++	pthread_mutex_t db_lock;
+ 
+ 	/* For the mce handler */
+ 	struct mce_priv	*mce_priv;
+diff --git a/ras-record.c b/ras-record.c
+index a367939..adc97a4 100644
+--- a/ras-record.c
++++ b/ras-record.c
+@@ -763,6 +763,10 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
+ 
+ 	printf("Calling %s()\n", __FUNCTION__);
+ 
++	ras->db_ref_count++;
++	if (ras->db_ref_count > 1)
++		return 0;
++
+ 	ras->db_priv = NULL;
+ 
+ 	priv = calloc(1, sizeof(*priv));
+@@ -912,6 +916,13 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
+ 
+ 	printf("Calling %s()\n", __func__);
+ 
++	if (ras->db_ref_count > 0)
++		ras->db_ref_count--;
++	else
++		return -1;
++	if (ras->db_ref_count > 0)
++		return 0;
++
+ 	if (!priv)
+ 		return -1;
+ 
+@@ -1018,6 +1029,7 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
+ 		log(TERM, LOG_ERR,
+ 		    "cpu %u: Failed to shutdown sqlite: error = %d\n", cpu, rc);
+ 	free(priv);
++	ras->db_priv = NULL;
+ 
+ 	return 0;
+ }
+-- 
+2.25.1
+
diff --git a/0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch b/0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch
new file mode 100644
index 0000000..85ae07d
--- /dev/null
+++ b/0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch
@@ -0,0 +1,85 @@
+From 6986d818e6d2c846c001fc7211b5a4153e5ecd11 Mon Sep 17 00:00:00 2001
+From: Shiju Jose <shiju.jose@huawei.com>
+Date: Sat, 4 Feb 2023 19:15:55 +0000
+Subject: [PATCH] rasdaemon: Fix poll() on per_cpu trace_pipe_raw blocks
+ indefinitely
+
+The error events are not received in the rasdaemon since kernel 6.1-rc6.
+This issue is firstly detected and reported, when testing the CXL error
+events in the rasdaemon.
+
+Debugging showed, poll() on trace_pipe_raw in the ras-events.c do not
+return and this issue is seen after the commit
+42fb0a1e84ff525ebe560e2baf9451ab69127e2b ("tracing/ring-buffer: Have
+polling block on watermark").
+
+This issue is also verified using a test application for poll()
+and select() on per_cpu trace_pipe_raw.
+
+There is also a bug reported on this issue,
+https://lore.kernel.org/all/31eb3b12-3350-90a4-a0d9-d1494db7cf74@oracle.com/
+
+This issue occurs for the per_cpu case, which calls the ring_buffer_poll_wait(),
+in kernel/trace/ring_buffer.c, with the buffer_percent > 0 and then wait until
+the percentage of pages are available. The default value set for the
+buffer_percent is 50 in the kernel/trace/trace.c. However poll() does not return
+even met the percentage of pages condition.
+
+As a fix, rasdaemon set buffer_percent as 0 through the
+/sys/kernel/debug/tracing/instances/rasdaemon/buffer_percent, then the
+task will wake up as soon as data is added to any of the specific cpu
+buffer and poll() on per_cpu/cpuX/trace_pipe_raw does not block
+indefinitely.
+
+Dependency on the kernel fix commit
+3e46d910d8acf94e5360126593b68bf4fee4c4a1("tracing: Fix poll() and select()
+do not work on per_cpu trace_pipe and trace_pipe_raw")
+
+Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
+---
+ ras-events.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/ras-events.c b/ras-events.c
+index 39f9ce2..49e4f9a 100644
+--- a/ras-events.c
++++ b/ras-events.c
+@@ -376,6 +376,8 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata,
+ 	int warnonce[n_cpus];
+ 	char pipe_raw[PATH_MAX];
+ 	int legacy_kernel = 0;
++	int fd;
++	char buf[16];
+ #if 0
+ 	int need_sleep = 0;
+ #endif
+@@ -395,6 +397,26 @@ static int read_ras_event_all_cpus(struct pthread_data *pdata,
+ 		return -ENOMEM;
+ 	}
+ 
++	/* Fix for poll() on the per_cpu trace_pipe and trace_pipe_raw blocks
++	 * indefinitely with the default buffer_percent in the kernel trace system,
++	 * which is introduced by the following change in the kernel.
++	 * https://lore.kernel.org/all/20221020231427.41be3f26@gandalf.local.home/T/#u.
++	 * Set buffer_percent to 0 so that poll() will return immediately
++	 * when the trace data is available in the ras per_cpu trace pipe_raw
++	 */
++	fd = open_trace(pdata[0].ras, "buffer_percent", O_WRONLY);
++	if (fd >= 0) {
++		/* For the backward compatibility to the old kernels, do not return
++		 * if fail to set the buffer_percent.
++		 */
++		snprintf(buf, sizeof(buf), "0");
++		size = write(fd, buf, strlen(buf));
++		if (size <= 0)
++			log(TERM, LOG_WARNING, "can't write to buffer_percent\n");
++		close(fd);
++	} else
++		log(TERM, LOG_WARNING, "Can't open buffer_percent\n");
++
+ 	for (i = 0; i < (n_cpus + 1); i++)
+ 		fds[i].fd = -1;
+ 
+-- 
+2.25.1
+
diff --git a/rasdaemon.spec b/rasdaemon.spec
index 992bc80..c0725c4 100644
--- a/rasdaemon.spec
+++ b/rasdaemon.spec
@@ -1,6 +1,6 @@
 Name:			rasdaemon
 Version:		0.6.8
-Release:		2
+Release:		3
 License:		GPLv2
 Summary:		Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
 URL:			https://github.com/mchehab/rasdaemon.git
@@ -36,6 +36,8 @@ Patch9010: 0008-rasdaemon-ras-mc-ctl-Relocate-reading-and-display-Ku.patch
 Patch9011: 0009-rasdaemon-ras-mc-ctl-Updated-HiSilicon-platform-name.patch
 Patch9012: 0010-rasdaemon-Fix-for-a-memory-out-of-bounds-issue-and-o.patch
 Patch9013: 0001-rasdaemon-use-standard-length-PATH_MAX-for-path-name.patch
+Patch9014: 0001-rasdaemon-Fix-for-regression-in-ras_mc_create_table-.patch
+Patch9015: 0002-rasdaemon-Fix-poll-on-per_cpu-trace_pipe_raw-blocks-.patch
 
 %description
 The  rasdaemon  program  is  a  daemon which monitors the platform
@@ -81,6 +83,14 @@ rm INSTALL %{buildroot}/usr/include/*.h
 /usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || :
 
 %changelog
+* Fri Mar 31 2023 huangfangrun <huangfangrun1@h-partners.com> - 0.6.8-3
+- Type:bugfix
+- ID:NA
+- SUG:NA
+- DESC:
+  1.Fix for regression in ras_mc_create_table() if some cpus are offline at the system start.
+  2.Fix poll() on per_cpu trace_pipe_raw blocks indefinitely.
+
 * Fri Mar 24 2023 renhongxun <renhongxun@h-partners.com> - 0.6.8-2
 - backport patches from upstream
 
-- 
Gitee