From fadc8c791e76c932e90258db0108e35393066198 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 8 Jun 2023 14:40:12 +0800 Subject: [PATCH 1/4] fs/dcache: bring back explicit INIT_HLIST_BL_HEAD init commit 6657c7f6ea61fd3fea378532fcae38776afc9fc1 upstream. Commit 3d375d78593c ("mm: update callers to use HASH_ZERO flag") removed INIT_HLIST_BL_HEAD and uses the ZERO flag instead for the init. However, on RT we also have a spinlock which needs an init call, so we can't rely on the ZERO flag there. Signed-off-by: Sebastian Andrzej Siewior --- fs/dcache.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index bc56aff73761..684c8991cf70 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3136,6 +3136,8 @@ __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { + unsigned int loop; + /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. */ @@ -3152,11 +3154,16 @@ static void __init dcache_init_early(void) NULL, 0, 0); + + for (loop = 0; loop < (1U << d_hash_shift); loop++) + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); + d_hash_shift = 32 - d_hash_shift; } static void __init dcache_init(void) { + unsigned int loop; /* * A constructor could be added for stable state like the lists, * but it is probably not worth it because of the cache nature @@ -3180,6 +3187,10 @@ static void __init dcache_init(void) NULL, 0, 0); + + for (loop = 0; loop < (1U << d_hash_shift); loop++) + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); + d_hash_shift = 32 - d_hash_shift; } -- Gitee From d4a447f039cca93d58bd319632745172dc478e4c Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 8 Jun 2023 14:40:27 +0800 Subject: [PATCH 2/4] fs/dcache: disable preemption on i_dir_seq's write side commit f7fc6eb795b824aba9a02869fb2f399bb51a880c upstream. i_dir_seq is an opencoded seqcounter. Based on the code it looks like we could have two writers in parallel despite the fact that the d_lock is held. 
The problem is that during the write process on RT the preemption is still enabled and if this process is interrupted by a reader with RT priority then we lock up. To avoid that lock up I am disabling the preemption during the update. The rename of i_dir_seq is here to ensure that new write sides are caught in the future. Cc: stable-rt@vger.kernel.org Reported-by: Oleg.Karfich@wago.com Signed-off-by: Sebastian Andrzej Siewior --- fs/dcache.c | 12 +++++++----- fs/inode.c | 2 +- include/linux/fs.h | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 684c8991cf70..8f868760e799 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2480,9 +2480,10 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { + preempt_disable_rt(); for (;;) { - unsigned n = dir->i_dir_seq; - if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) + unsigned n = dir->__i_dir_seq; + if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n) return n; cpu_relax(); } @@ -2490,7 +2491,8 @@ static inline unsigned start_dir_add(struct inode *dir) static inline void end_dir_add(struct inode *dir, unsigned n) { - smp_store_release(&dir->i_dir_seq, n + 2); + smp_store_release(&dir->__i_dir_seq, n + 2); + preempt_enable_rt(); } static void d_wait_lookup(struct dentry *dentry) @@ -2523,7 +2525,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, retry: rcu_read_lock(); - seq = smp_load_acquire(&parent->d_inode->i_dir_seq); + seq = smp_load_acquire(&parent->d_inode->__i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { @@ -2551,7 +2553,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent, } hlist_bl_lock(b); - if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { + if (unlikely(READ_ONCE(parent->d_inode->__i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; diff --git a/fs/inode.c b/fs/inode.c index 199f90cf96eb..6a2ef6de2886 100644 --- 
a/fs/inode.c +++ b/fs/inode.c @@ -157,7 +157,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_link = NULL; - inode->i_dir_seq = 0; + inode->__i_dir_seq = 0; inode->i_rdev = 0; inode->dirtied_when = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 8d20e7f16ea5..34e15e9dd6f5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -713,7 +713,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; - unsigned i_dir_seq; + unsigned __i_dir_seq; }; __u32 i_generation; -- Gitee From a2971c15a1509781ae54f9ab4118a69c4ce01d57 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 8 Jun 2023 14:40:37 +0800 Subject: [PATCH 3/4] squashfs: make use of local lock in multi_cpu decompressor commit cd5771603fe1942769706821fac18040226fec7c upstream. Currently, the squashfs multi_cpu decompressor makes use of get_cpu_ptr()/put_cpu_ptr(), which unconditionally disable preemption during decompression. Because the workload is distributed across CPUs, all CPUs can observe a very high wakeup latency, which has been seen to be as much as 8000us. Convert this decompressor to make use of a local lock, which will allow execution of the decompressor with preemption-enabled, but also ensure concurrent accesses to the percpu compressor data on the local CPU will be serialized. 
Cc: stable-rt@vger.kernel.org Reported-by: Alexander Stein Tested-by: Alexander Stein Signed-off-by: Julia Cartwright Signed-off-by: Sebastian Andrzej Siewior --- fs/squashfs/decompressor_multi_percpu.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 23a9c28ad8ea..6a73c4fa88e7 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/percpu.h> #include <linux/buffer_head.h> +#include <linux/locallock.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -25,6 +26,8 @@ struct squashfs_stream { void *stream; }; +static DEFINE_LOCAL_IRQ_LOCK(stream_lock); + void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, void *comp_opts) { @@ -79,10 +82,15 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, { struct squashfs_stream __percpu *percpu = (struct squashfs_stream __percpu *) msblk->stream; - struct squashfs_stream *stream = get_cpu_ptr(percpu); - int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, - offset, length, output); - put_cpu_ptr(stream); + struct squashfs_stream *stream; + int res; + + stream = get_locked_ptr(stream_lock, percpu); + + res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + + put_locked_ptr(stream_lock, stream); if (res < 0) ERROR("%s decompression failed, data probably corrupt\n", -- Gitee From 696acf62886f994a6fdc36b8f30f143fa658e5ff Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 8 Jun 2023 14:40:56 +0800 Subject: [PATCH 4/4] fs/epoll: Do not disable preemption on RT commit 5916e5218923335c20252ab8309a16d071a54a80 upstream. ep_call_nested() takes a sleeping lock so we can't disable preemption. The light version is enough since ep_call_nested() doesn't mind being invoked twice on the same CPU. 
Signed-off-by: Thomas Gleixner --- fs/eventpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5fd0e0463a5f..ba690b614864 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -565,12 +565,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) static void ep_poll_safewake(wait_queue_head_t *wq) { - int this_cpu = get_cpu(); + int this_cpu = get_cpu_light(); ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); - put_cpu(); + put_cpu_light(); } #else -- Gitee