From 562d9b70c695ff1518c8d26d08e1d22ef2f4fa7a Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Wed, 7 Jun 2023 18:08:19 +0800 Subject: [PATCH 1/4] mm/zsmalloc: copy with get_cpu_var() and locking commit 93ca91891d4736f28a2bad76cee96adc68c78ea9 upstream. get_cpu_var() disables preemption and triggers a might_sleep() splat later. This is replaced with get_locked_var(). The bitspinlocks are replaced with a proper mutex, which requires a slightly larger struct to allocate. Signed-off-by: Mike Galbraith [bigeasy: replace the bitspin_lock() with a mutex, get_locked_var(). Mike then fixed the size magic] Signed-off-by: Sebastian Andrzej Siewior --- mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 6 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index c18dc8e61d35..7dad2ff3e778 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -57,6 +57,7 @@ #include #include #include +#include #define ZSPAGE_MAGIC 0x58 @@ -77,6 +78,20 @@ #define ZS_HANDLE_SIZE (sizeof(unsigned long)) +#ifdef CONFIG_PREEMPT_RT + +struct zsmalloc_handle { + unsigned long addr; + struct mutex lock; +}; + +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) + +#else + +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) +#endif + /* * Object location (, ) is encoded as * a single (unsigned long) handle value. 
@@ -293,6 +308,7 @@ struct zspage { }; struct mapping_area { + local_lock_t lock; char *vm_buf; /* copy buffer for objects that span pages */ char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ @@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} static int create_cache(struct zs_pool *pool) { - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, 0, 0, NULL); if (!pool->handle_cachep) return 1; @@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool *pool) static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); + void *p; + + p = kmem_cache_alloc(pool->handle_cachep, + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); +#ifdef CONFIG_PREEMPT_RT + if (p) { + struct zsmalloc_handle *zh = p; + + mutex_init(&zh->lock); + } +#endif + return (unsigned long)p; +} + +#ifdef CONFIG_PREEMPT_RT +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) +{ + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); } +#endif static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { @@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) static void record_obj(unsigned long handle, unsigned long obj) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); + + WRITE_ONCE(zh->addr, obj); +#else /* * lsb of @obj represents handle lock while other bits * represent object value the handle is pointing so * updating shouldn't do store tearing. 
*/ WRITE_ONCE(*(unsigned long *)handle, obj); +#endif } /* zpool driver */ @@ -455,7 +494,10 @@ MODULE_ALIAS("zpool-zsmalloc"); #endif /* CONFIG_ZPOOL */ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ -static DEFINE_PER_CPU(struct mapping_area, zs_map_area); +static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { + /* XXX remove this and use a spin_lock_t in pin_tag() */ + .lock = INIT_LOCAL_LOCK(lock), +}; static bool is_zspage_isolated(struct zspage *zspage) { @@ -865,7 +907,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) static unsigned long handle_to_obj(unsigned long handle) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); + + return zh->addr; +#else return *(unsigned long *)handle; +#endif } static unsigned long obj_to_head(struct page *page, void *obj) @@ -879,22 +927,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) static inline int testpin_tag(unsigned long handle) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); + + return mutex_is_locked(&zh->lock); +#else return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); +#endif } static inline int trypin_tag(unsigned long handle) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); + + return mutex_trylock(&zh->lock); +#else return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); +#endif } static void pin_tag(unsigned long handle) __acquires(bitlock) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); + + return mutex_lock(&zh->lock); +#else bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); +#endif } static void unpin_tag(unsigned long handle) __releases(bitlock) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); + + return mutex_unlock(&zh->lock); +#else bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); 
+#endif } static void reset_page(struct page *page) @@ -1278,7 +1350,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; - area = &get_cpu_var(zs_map_area); + local_lock(&zs_map_area.lock); + area = this_cpu_ptr(&zs_map_area); area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ @@ -1332,7 +1405,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) __zs_unmap_object(area, pages, off, class->size); } - put_cpu_var(zs_map_area); + local_unlock(&zs_map_area.lock); migrate_read_unlock(zspage); unpin_tag(handle); -- Gitee From 80189adc1c793011361c437f087911ca33f8f66e Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Wed, 7 Jun 2023 18:08:32 +0800 Subject: [PATCH 2/4] mm/zswap: Use local lock to protect per-CPU data commit 4049393ce89f48a9191cd54b11195ccf1ceb868c upstream. zswap uses per-CPU compression. The per-CPU data pointer is acquired with get_cpu_ptr() which implicitly disables preemption. It allocates memory inside the preempt disabled region which conflicts with the PREEMPT_RT semantics. Replace the implicit preemption control with an explicit local lock. This allows RT kernels to substitute it with a real per CPU lock, which serializes the access but keeps the code section preemptible. On non RT kernels this maps to preempt_disable() as before, i.e. no functional change. [bigeasy: Use local_lock(), additional hunks, patch description] Cc: Seth Jennings Cc: Dan Streetman Cc: Vitaly Wool Cc: Andrew Morton Cc: linux-mm@kvack.org Signed-off-by: Luis Claudio R. 
Goncalves Signed-off-by: Sebastian Andrzej Siewior --- mm/zswap.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index fbb782924ccc..b24f761b9241 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -387,27 +388,37 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, /********************************* * per-cpu code **********************************/ -static DEFINE_PER_CPU(u8 *, zswap_dstmem); +struct zswap_comp { + /* Used for per-CPU dstmem and tfm */ + local_lock_t lock; + u8 *dstmem; +}; + +static DEFINE_PER_CPU(struct zswap_comp, zswap_comp) = { + .lock = INIT_LOCAL_LOCK(lock), +}; static int zswap_dstmem_prepare(unsigned int cpu) { + struct zswap_comp *zcomp; u8 *dst; dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); if (!dst) return -ENOMEM; - per_cpu(zswap_dstmem, cpu) = dst; + zcomp = per_cpu_ptr(&zswap_comp, cpu); + zcomp->dstmem = dst; return 0; } static int zswap_dstmem_dead(unsigned int cpu) { - u8 *dst; + struct zswap_comp *zcomp; - dst = per_cpu(zswap_dstmem, cpu); - kfree(dst); - per_cpu(zswap_dstmem, cpu) = NULL; + zcomp = per_cpu_ptr(&zswap_comp, cpu); + kfree(zcomp->dstmem); + zcomp->dstmem = NULL; return 0; } @@ -919,10 +930,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) dlen = PAGE_SIZE; src = (u8 *)zhdr + sizeof(struct zswap_header); dst = kmap_atomic(page); - tfm = *get_cpu_ptr(entry->pool->tfm); + local_lock(&zswap_comp.lock); + tfm = *this_cpu_ptr(entry->pool->tfm); ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen); - put_cpu_ptr(entry->pool->tfm); + local_unlock(&zswap_comp.lock); kunmap_atomic(dst); BUG_ON(ret); BUG_ON(dlen != PAGE_SIZE); @@ -1074,12 +1086,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, } /* compress */ - dst = get_cpu_var(zswap_dstmem); - tfm = 
*get_cpu_ptr(entry->pool->tfm); + local_lock(&zswap_comp.lock); + dst = *this_cpu_ptr(&zswap_comp.dstmem); + tfm = *this_cpu_ptr(entry->pool->tfm); src = kmap_atomic(page); ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen); kunmap_atomic(src); - put_cpu_ptr(entry->pool->tfm); if (ret) { ret = -EINVAL; goto put_dstmem; @@ -1103,7 +1115,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, memcpy(buf, &zhdr, hlen); memcpy(buf + hlen, dst, dlen); zpool_unmap_handle(entry->pool->zpool, handle); - put_cpu_var(zswap_dstmem); + local_unlock(&zswap_comp.lock); /* populate entry */ entry->offset = offset; @@ -1131,7 +1143,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, return 0; put_dstmem: - put_cpu_var(zswap_dstmem); + local_unlock(&zswap_comp.lock); zswap_pool_put(entry->pool); freepage: zswap_entry_cache_free(entry); @@ -1176,9 +1188,10 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, if (zpool_evictable(entry->pool->zpool)) src += sizeof(struct zswap_header); dst = kmap_atomic(page); - tfm = *get_cpu_ptr(entry->pool->tfm); + local_lock(&zswap_comp.lock); + tfm = *this_cpu_ptr(entry->pool->tfm); ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen); - put_cpu_ptr(entry->pool->tfm); + local_unlock(&zswap_comp.lock); kunmap_atomic(dst); zpool_unmap_handle(entry->pool->zpool, entry->handle); BUG_ON(ret); -- Gitee From cbdca7c6e50c217987aa23bfcbc2762db91d31bb Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Wed, 7 Jun 2023 18:09:13 +0800 Subject: [PATCH 3/4] x86: kvm Require const tsc for RT commit 3eeb29d14e9e2ba52b98581d225c4cb285714a61 upstream. Non constant TSC is a nightmare on bare metal already, but with virtualization it becomes a complete disaster because the workarounds are horrible latency wise. This is also a prerequisite for running RT in a guest on top of an RT host. 
Signed-off-by: Thomas Gleixner --- arch/x86/kvm/x86.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d698aeefe0da..9a90daffeb0d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8245,6 +8245,14 @@ int kvm_arch_init(void *opaque) goto out; } +#ifdef CONFIG_PREEMPT_RT + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n"); + r = -EOPNOTSUPP; + goto out; + } +#endif + r = -ENOMEM; x86_emulator_cache = kvm_alloc_emulator_cache(); -- Gitee From 5081eca405126725697b5e80820062b409e29dac Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Wed, 7 Jun 2023 18:09:24 +0800 Subject: [PATCH 4/4] wait.h: include atomic.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a1aa613398f4813bf3c60556130254c52733ac0b upstream. | CC init/main.o |In file included from include/linux/mmzone.h:9:0, | from include/linux/gfp.h:4, | from include/linux/kmod.h:22, | from include/linux/module.h:13, | from init/main.c:15: |include/linux/wait.h: In function ‘wait_on_atomic_t’: |include/linux/wait.h:982:2: error: implicit declaration of function ‘atomic_read’ [-Werror=implicit-function-declaration] | if (atomic_read(val) == 0) | ^ This pops up on ARM. Non-RT gets its atomic.h include from spinlock.h Signed-off-by: Sebastian Andrzej Siewior --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/wait.h b/include/linux/wait.h index 9b8b0833100a..33001b534a84 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -10,6 +10,7 @@ #include #include +#include typedef struct wait_queue_entry wait_queue_entry_t; -- Gitee