From dd93014f921a5b5665f2cd5a689dae22281ed0e5 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 19:10:37 +0800 Subject: [PATCH 01/11] riscv: select ARCH_USE_CMPXCHG_LOCKREF ANBZ: #23245 commit eb1e5037294652ddf1437f62292c0727183f11ae upstream. Select ARCH_USE_CMPXCHG_LOCKREF to enable the cmpxchg-based lockless lockref implementation for riscv. Using Linus' test case [1] on the TH1520 platform, I see an 11.2% improvement; on the JH7110 platform, I see a 12.0% improvement. Link: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4 [1] Signed-off-by: Jisheng Zhang Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240325111038.1700-2-jszhang@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 702a914821e0..e4ee5ea69b6a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,6 +53,7 @@ config RISCV select ARCH_SUPPORTS_HUGETLBFS if MMU select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU + select ARCH_USE_CMPXCHG_LOCKREF if 64BIT select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USES_CFI_TRAPS if CFI_CLANG -- Gitee From bfebdee0a7bfe68ceb7c3e212f8941960d22e761 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 25 Mar 2024 19:10:38 +0800 Subject: [PATCH 02/11] riscv: cmpxchg: implement arch_cmpxchg64_{relaxed|acquire|release} ANBZ: #23245 commit 79d6e4eae9662b9103fecf94d52b44deca56743c upstream. After selecting ARCH_USE_CMPXCHG_LOCKREF, one straightforward further optimization is to implement arch_cmpxchg64_relaxed(), because the lockref code does not need the cmpxchg to have barrier semantics. At the same time, implement arch_cmpxchg64_acquire and arch_cmpxchg64_release as well. However, on both the TH1520 and JH7110 platforms, I didn't see an obvious performance improvement with Linus' test case [1]. IMHO, this may be related to the fence and lr.d/sc.d hardware implementations. In theory, lr/sc without a fence could outperform lr/sc plus a fence, so add the code here to leave room for performance improvements on newer HW platforms.
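To make the intent concrete, here is a minimal userspace model of the lockref fast path that a relaxed 64-bit cmpxchg serves; the layout and names below are illustrative stand-ins, not the kernel's actual lockref code:

/*
 * The spinlock and the reference count share one 64-bit word, so while
 * the lock is observed free the count can be bumped with a single
 * 64-bit CAS instead of taking the lock. Atomicity is needed here but
 * ordering is not, which is what arch_cmpxchg64_relaxed() exploits.
 */
#include <stdint.h>
#include <stdio.h>

struct lockref_model {
	union {
		uint64_t lock_count;		/* the cmpxchg64 target */
		struct {
			uint32_t lock;		/* 0 == unlocked */
			uint32_t count;
		};
	};
};

static int lockref_get_not_locked(struct lockref_model *lr)
{
	struct lockref_model old, new;

	old.lock_count = __atomic_load_n(&lr->lock_count, __ATOMIC_RELAXED);
	while (old.lock == 0) {			/* fast path only while unlocked */
		new = old;
		new.count++;
		/* relaxed CAS; 'old' is refreshed automatically on failure */
		if (__atomic_compare_exchange_n(&lr->lock_count,
						&old.lock_count,
						new.lock_count, 0,
						__ATOMIC_RELAXED,
						__ATOMIC_RELAXED))
			return 1;		/* took a reference locklessly */
	}
	return 0;				/* lock held: caller falls back to locking */
}

int main(void)
{
	struct lockref_model lr = { .lock_count = 0 };
	int hit = lockref_get_not_locked(&lr);

	printf("fast path %s, count = %u\n", hit ? "hit" : "miss", lr.count);
	return 0;
}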
Link: http://marc.info/?l=linux-fsdevel&m=137782380714721&w=4 [1] Signed-off-by: Jisheng Zhang Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240325111038.1700-3-jszhang@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/include/asm/cmpxchg.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index f86a74eda6d4..2cb4c44b54bb 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -207,6 +207,24 @@ arch_cmpxchg_relaxed((ptr), (o), (n)); \ }) +#define arch_cmpxchg64_relaxed(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_relaxed((ptr), (o), (n)); \ +}) + +#define arch_cmpxchg64_acquire(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_acquire((ptr), (o), (n)); \ +}) + +#define arch_cmpxchg64_release(ptr, o, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_cmpxchg_release((ptr), (o), (n)); \ +}) + #ifdef CONFIG_RISCV_ISA_ZAWRS /* * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to -- Gitee From c4b8f200def9efa202fc0e89e985a2feaaaf0703 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 30 May 2024 16:55:46 +0200 Subject: [PATCH 03/11] riscv: Fix fully ordered LR/SC xchg[8|16]() implementations ANBZ: #23245 commit 1d84afaf02524d2558e8ca3ca169be2ef720380b upstream. The fully ordered versions of xchg[8|16]() using LR/SC lack the necessary memory barriers to guarantee the order. Fix this by matching what is already implemented in the fully ordered versions of cmpxchg() using LR/SC. Suggested-by: Andrea Parri Reported-by: Andrea Parri Closes: https://lore.kernel.org/linux-riscv/ZlYbupL5XgzgA0MX@andrea/T/#u Fixes: a8ed2b7a2c13 ("riscv/cmpxchg: Implement xchg for variables of size 1 and 2") Signed-off-by: Alexandre Ghiti Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240530145546.394248-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/include/asm/cmpxchg.h | 22 ++++++++++++---------- arch/riscv/include/asm/fence.h | 2 ++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 2cb4c44b54bb..935556eb2cd2 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -14,7 +14,7 @@ #include #include -#define __arch_xchg_masked(prepend, append, r, p, n) \ +#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ ({ \ u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ @@ -29,7 +29,7 @@ "0: lr.w %0, %2\n" \ " and %1, %0, %z4\n" \ " or %1, %1, %z3\n" \ - " sc.w %1, %1, %2\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ " bnez %1, 0b\n" \ append \ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ @@ -50,7 +50,8 @@ : "memory"); \ }) -#define _arch_xchg(ptr, new, sfx, prepend, append) \ +#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \ + sc_append, swap_append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __new = (new); \ @@ -59,15 +60,15 @@ switch (sizeof(*__ptr)) { \ case 1: \ case 2: \ - __arch_xchg_masked(prepend, append, \ + __arch_xchg_masked(sc_sfx, prepend, sc_append, \ __ret, __ptr, __new); \ break; \ case 4: \ - __arch_xchg(".w" sfx, prepend, append, \ + __arch_xchg(".w" swap_sfx, prepend, swap_append, \ __ret, __ptr, __new); \ break; \ case 8: \ - __arch_xchg(".d" sfx, prepend, append, \ + __arch_xchg(".d" 
swap_sfx, prepend, swap_append, \ __ret, __ptr, __new); \ break; \ default: \ @@ -77,16 +78,17 @@ }) #define arch_xchg_relaxed(ptr, x) \ - _arch_xchg(ptr, x, "", "", "") + _arch_xchg(ptr, x, "", "", "", "", "") #define arch_xchg_acquire(ptr, x) \ - _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER) + _arch_xchg(ptr, x, "", "", "", \ + RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER) #define arch_xchg_release(ptr, x) \ - _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "") + _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "") #define arch_xchg(ptr, x) \ - _arch_xchg(ptr, x, ".aqrl", "", "") + _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "") #define xchg32(ptr, x) \ ({ \ diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h index 2b443a3a487f..9b95db2ade6b 100644 --- a/arch/riscv/include/asm/fence.h +++ b/arch/riscv/include/asm/fence.h @@ -4,9 +4,11 @@ #ifdef CONFIG_SMP #define RISCV_ACQUIRE_BARRIER "\tfence r , rw\n" #define RISCV_RELEASE_BARRIER "\tfence rw, w\n" +#define RISCV_FULL_BARRIER "\tfence rw, rw\n" #else #define RISCV_ACQUIRE_BARRIER #define RISCV_RELEASE_BARRIER +#define RISCV_FULL_BARRIER #endif #endif /* _ASM_RISCV_FENCE_H */ -- Gitee From 82092b59cc3aa491621eb7305b195c2e28e2ca39 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:41 +0100 Subject: [PATCH 04/11] riscv: Move cpufeature.h macros into their own header ANBZ: #23245 commit 010e12aa4925b36700ebacb763a7e6cfd771d9a2 upstream. asm/cmpxchg.h will soon need the riscv_has_extension_unlikely() macros and would then need to include asm/cpufeature.h, which introduces a lot of circular header dependencies. So move the riscv_has_extension_XXX() macros into their own header, which prevents such circular dependencies by including only a restricted number of headers.
Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/include/asm/cpufeature-macros.h | 66 ++++++++++++++++++++++ arch/riscv/include/asm/cpufeature.h | 57 ++----------------- 2 files changed, 70 insertions(+), 53 deletions(-) create mode 100644 arch/riscv/include/asm/cpufeature-macros.h diff --git a/arch/riscv/include/asm/cpufeature-macros.h b/arch/riscv/include/asm/cpufeature-macros.h new file mode 100644 index 000000000000..a8103edbf51f --- /dev/null +++ b/arch/riscv/include/asm/cpufeature-macros.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022-2024 Rivos, Inc + */ + +#ifndef _ASM_CPUFEATURE_MACROS_H +#define _ASM_CPUFEATURE_MACROS_H + +#include +#include + +#define STANDARD_EXT 0 + +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); +#define riscv_isa_extension_available(isa_bitmap, ext) \ + __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) + +static __always_inline bool __riscv_has_extension_likely(const unsigned long vendor, + const unsigned long ext) +{ + asm goto(ALTERNATIVE("j %l[l_no]", "nop", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_no); + + return true; +l_no: + return false; +} + +static __always_inline bool __riscv_has_extension_unlikely(const unsigned long vendor, + const unsigned long ext) +{ + asm goto(ALTERNATIVE("nop", "j %l[l_yes]", %[vendor], %[ext], 1) + : + : [vendor] "i" (vendor), [ext] "i" (ext) + : + : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool riscv_has_extension_unlikely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_unlikely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +static __always_inline bool riscv_has_extension_likely(const unsigned long ext) +{ + compiletime_assert(ext < RISCV_ISA_EXT_MAX, "ext must be < RISCV_ISA_EXT_MAX"); + + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) + return __riscv_has_extension_likely(STANDARD_EXT, ext); + + return __riscv_isa_extension_available(NULL, ext); +} + +#endif /* _ASM_CPUFEATURE_MACROS_H */ diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index a535f8869519..2cb596064b9f 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -8,9 +8,11 @@ #include #include +#include +#include +#include #include -#include -#include +#include /* * These are probed via a device_initcall(), via either the SBI or directly @@ -52,57 +54,6 @@ extern const size_t riscv_isa_ext_count; extern bool riscv_isa_fallback; unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap); - -bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, unsigned int bit); -#define riscv_isa_extension_available(isa_bitmap, ext) \ - __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext) - -static __always_inline bool -riscv_has_extension_likely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("j %l[l_no]", "nop", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_no); - } else { - if 
(!__riscv_isa_extension_available(NULL, ext)) - goto l_no; - } - - return true; -l_no: - return false; -} - -static __always_inline bool -riscv_has_extension_unlikely(const unsigned long ext) -{ - compiletime_assert(ext < RISCV_ISA_EXT_MAX, - "ext must be < RISCV_ISA_EXT_MAX"); - - if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) { - asm goto( - ALTERNATIVE("nop", "j %l[l_yes]", 0, %[ext], 1) - : - : [ext] "i" (ext) - : - : l_yes); - } else { - if (__riscv_isa_extension_available(NULL, ext)) - goto l_yes; - } - - return false; -l_yes: - return true; -} - static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) -- Gitee From 7006f2e61e4493f1ba07848460a42d5c5be87c21 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:43 +0100 Subject: [PATCH 05/11] riscv: Implement cmpxchg32/64() using Zacas ANBZ: #23245 commit 38acdee32d23f789e866488c99867fd497d43c86 upstream. This adds runtime support for Zacas in cmpxchg operations. Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-4-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/Kconfig | 16 +++++++++++ arch/riscv/Makefile | 3 ++ arch/riscv/include/asm/cmpxchg.h | 48 +++++++++++++++++++++----------- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e4ee5ea69b6a..da91c65e95c8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -585,6 +585,22 @@ config RISCV_ISA_ZAWRS use of these instructions in the kernel when the Zawrs extension is detected at boot. +config TOOLCHAIN_HAS_ZACAS + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZACAS + bool "Zacas extension support for atomic CAS" + depends on TOOLCHAIN_HAS_ZACAS + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zacas ISA-extension to implement kernel atomic + cmpxchg operations when it is detected at boot. + If you don't know what to do here, say Y. config TOOLCHAIN_HAS_ZBB diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 7155a8be497e..f23ce16bd695 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -57,6 +57,9 @@ endif include $(srctree)/${RISCV_ARCH_PATH}/Makefile.isa +# Check if the toolchain supports Zacas +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas + # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 935556eb2cd2..7f5a015cb41a 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -13,6 +13,7 @@ #include #include #include +#include #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ ({ \ @@ -138,24 +139,37 @@ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ }) -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ ({ \ - register unsigned int __rc; \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ + r = o; \ \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr" lr_sfx " %0, %2\n" \ - " bne %0, %z3, 1f\n" \ - " sc" sc_sfx " %1, %z4, %2\n" \ - " bnez %1, 0b\n" \ - append \ - "1:\n" \ - : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ - : "rJ" (co o), "rJ" (n) \ - : "memory"); \ + __asm__ __volatile__ ( \ + prepend \ + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ + append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + register unsigned int __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr" lr_sfx " %0, %2\n" \ + " bne %0, %z3, 1f\n" \ + " sc" sc_cas_sfx " %1, %z4, %2\n" \ + " bnez %1, 0b\n" \ + append \ + "1:\n" \ + : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ + : "rJ" (co o), "rJ" (n) \ + : "memory"); \ + } \ }) -#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ +#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __old = (old); \ @@ -165,15 +179,15 @@ switch (sizeof(*__ptr)) { \ case 1: \ case 2: \ - __arch_cmpxchg_masked(sc_sfx, prepend, append, \ + __arch_cmpxchg_masked(sc_cas_sfx, prepend, append, \ __ret, __ptr, __old, __new); \ break; \ case 4: \ - __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ + __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \ __ret, __ptr, (long), __old, __new); \ break; \ case 8: \ - __arch_cmpxchg(".d", ".d" sc_sfx, prepend, append, \ + __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \ __ret, __ptr, /**/, __old, __new); \ break; \ default: \ -- Gitee From 7726c09e9bffa41206d589a98dc4efbfc6554b95 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:44 +0100 Subject: [PATCH 06/11] dt-bindings: riscv: Add Zabha ISA extension description ANBZ: #23245 commit 51624ddcf59dd78c810fd7da768d688e193b42d6 upstream. Add description for the Zabha ISA extension which was ratified in April 2024. 
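As a rough sketch of how such a DT entry is consumed, each extension name listed in riscv,isa-extensions is matched against a known table and sets a bit in an ISA bitmap that the kernel can later test; the model below is a simplification (the real parser lives in arch/riscv/kernel/cpufeature.c and the names here are illustrative):

#include <stdio.h>
#include <string.h>

enum { EXT_ZABHA, EXT_ZACAS, EXT_MAX };

static const char *const ext_names[EXT_MAX] = {
	[EXT_ZABHA] = "zabha",
	[EXT_ZACAS] = "zacas",
};

static unsigned long isa_bitmap;	/* one bit per known extension */

static void probe_one(const char *dt_token)
{
	for (int i = 0; i < EXT_MAX; i++)
		if (!strcmp(dt_token, ext_names[i]))
			isa_bitmap |= 1UL << i;
}

int main(void)
{
	/* tokens as they could appear in a riscv,isa-extensions list */
	const char *dt[] = { "i", "m", "a", "zabha", "zacas" };

	for (unsigned int i = 0; i < sizeof(dt) / sizeof(dt[0]); i++)
		probe_one(dt[i]);

	printf("zabha %s\n",
	       isa_bitmap & (1UL << EXT_ZABHA) ? "detected" : "absent");
	return 0;
}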
Signed-off-by: Alexandre Ghiti Reviewed-by: Guo Ren Acked-by: Conor Dooley Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-5-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- Documentation/devicetree/bindings/riscv/extensions.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index ac60b6623c69..61682155cd3d 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -196,6 +196,12 @@ properties: as ratified at commit 4a69197e5617 ("Update to ratified state") of riscv-svvptc. + - const: zabha + description: | + The Zabha extension for Byte and Halfword Atomic Memory Operations + as ratified at commit 49f49c842ff9 ("Update to Rafified state") of + riscv-zabha. + - const: zacas description: | The Zacas extension for Atomic Compare-and-Swap (CAS) instructions -- Gitee From d4ab0966cea5343e6e8d64ade52efe7011144e85 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:45 +0100 Subject: [PATCH 07/11] riscv: Implement cmpxchg8/16() using Zabha ANBZ: #23245 commit 1658ef4314b37ff4858a6c207646ff9d280ca4f7 upstream. This adds runtime support for Zabha in cmpxchg8/16() operations. Note that in the absence of Zacas support in the toolchain, CAS instructions from Zabha won't be used. Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-6-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/Kconfig | 18 ++++++++ arch/riscv/Makefile | 3 ++ arch/riscv/include/asm/cmpxchg.h | 78 ++++++++++++++++++++------------ arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/kernel/cpufeature.c | 1 + 5 files changed, 72 insertions(+), 29 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index da91c65e95c8..54eeddaae263 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -585,6 +585,24 @@ config RISCV_ISA_ZAWRS use of these instructions in the kernel when the Zawrs extension is detected at boot. +config TOOLCHAIN_HAS_ZABHA + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZABHA + bool "Zabha extension support for atomic byte/halfword operations" + depends on TOOLCHAIN_HAS_ZABHA + depends on RISCV_ALTERNATIVE + default y + help + Enable the use of the Zabha ISA-extension to implement kernel + byte/halfword atomic memory operations when it is detected at boot. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZACAS bool default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index f23ce16bd695..578ca1f958f6 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -60,6 +60,9 @@ include $(srctree)/${RISCV_ARCH_PATH}/Makefile.isa # Check if the toolchain supports Zacas riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas +# Check if the toolchain supports Zabha +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha + # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 7f5a015cb41a..b743e3ca05b1 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -109,34 +109,49 @@ * indicated by comparing RETURN with OLD. */ -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \ -({ \ - u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ - ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ - ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ - << __s; \ - ulong __newx = (ulong)(n) << __s; \ - ulong __oldx = (ulong)(o) << __s; \ - ulong __retx; \ - ulong __rc; \ - \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr.w %0, %2\n" \ - " and %1, %0, %z5\n" \ - " bne %1, %z3, 1f\n" \ - " and %1, %0, %z6\n" \ - " or %1, %1, %z4\n" \ - " sc.w" sc_sfx " %1, %1, %2\n" \ - " bnez %1, 0b\n" \ - append \ - "1:\n" \ - : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" ((long)__oldx), "rJ" (__newx), \ - "rJ" (__mask), "rJ" (~__mask) \ - : "memory"); \ - \ - r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \ +({ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ + r = o; \ + \ + __asm__ __volatile__ ( \ + prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __oldx = (ulong)(o) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z5\n" \ + " bne %1, %z3, 1f\n" \ + " and %1, %0, %z6\n" \ + " or %1, %1, %z4\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + append \ + "1:\n" \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" ((long)__oldx), "rJ" (__newx), \ + "rJ" (__mask), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + } \ }) #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ @@ -178,8 +193,13 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ + __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \ + prepend, append, \ + __ret, __ptr, __old, __new); \ + break; \ case 2: \ - __arch_cmpxchg_masked(sc_cas_sfx, prepend, append, \ + __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \ + prepend, append, \ __ret, __ptr, __old, __new); \ break; \ case 4: \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index dfb7367a21a9..e9de793508a8 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -102,6 +102,7 @@ #define RISCV_ISA_EXT_SMMPM 87 #define RISCV_ISA_EXT_SMNPM 88 #define RISCV_ISA_EXT_SSNPM 89 +#define RISCV_ISA_EXT_ZABHA 90 #define RISCV_ISA_EXT_ZICCRSE 91 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 4f1739274de3..6ad6f48433e6 100644 --- a/arch/riscv/kernel/cpufeature.c 
+++ b/arch/riscv/kernel/cpufeature.c @@ -362,6 +362,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM), __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP), + __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA), __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS), __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS), __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA), -- Gitee From 65396764ca4c6ef36724a8eec5850105f4680bc9 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:46 +0100 Subject: [PATCH 08/11] riscv: Improve zacas fully-ordered cmpxchg() ANBZ: #23245 commit 6116e22ef33a8239f3d53bb25377e9ed733c4176 upstream. The current fully-ordered cmpxchgXX() implementation results in: amocas.X.rl a5,a4,(s1) fence rw,rw This provides enough sync but we can actually use the following better mapping instead: amocas.X.aqrl a5,a4,(s1) Suggested-by: Andrea Parri Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-7-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/include/asm/cmpxchg.h | 92 ++++++++++++++++++++++---------- 1 file changed, 64 insertions(+), 28 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index b743e3ca05b1..12a9f85fc16e 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -108,8 +108,10 @@ * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ - -#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \ +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + r, p, o, n) \ ({ \ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ @@ -118,9 +120,9 @@ r = o; \ \ __asm__ __volatile__ ( \ - prepend \ + cas_prepend \ " amocas" cas_sfx " %0, %z2, %1\n" \ - append \ + cas_append \ : "+&r" (r), "+A" (*(p)) \ : "rJ" (n) \ : "memory"); \ @@ -135,7 +137,7 @@ ulong __rc; \ \ __asm__ __volatile__ ( \ - prepend \ + sc_prepend \ "0: lr.w %0, %2\n" \ " and %1, %0, %z5\n" \ " bne %1, %z3, 1f\n" \ @@ -143,7 +145,7 @@ " or %1, %1, %z4\n" \ " sc.w" sc_sfx " %1, %1, %2\n" \ " bnez %1, 0b\n" \ - append \ + sc_append \ "1:\n" \ : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ : "rJ" ((long)__oldx), "rJ" (__newx), \ @@ -154,16 +156,19 @@ } \ }) -#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ +#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + r, p, co, o, n) \ ({ \ if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \ riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \ r = o; \ \ __asm__ __volatile__ ( \ - prepend \ - " amocas" sc_cas_sfx " %0, %z2, %1\n" \ - append \ + cas_prepend \ + " amocas" cas_sfx " %0, %z2, %1\n" \ + cas_append \ : "+&r" (r), "+A" (*(p)) \ : "rJ" (n) \ : "memory"); \ @@ -171,12 +176,12 @@ register unsigned int __rc; \ \ __asm__ __volatile__ ( \ - prepend \ + sc_prepend \ "0: lr" lr_sfx " %0, %2\n" \ " bne %0, %z3, 1f\n" \ - " sc" sc_cas_sfx " %1, %z4, %2\n" \ + " sc" sc_sfx " %1, %z4, %2\n" \ " bnez %1, 0b\n" \ - append \ + sc_append \ "1:\n" \ : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ : "rJ" (co o), "rJ" (n) \ @@ -184,7 +189,9 @@ } \ }) -#define _arch_cmpxchg(ptr, old, new, sc_cas_sfx, prepend, append) \ +#define 
_arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append) \ ({ \ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __old = (old); \ @@ -193,22 +200,28 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ - __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx, \ - prepend, append, \ - __ret, __ptr, __old, __new); \ + __arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, __old, __new); \ break; \ case 2: \ - __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx, \ - prepend, append, \ - __ret, __ptr, __old, __new); \ + __arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, __old, __new); \ break; \ case 4: \ - __arch_cmpxchg(".w", ".w" sc_cas_sfx, prepend, append, \ - __ret, __ptr, (long), __old, __new); \ + __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, (long), __old, __new); \ break; \ case 8: \ - __arch_cmpxchg(".d", ".d" sc_cas_sfx, prepend, append, \ - __ret, __ptr, /**/, __old, __new); \ + __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ + sc_prepend, sc_append, \ + cas_prepend, cas_append, \ + __ret, __ptr, /**/, __old, __new); \ break; \ default: \ BUILD_BUG(); \ @@ -216,17 +229,40 @@ (__typeof__(*(__ptr)))__ret; \ }) +/* + * These macros are here to improve the readability of the arch_cmpxchg_XXX() + * macros. + */ +#define SC_SFX(x) x +#define CAS_SFX(x) x +#define SC_PREPEND(x) x +#define SC_APPEND(x) x +#define CAS_PREPEND(x) x +#define CAS_APPEND(x) x + #define arch_cmpxchg_relaxed(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", "", "") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(""), SC_APPEND(""), \ + CAS_PREPEND(""), CAS_APPEND("")) #define arch_cmpxchg_acquire(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", "", RISCV_ACQUIRE_BARRIER) + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \ + CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER)) #define arch_cmpxchg_release(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), "", RISCV_RELEASE_BARRIER, "") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(""), CAS_SFX(""), \ + SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \ + CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND("")) #define arch_cmpxchg(ptr, o, n) \ - _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") + _arch_cmpxchg((ptr), (o), (n), \ + SC_SFX(".rl"), CAS_SFX(".aqrl"), \ + SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \ + CAS_PREPEND(""), CAS_APPEND("")) #define arch_cmpxchg_local(ptr, o, n) \ arch_cmpxchg_relaxed((ptr), (o), (n)) -- Gitee From 5404cb45b7fbe1d54f01c40d67b08ea331f5ace1 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:47 +0100 Subject: [PATCH 09/11] riscv: Implement arch_cmpxchg128() using Zacas ANBZ: #23245 commit f7bd2be7663c7de1dde27dadd352b2c3f4e19106 upstream. Now that Zacas is supported in the kernel, let's use the double word atomic version of amocas to improve the SLUB allocator. Note that we have to select fixed registers, otherwise gcc fails to pick even registers and then produces a reserved encoding which fails to assemble. 
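For context on the consumer, here is a hedged userspace sketch of the pointer-plus-counter pairing that a 128-bit CAS protects against ABA reuse; the GCC __atomic builtin below may require linking with -latomic, and the names are illustrative rather than SLUB's actual structures:

#include <stdint.h>
#include <stdio.h>

union freelist_aba_model {
	__uint128_t full;			/* the amocas.q-sized unit */
	struct {
		uint64_t freelist;		/* pointer bits */
		uint64_t counter;		/* bumped on every update */
	};
};

static int try_cmpxchg128_model(__uint128_t *p, __uint128_t *old,
				__uint128_t new)
{
	return __atomic_compare_exchange_n(p, old, new, 0,
					   __ATOMIC_SEQ_CST,
					   __ATOMIC_RELAXED);
}

int main(void)
{
	union freelist_aba_model cur = { .freelist = 0x1000, .counter = 0 };
	union freelist_aba_model old = cur, new;

	new.freelist = 0x2000;
	new.counter = old.counter + 1;		/* the counter defeats ABA */

	if (try_cmpxchg128_model(&cur.full, &old.full, new.full))
		printf("swapped: freelist=%#lx counter=%lu\n",
		       (unsigned long)cur.freelist,
		       (unsigned long)cur.counter);
	return 0;
}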
Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-8-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/cmpxchg.h | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 54eeddaae263..01629189138a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -95,6 +95,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU + select HAVE_ALIGNED_STRUCT_PAGE select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 12a9f85fc16e..1ba3ccbab2a9 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -297,6 +297,44 @@ arch_cmpxchg_release((ptr), (o), (n)); \ }) +#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) + +#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS) + +union __u128_halves { + u128 full; + struct { + u64 low, high; + }; +}; + +#define __arch_cmpxchg128(p, o, n, cas_sfx) \ +({ \ + __typeof__(*(p)) __o = (o); \ + union __u128_halves __hn = { .full = (n) }; \ + union __u128_halves __ho = { .full = (__o) }; \ + register unsigned long t1 asm ("t1") = __hn.low; \ + register unsigned long t2 asm ("t2") = __hn.high; \ + register unsigned long t3 asm ("t3") = __ho.low; \ + register unsigned long t4 asm ("t4") = __ho.high; \ + \ + __asm__ __volatile__ ( \ + " amocas.q" cas_sfx " %0, %z3, %2" \ + : "+&r" (t3), "+&r" (t4), "+A" (*(p)) \ + : "rJ" (t1), "rJ" (t2) \ + : "memory"); \ + \ + ((u128)t4 << 64) | t3; \ +}) + +#define arch_cmpxchg128(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), ".aqrl") + +#define arch_cmpxchg128_local(ptr, o, n) \ + __arch_cmpxchg128((ptr), (o), (n), "") + +#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */ + #ifdef CONFIG_RISCV_ISA_ZAWRS /* * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to -- Gitee From cfeb43b1f32eaf56f80fde392d1c8ebeb2a24dda Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:48 +0100 Subject: [PATCH 10/11] riscv: Implement xchg8/16() using Zabha ANBZ: #23245 commit 97ddab7fbea8fceb044108b64ba2ee2c96ff8dab upstream. This adds runtime support for Zabha in xchg8/16() operations. 
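To visualize the two paths this patch selects between, consider a hedged userspace model of a 1-byte exchange (little-endian assumed, atomics modeled with GCC builtins): without byte AMOs the exchange must be emulated on the containing aligned 32-bit word with a mask/or/retry loop, which is the shape of the lr.w/sc.w fallback in this patch, while with Zabha it collapses to a single amoswap.b:

#include <stdint.h>
#include <stdio.h>

static uint8_t xchg8_masked_model(uint8_t *p, uint8_t new)
{
	/* the aligned 32-bit word containing *p */
	uint32_t *p32 = (uint32_t *)((uintptr_t)p & ~(uintptr_t)0x3);
	unsigned int shift = ((uintptr_t)p & 0x3) * 8;
	uint32_t mask = 0xffu << shift;
	uint32_t newx = (uint32_t)new << shift;
	uint32_t old, merged;

	do {	/* stands in for the lr.w/sc.w retry loop */
		old = __atomic_load_n(p32, __ATOMIC_RELAXED);
		merged = (old & ~mask) | newx;
	} while (!__atomic_compare_exchange_n(p32, &old, merged, 0,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED));

	return (uint8_t)((old & mask) >> shift);
}

static uint8_t xchg8_amo_model(uint8_t *p, uint8_t new)
{
	/* with Zabha this is a single amoswap.b */
	return __atomic_exchange_n(p, new, __ATOMIC_RELAXED);
}

int main(void)
{
	uint32_t word = 0x04030201;	/* bytes 1,2,3,4 on little-endian */
	uint8_t *bytes = (uint8_t *)&word;
	uint8_t old1 = xchg8_masked_model(&bytes[1], 9);
	uint8_t old2 = xchg8_amo_model(&bytes[2], 8);

	printf("old=%u new=%u\n", old1, bytes[1]);	/* old=2 new=9 */
	printf("old=%u new=%u\n", old2, bytes[2]);	/* old=3 new=8 */
	return 0;
}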
Signed-off-by: Alexandre Ghiti Reviewed-by: Andrew Jones Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-9-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: luohaiyang --- arch/riscv/include/asm/cmpxchg.h | 65 ++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 1ba3ccbab2a9..4d4f22ac2cab 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -15,29 +15,41 @@ #include #include -#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ -({ \ - u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ - ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ - ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ - << __s; \ - ulong __newx = (ulong)(n) << __s; \ - ulong __retx; \ - ulong __rc; \ - \ - __asm__ __volatile__ ( \ - prepend \ - "0: lr.w %0, %2\n" \ - " and %1, %0, %z4\n" \ - " or %1, %1, %z3\n" \ - " sc.w" sc_sfx " %1, %1, %2\n" \ - " bnez %1, 0b\n" \ - append \ - : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ - : "rJ" (__newx), "rJ" (~__mask) \ - : "memory"); \ - \ - r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ +#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \ + swap_append, r, p, n) \ +({ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \ + riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \ + __asm__ __volatile__ ( \ + prepend \ + " amoswap" swap_sfx " %0, %z2, %1\n" \ + swap_append \ + : "=&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } else { \ + u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \ + ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \ + ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \ + << __s; \ + ulong __newx = (ulong)(n) << __s; \ + ulong __retx; \ + ulong __rc; \ + \ + __asm__ __volatile__ ( \ + prepend \ + "0: lr.w %0, %2\n" \ + " and %1, %0, %z4\n" \ + " or %1, %1, %z3\n" \ + " sc.w" sc_sfx " %1, %1, %2\n" \ + " bnez %1, 0b\n" \ + sc_append \ + : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \ + : "rJ" (__newx), "rJ" (~__mask) \ + : "memory"); \ + \ + r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ + } \ }) #define __arch_xchg(sfx, prepend, append, r, p, n) \ @@ -60,8 +72,13 @@ \ switch (sizeof(*__ptr)) { \ case 1: \ + __arch_xchg_masked(sc_sfx, ".b" swap_sfx, \ + prepend, sc_append, swap_append, \ + __ret, __ptr, __new); \ + break; \ case 2: \ - __arch_xchg_masked(sc_sfx, prepend, sc_append, \ + __arch_xchg_masked(sc_sfx, ".h" swap_sfx, \ + prepend, sc_append, swap_append, \ + __ret, __ptr, __new); \ break; \ case 4: \ -- Gitee From 4752197fda110166b5194f5b11db6609e30df47e Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 3 Nov 2024 15:51:53 +0100 Subject: [PATCH 11/11] riscv: Add qspinlock support ANBZ: #23245 commit ab83647fadae2f1f723119dc066b39a461d6d288 upstream. In order to produce a generic kernel, a user can select CONFIG_COMBO_SPINLOCKS, which will fall back at runtime to the ticket spinlock implementation if neither Zabha nor Ziccrse is present. Note that we can't use alternatives here because the discovery of extensions is done too late, and we need to start with the qspinlock implementation because the ticket spinlock implementation would pollute the spinlock value, so let's use static keys. This is largely based on Guo's work and Leonardo's reviews at [1].
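The decide-once shape that a static key gives can be sketched with a userspace model in which a function pointer bound at init time stands in for the patched static branch (an analogy only; the kernel patches the instruction stream and takes no indirect call):

#include <stdbool.h>
#include <stdio.h>

static void lock_queued(void) { puts("queued (qspinlock) slowpath"); }
static void lock_ticket(void) { puts("ticket spinlock slowpath"); }

static void (*arch_lock)(void);		/* models the static branch */

static bool has_zabha_and_zacas(void)
{
	return false;	/* assumption: probed from the ISA string at boot */
}

static void riscv_spinlock_init_model(void)
{
	/*
	 * Decided once, at boot, before the first lock is taken; the
	 * ticket fallback is chosen when the needed extensions are absent.
	 */
	if (has_zabha_and_zacas())
		arch_lock = lock_queued;
	else
		arch_lock = lock_ticket;
}

int main(void)
{
	riscv_spinlock_init_model();
	arch_lock();	/* no per-call feature test from here on */
	return 0;
}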
Link: https://lore.kernel.org/linux-riscv/20231225125847.2778638-1-guoren@kernel.org/ [1] Signed-off-by: Guo Ren Signed-off-by: Alexandre Ghiti Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20241103145153.105097-14-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt [only enable Zacas/Zabha in riscv_spinlock_init() - luohaiyang] Signed-off-by: luohaiyang --- arch/riscv/kernel/setup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 4c8d63e11fcc..ab385e02e19d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -312,7 +312,12 @@ static void __init riscv_spinlock_init(void) return; } - if (riscv_isa_extension_available(NULL, ZICCRSE)) + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && + IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && + riscv_isa_extension_available(NULL, ZABHA) && + riscv_isa_extension_available(NULL, ZACAS)) { + using_ext = "using Zabha"; + } else if (riscv_isa_extension_available(NULL, ZICCRSE)) using_ext = "using Ziccrse"; #if defined(CONFIG_RISCV_COMBO_SPINLOCKS) else { -- Gitee