From 9787cb8ebdb03b4395f14f23e3e09eda9a7a443a Mon Sep 17 00:00:00 2001 From: winterddd Date: Tue, 22 Jul 2025 14:18:44 +0800 Subject: [PATCH 01/17] anolis: Revert "anolis:driver:drm/amd/display: Support DRM_AMD_DC_FP on RISC-V" ANBZ: #22602 This reverts commit a81c6130e08d1bba17f3ddc8113acc0475db6096. Signed-off-by: winterddd --- drivers/gpu/drm/amd/display/Kconfig | 2 +- drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 6 ++---- drivers/gpu/drm/amd/display/dc/dml/Makefile | 6 ------ 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 89ea959e0c05..901d1961b739 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -8,7 +8,7 @@ config DRM_AMD_DC depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64 select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 - select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG) || (RISCV && FPU)) + select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG)) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index 834dca0396f1..4ae4720535a5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -35,8 +35,6 @@ #include #elif defined(CONFIG_LOONGARCH) #include -#elif defined(CONFIG_RISCV) -#include #endif /** @@ -91,7 +89,7 @@ void dc_fpu_begin(const char *function_name, const int line) depth = __this_cpu_inc_return(fpu_recursion_depth); if (depth == 1) { -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) || defined(CONFIG_RISCV) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) kernel_fpu_begin(); #elif defined(CONFIG_PPC64) if (cpu_has_feature(CPU_FTR_VSX_COMP)) @@ -124,7 +122,7 @@ void dc_fpu_end(const char *function_name, const int line) depth = __this_cpu_dec_return(fpu_recursion_depth); if (depth == 0) { -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) || defined(CONFIG_RISCV) +#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) kernel_fpu_end(); #elif defined(CONFIG_PPC64) if (cpu_has_feature(CPU_FTR_VSX_COMP)) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index abd04d13997d..0ba9a7997d56 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -43,12 +43,6 @@ dml_ccflags := -mfpu=64 dml_rcflags := -msoft-float endif -ifdef CONFIG_RISCV -include $(srctree)/arch/riscv/Makefile.isa -# Remove V from the ISA string, like in arch/riscv/Makefile, but keep F and D. -dml_ccflags := -march=$(subst v0p7,,$(riscv-march-y)) -endif - ifdef CONFIG_CC_IS_GCC ifneq ($(call gcc-min-version, 70100),y) IS_OLD_GCC = 1 -- Gitee From e7311864319172ccc048c648749a3e194c8ae22e Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Fri, 11 Jul 2025 17:51:01 +0800 Subject: [PATCH 02/17] anolis: Revert "anolis:arch:riscv: Factor out riscv-march-y to a separate Makefile" ANBZ: #22602 This reverts commit b070e8a7fab41214225d57ece4d39886831fd7cc. [Ruidong: fix conflict about pause] Signed-off-by: Ruidong Tian --- arch/riscv/Makefile | 15 +++++++++++++-- arch/riscv/Makefile.isa | 15 --------------- 2 files changed, 13 insertions(+), 17 deletions(-) delete mode 100644 arch/riscv/Makefile.isa diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 7155a8be497e..dd98d8b9646c 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -6,7 +6,6 @@ # for more details. # -RISCV_ARCH_PATH := arch/riscv LDFLAGS_vmlinux := -z norelro ifeq ($(CONFIG_RELOCATABLE),y) LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs @@ -55,7 +54,19 @@ endif endif endif -include $(srctree)/${RISCV_ARCH_PATH}/Makefile.isa +# ISA string setting +riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima +riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima +riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd +riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c +riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v + +ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC +KBUILD_CFLAGS += -Wa,-misa-spec=2.2 +KBUILD_AFLAGS += -Wa,-misa-spec=2.2 +else +riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei +endif # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) diff --git a/arch/riscv/Makefile.isa b/arch/riscv/Makefile.isa deleted file mode 100644 index 279f24f32763..000000000000 --- a/arch/riscv/Makefile.isa +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -# ISA string setting -riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima -riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima -riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd -riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c -riscv-march-$(CONFIG_RISCV_ISA_V) := $(riscv-march-y)v - -ifdef CONFIG_TOOLCHAIN_NEEDS_OLD_ISA_SPEC -KBUILD_CFLAGS += -Wa,-misa-spec=2.2 -KBUILD_AFLAGS += -Wa,-misa-spec=2.2 -else -riscv-march-$(CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI) := $(riscv-march-y)_zicsr_zifencei -endif -- Gitee From 7c402b6d844070078c435f1747fd0f96f2515b46 Mon Sep 17 00:00:00 2001 From: Ruidong Tian Date: Fri, 11 Jul 2025 17:51:32 +0800 Subject: [PATCH 03/17] anolis: Revert "anolis:arch:riscv: Add support for kernel-mode FPU" ANBZ: #22602 This reverts commit 69bb9d9f1b79407429205fb82240c5c5a4d3f13e. Signed-off-by: Ruidong Tian --- arch/riscv/include/asm/switch_to.h | 15 --------------- arch/riscv/kernel/process.c | 3 --- 2 files changed, 18 deletions(-) diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 7508f3ec8063..f90d8e42f3c7 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -63,21 +63,6 @@ static __always_inline bool has_fpu(void) return riscv_has_extension_likely(RISCV_ISA_EXT_f) || riscv_has_extension_likely(RISCV_ISA_EXT_d); } - - -static inline void kernel_fpu_begin(void) -{ - preempt_disable(); - fstate_save(current, task_pt_regs(current)); - csr_set(CSR_SSTATUS, SR_FS); -} - -static inline void kernel_fpu_end(void) -{ - csr_clear(CSR_SSTATUS, SR_FS); - fstate_restore(current, task_pt_regs(current)); - preempt_enable(); -} #else static __always_inline bool has_fpu(void) { return false; } #define fstate_save(task, regs) do { } while (0) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index dd973216e31c..83e223318822 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -204,6 +204,3 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } - -EXPORT_SYMBOL_GPL(__fstate_save); -EXPORT_SYMBOL_GPL(__fstate_restore); -- Gitee From 5accf4373d32d07019bab074ecf6e6956c8e61f0 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:16 -0700 Subject: [PATCH 04/17] arch: add ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 6cbd1d6d36c5d8312de99d1dfa3bec40ac840ce0 upstream. Several architectures provide an API to enable the FPU and run floating-point SIMD code in kernel space. However, the function names, header locations, and semantics are inconsistent across architectures, and FPU support may be gated behind other Kconfig options. provide a standard way for architectures to declare that kernel space FPU support is available. Architectures selecting this option must implement what is currently the most common API (kernel_fpu_begin() and kernel_fpu_end(), plus a new function kernel_fpu_available()) and provide the appropriate CFLAGS for compiling floating-point C code. Link: https://lkml.kernel.org/r/20240329072441.591471-2-samuel.holland@sifive.com Signed-off-by: Samuel Holland Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- Documentation/core-api/floating-point.rst | 78 +++++++++++++++++++++++ Documentation/core-api/index.rst | 1 + Makefile | 5 ++ arch/Kconfig | 6 ++ include/linux/fpu.h | 12 ++++ 5 files changed, 102 insertions(+) create mode 100644 Documentation/core-api/floating-point.rst create mode 100644 include/linux/fpu.h diff --git a/Documentation/core-api/floating-point.rst b/Documentation/core-api/floating-point.rst new file mode 100644 index 000000000000..a8d0d4b05052 --- /dev/null +++ b/Documentation/core-api/floating-point.rst @@ -0,0 +1,78 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +Floating-point API +================== + +Kernel code is normally prohibited from using floating-point (FP) registers or +instructions, including the C float and double data types. This rule reduces +system call overhead, because the kernel does not need to save and restore the +userspace floating-point register state. + +However, occasionally drivers or library functions may need to include FP code. +This is supported by isolating the functions containing FP code to a separate +translation unit (a separate source file), and saving/restoring the FP register +state around calls to those functions. This creates "critical sections" of +floating-point usage. + +The reason for this isolation is to prevent the compiler from generating code +touching the FP registers outside these critical sections. Compilers sometimes +use FP registers to optimize inlined ``memcpy`` or variable assignment, as +floating-point registers may be wider than general-purpose registers. + +Usability of floating-point code within the kernel is architecture-specific. +Additionally, because a single kernel may be configured to support platforms +both with and without a floating-point unit, FPU availability must be checked +both at build time and at run time. + +Several architectures implement the generic kernel floating-point API from +``linux/fpu.h``, as described below. Some other architectures implement their +own unique APIs, which are documented separately. + +Build-time API +-------------- + +Floating-point code may be built if the option ``ARCH_HAS_KERNEL_FPU_SUPPORT`` +is enabled. For C code, such code must be placed in a separate file, and that +file must have its compilation flags adjusted using the following pattern:: + + CFLAGS_foo.o += $(CC_FLAGS_FPU) + CFLAGS_REMOVE_foo.o += $(CC_FLAGS_NO_FPU) + +Architectures are expected to define one or both of these variables in their +top-level Makefile as needed. For example:: + + CC_FLAGS_FPU := -mhard-float + +or:: + + CC_FLAGS_NO_FPU := -msoft-float + +Normal kernel code is assumed to use the equivalent of ``CC_FLAGS_NO_FPU``. + +Runtime API +----------- + +The runtime API is provided in ``linux/fpu.h``. This header cannot be included +from files implementing FP code (those with their compilation flags adjusted as +above). Instead, it must be included when defining the FP critical sections. + +.. c:function:: bool kernel_fpu_available( void ) + + This function reports if floating-point code can be used on this CPU or + platform. The value returned by this function is not expected to change + at runtime, so it only needs to be called once, not before every + critical section. + +.. c:function:: void kernel_fpu_begin( void ) + void kernel_fpu_end( void ) + + These functions create a floating-point critical section. It is only + valid to call ``kernel_fpu_begin()`` after a previous call to + ``kernel_fpu_available()`` returned ``true``. These functions are only + guaranteed to be callable from (preemptible or non-preemptible) process + context. + + Preemption may be disabled inside critical sections, so their size + should be minimized. They are *not* required to be reentrant. If the + caller expects to nest critical sections, it must implement its own + reference counting. diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 7a3a08d81f11..974beccd671f 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -48,6 +48,7 @@ Library functionality that is used throughout the kernel. errseq wrappers/atomic_t wrappers/atomic_bitops + floating-point Low level entry and exit ======================== diff --git a/Makefile b/Makefile index 53b4ebe23b3f..ec24e6461ba2 100644 --- a/Makefile +++ b/Makefile @@ -992,6 +992,11 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI) export CC_FLAGS_CFI endif +# Architectures can define flags to add/remove for floating-point support +CC_FLAGS_FPU += -D_LINUX_FPU_COMPILATION_UNIT +export CC_FLAGS_FPU +export CC_FLAGS_NO_FPU + ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT) endif diff --git a/arch/Kconfig b/arch/Kconfig index cf8b88fd863e..2da32b779f76 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1499,6 +1499,12 @@ config ARCH_HAS_NONLEAF_PMD_YOUNG address translations. Page table walkers that clear the accessed bit may use this capability to reduce their search space. +config ARCH_HAS_KERNEL_FPU_SUPPORT + bool + help + Architectures that select this option can run floating-point code in + the kernel, as described in Documentation/core-api/floating-point.rst. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/include/linux/fpu.h b/include/linux/fpu.h new file mode 100644 index 000000000000..2fb63e22913b --- /dev/null +++ b/include/linux/fpu.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_FPU_H +#define _LINUX_FPU_H + +#ifdef _LINUX_FPU_COMPILATION_UNIT +#error FP code must be compiled separately. See Documentation/core-api/floating-point.rst. +#endif + +#include + +#endif -- Gitee From 62d81d75c983c02a5873581cb783af63355a1c10 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:26 -0700 Subject: [PATCH 05/17] riscv: add support for kernel-mode FPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 77acc6b55ae46f52bfa4eca52c9fe627f5c3ba3f upstream. This is motivated by the amdgpu DRM driver, which needs floating-point code to support recent hardware. That code is not performance-critical, so only provide a minimal non-preemptible implementation for now. Support is limited to riscv64 because riscv32 requires runtime (libgcc) assistance to convert between doubles and 64-bit integers. Link: https://lkml.kernel.org/r/20240329072441.591471-12-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Palmer Dabbelt Reviewed-by: Palmer Dabbelt Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/riscv/Kconfig | 1 + arch/riscv/Makefile | 3 +++ arch/riscv/include/asm/fpu.h | 16 ++++++++++++++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/kernel_mode_fpu.c | 28 ++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+) create mode 100644 arch/riscv/include/asm/fpu.h create mode 100644 arch/riscv/kernel/kernel_mode_fpu.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 01cfff84e9a4..b50a3d56b7ed 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -30,6 +30,7 @@ config RISCV select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && FPU select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MMIOWB select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index dd98d8b9646c..33b3bec27963 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -74,6 +74,9 @@ KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64i KBUILD_AFLAGS += -march=$(riscv-march-y) +# For C code built with floating-point support, exclude V but keep F and D. +CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/') + KBUILD_CFLAGS += -mno-save-restore KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) diff --git a/arch/riscv/include/asm/fpu.h b/arch/riscv/include/asm/fpu.h new file mode 100644 index 000000000000..91c04c244e12 --- /dev/null +++ b/arch/riscv/include/asm/fpu.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_RISCV_FPU_H +#define _ASM_RISCV_FPU_H + +#include + +#define kernel_fpu_available() has_fpu() + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +#endif /* ! _ASM_RISCV_FPU_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f18f9e4d1ef1..65b9a8c28463 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_FPU) += kernel_mode_fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/riscv/kernel/kernel_mode_fpu.c b/arch/riscv/kernel/kernel_mode_fpu.c new file mode 100644 index 000000000000..0ac8348876c4 --- /dev/null +++ b/arch/riscv/kernel/kernel_mode_fpu.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 SiFive + */ + +#include +#include + +#include +#include +#include +#include + +void kernel_fpu_begin(void) +{ + preempt_disable(); + fstate_save(current, task_pt_regs(current)); + csr_set(CSR_SSTATUS, SR_FS); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + csr_clear(CSR_SSTATUS, SR_FS); + fstate_restore(current, task_pt_regs(current)); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); -- Gitee From 796989639060f731d6187eb67342755552ca3b0d Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:17 -0700 Subject: [PATCH 06/17] ARM: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit cb2b7b7de805cd649851fb8f2f36df3b8425682e upstream. ARM provides an equivalent to the common kernel-mode FPU API, but in a different header and using different function names. Add a wrapper header, and export CFLAGS adjustments as found in lib/raid6/Makefile. [samuel.holland@sifive.com: ARM: do not select ARCH_HAS_KERNEL_FPU_SUPPORT] Link: https://lkml.kernel.org/r/20240509013727.648600-1-samuel.holland@sifive.com Link: https://lkml.kernel.org/r/20240329072441.591471-3-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Cc: Thiago Jung Bauermann Cc: Ard Biesheuvel Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/arm/Makefile | 7 +++++++ arch/arm/include/asm/fpu.h | 15 +++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 arch/arm/include/asm/fpu.h diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 5ba42f69f8ce..1dd860dba5f5 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -130,6 +130,13 @@ endif # Accept old syntax despite ".syntax unified" AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +CC_FLAGS_FPU := -ffreestanding +# Enable +CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include) +CC_FLAGS_FPU += -march=armv7-a -mfloat-abi=softfp -mfpu=neon + ifeq ($(CONFIG_THUMB2_KERNEL),y) CFLAGS_ISA :=-Wa,-mimplicit-it=always $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb diff --git a/arch/arm/include/asm/fpu.h b/arch/arm/include/asm/fpu.h new file mode 100644 index 000000000000..2ae50bdce59b --- /dev/null +++ b/arch/arm/include/asm/fpu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_FPU_H +#define __ASM_FPU_H + +#include + +#define kernel_fpu_available() cpu_has_neon() +#define kernel_fpu_begin() kernel_neon_begin() +#define kernel_fpu_end() kernel_neon_end() + +#endif /* ! __ASM_FPU_H */ -- Gitee From e1c5a972244c0ca1c50b9beb93d65359b87e993b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:18 -0700 Subject: [PATCH 07/17] ARM: crypto: use CC_FLAGS_FPU for NEON CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit c41624315b602da32f59e70baa825c5f11fea892 upstream. Now that CC_FLAGS_FPU is exported and can be used anywhere in the source tree, use it instead of duplicating the flags here. Link: https://lkml.kernel.org/r/20240329072441.591471-4-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/arm/lib/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 650404be6768..0ca5aae1bcc3 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -40,8 +40,7 @@ $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S ifeq ($(CONFIG_KERNEL_MODE_NEON),y) - NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon - CFLAGS_xor-neon.o += $(NEON_FLAGS) + CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif -- Gitee From 945138e3ea1be686fc67710b14f2051718204264 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:19 -0700 Subject: [PATCH 08/17] arm64: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 71883ae3527808d445c019870512d4b9fea2332b upstream. arm64 provides an equivalent to the common kernel-mode FPU API, but in a different header and using different function names. Add a wrapper header, and export CFLAGS adjustments as found in lib/raid6/Makefile. Link: https://lkml.kernel.org/r/20240329072441.591471-5-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 9 ++++++++- arch/arm64/include/asm/fpu.h | 15 +++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/fpu.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ec63e89d0858..c6fa48c0f6a1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -31,6 +31,7 @@ config ARM64 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 302df2d819fa..c9a7ec85ff3c 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -36,7 +36,14 @@ ifeq ($(CONFIG_BROKEN_GAS_INST),y) $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif -KBUILD_CFLAGS += -mgeneral-regs-only \ +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +CC_FLAGS_FPU := -ffreestanding +# Enable +CC_FLAGS_FPU += -isystem $(shell $(CC) -print-file-name=include) +CC_FLAGS_NO_FPU := -mgeneral-regs-only + +KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \ $(compat_vdso) $(cc_has_k_constraint) KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h new file mode 100644 index 000000000000..2ae50bdce59b --- /dev/null +++ b/arch/arm64/include/asm/fpu.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef __ASM_FPU_H +#define __ASM_FPU_H + +#include + +#define kernel_fpu_available() cpu_has_neon() +#define kernel_fpu_begin() kernel_neon_begin() +#define kernel_fpu_end() kernel_neon_end() + +#endif /* ! __ASM_FPU_H */ -- Gitee From 9c8a8018f5a0ae90c42cede9ec73bde713de7f90 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:20 -0700 Subject: [PATCH 09/17] arm64: crypto: use CC_FLAGS_FPU for NEON CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 7177089525d97bd8d7fefd6a9406f45d818410e0 upstream. Now that CC_FLAGS_FPU is exported and can be used anywhere in the source tree, use it instead of duplicating the flags here. Link: https://lkml.kernel.org/r/20240329072441.591471-6-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/arm64/lib/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index a2fd865b816d..65ec3d24d32d 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -7,10 +7,8 @@ lib-y := clear_user.o delay.o copy_from_user.o \ ifeq ($(CONFIG_KERNEL_MODE_NEON), y) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o -CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only -CFLAGS_xor-neon.o += -ffreestanding -# Enable -CFLAGS_xor-neon.o += -isystem $(shell $(CC) -print-file-name=include) +CFLAGS_xor-neon.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_xor-neon.o += $(CC_FLAGS_NO_FPU) endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -- Gitee From 42d501a3a554b5dc7dd57d490f49f3df394e593b Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:21 -0700 Subject: [PATCH 10/17] lib/raid6: use CC_FLAGS_FPU for NEON CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 4be073931cd831fa19bf8b612b9a1521385aa53e upstream. Now that CC_FLAGS_FPU is exported and can be used anywhere in the source tree, use it instead of duplicating the flags here. Link: https://lkml.kernel.org/r/20240329072441.591471-7-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- lib/raid6/Makefile | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 035b0a4db476..7a01fcc987f0 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -33,25 +33,6 @@ CFLAGS_REMOVE_vpermxor8.o += -msoft-float endif endif -# The GCC option -ffreestanding is required in order to compile code containing -# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) -ifeq ($(CONFIG_KERNEL_MODE_NEON),y) -NEON_FLAGS := -ffreestanding -# Enable -NEON_FLAGS += -isystem $(shell $(CC) -print-file-name=include) -ifeq ($(ARCH),arm) -NEON_FLAGS += -march=armv7-a -mfloat-abi=softfp -mfpu=neon -endif -CFLAGS_recov_neon_inner.o += $(NEON_FLAGS) -ifeq ($(ARCH),arm64) -CFLAGS_REMOVE_recov_neon_inner.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only -CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only -endif -endif - quiet_cmd_unroll = UNROLL $@ cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ @@ -75,10 +56,16 @@ targets += vpermxor1.c vpermxor2.c vpermxor4.c vpermxor8.c $(obj)/vpermxor%.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) -CFLAGS_neon1.o += $(NEON_FLAGS) -CFLAGS_neon2.o += $(NEON_FLAGS) -CFLAGS_neon4.o += $(NEON_FLAGS) -CFLAGS_neon8.o += $(NEON_FLAGS) +CFLAGS_neon1.o += $(CC_FLAGS_FPU) +CFLAGS_neon2.o += $(CC_FLAGS_FPU) +CFLAGS_neon4.o += $(CC_FLAGS_FPU) +CFLAGS_neon8.o += $(CC_FLAGS_FPU) +CFLAGS_recov_neon_inner.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_neon1.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_neon2.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_neon4.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_neon8.o += $(CC_FLAGS_NO_FPU) +CFLAGS_REMOVE_recov_neon_inner.o += $(CC_FLAGS_NO_FPU) targets += neon1.c neon2.c neon4.c neon8.c $(obj)/neon%.c: $(src)/neon.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) -- Gitee From 0355c7795ea9d97fc94218f43e2a5d0d6fb35208 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:22 -0700 Subject: [PATCH 11/17] LoongArch: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 372f662345d603c6e4e26864a8908826bf7a3e45 upstream. LoongArch already provides kernel_fpu_begin() and kernel_fpu_end() in asm/fpu.h, so it only needs to add kernel_fpu_available() and export the CFLAGS adjustments. Link: https://lkml.kernel.org/r/20240329072441.591471-8-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: WANG Xuerui Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/loongarch/Kconfig | 1 + arch/loongarch/Makefile | 5 ++++- arch/loongarch/include/asm/fpu.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 494d0deef45f..ea1454419643 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -16,6 +16,7 @@ config LOONGARCH select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if CPU_HAS_FPU select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index e05c10b04456..29edc25095b5 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -25,6 +25,9 @@ endif 32bit-emul = elf32loongarch 64bit-emul = elf64loongarch +CC_FLAGS_FPU := -mfpu=64 +CC_FLAGS_NO_FPU := -msoft-float + ifdef CONFIG_UNWINDER_ORC orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h orc_hash_sh := $(srctree)/scripts/orc_hash.sh @@ -58,7 +61,7 @@ ld-emul = $(64bit-emul) cflags-y += -mabi=lp64s endif -cflags-y += -pipe -msoft-float +cflags-y += -pipe $(CC_FLAGS_NO_FPU) LDFLAGS_vmlinux += -static -n -nostdlib # When the assembler supports explicit relocation hint, we must use it. diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 772741322785..c80cc1cd5407 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -21,6 +21,7 @@ struct sigcontext; +#define kernel_fpu_available() cpu_has_fpu extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); -- Gitee From c8e601bb9fb0277b0da421ae403d9714d70f1dc5 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:23 -0700 Subject: [PATCH 12/17] powerpc: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 01db473e1aa32d651477be80f79b309313636203 upstream. PowerPC provides an equivalent to the common kernel-mode FPU API, but in a different header and using different function names. The PowerPC API also requires a non-preemptible context. Add a wrapper header, and export the CFLAGS adjustments. Link: https://lkml.kernel.org/r/20240329072441.591471-9-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Michael Ellerman (powerpc) Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 5 ++++- arch/powerpc/include/asm/fpu.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/fpu.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6baa8b85601a..8d9c3cafb912 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 46cbc8ead07d..8b7bcc26f299 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -135,6 +135,9 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD)) CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) +CC_FLAGS_FPU := $(call cc-option,-mhard-float) +CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float) + ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY @@ -156,7 +159,7 @@ asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) -KBUILD_CFLAGS += $(call cc-option,-msoft-float) +KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) KBUILD_CFLAGS += $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h new file mode 100644 index 000000000000..ca584e4bc40f --- /dev/null +++ b/arch/powerpc/include/asm/fpu.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_POWERPC_FPU_H +#define _ASM_POWERPC_FPU_H + +#include + +#include +#include + +#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) + +static inline void kernel_fpu_begin(void) +{ + preempt_disable(); + enable_kernel_fp(); +} + +static inline void kernel_fpu_end(void) +{ + disable_kernel_fp(); + preempt_enable(); +} + +#endif /* ! _ASM_POWERPC_FPU_H */ -- Gitee From 7d55145116e38b6d6761c29ca009a14cd4c3a434 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:25 -0700 Subject: [PATCH 13/17] x86: implement ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit b0b8a15bb89e09e12aa6be8ae28128bb656338f1 upstream. x86 already provides kernel_fpu_begin() and kernel_fpu_end(), but in a different header. Add a wrapper header, and export the CFLAGS adjustments as found in lib/Makefile. Link: https://lkml.kernel.org/r/20240329072441.591471-11-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/x86/Kconfig | 1 + arch/x86/Makefile | 20 ++++++++++++++++++++ arch/x86/include/asm/fpu.h | 13 +++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 arch/x86/include/asm/fpu.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d0fa98776cb7..d230eefa5572 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -83,6 +83,7 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_KERNEL_FPU_SUPPORT select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c83582b5a010..758f95beb630 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -70,6 +70,26 @@ export BITS KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 +# +# CFLAGS for compiling floating point code inside the kernel. +# +CC_FLAGS_FPU := -msse -msse2 +ifdef CONFIG_CC_IS_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 +# +# The "-msse" in the first argument is there so that the +# -mpreferred-stack-boundary=3 build error: +# +# -mpreferred-stack-boundary=3 is not between 4 and 12 +# +# can be triggered. Otherwise gcc doesn't complain. +CC_FLAGS_FPU += -mhard-float +CC_FLAGS_FPU += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) +endif + ifeq ($(CONFIG_X86_KERNEL_IBT),y) # # Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h new file mode 100644 index 000000000000..b2743fe19339 --- /dev/null +++ b/arch/x86/include/asm/fpu.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 SiFive + */ + +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +#include + +#define kernel_fpu_available() true + +#endif /* ! _ASM_X86_FPU_H */ -- Gitee From b55469900cf64a316cc771aa21cf52f0c0897f38 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:24 -0700 Subject: [PATCH 14/17] x86/fpu: fix asm/fpu/types.h include guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit b11b998e983119e27b33210a0128b27df6ae5f78 upstream. Patch series "Unified cross-architecture kernel-mode FPU API", v4. This series unifies the kernel-mode FPU API across several architectures by wrapping the existing functions (where needed) in consistently-named functions placed in a consistent header location, with mostly the same semantics: they can be called from preemptible or non-preemptible task context, and are not assumed to be reentrant. Architectures are also expected to provide CFLAGS adjustments for compiling FPU-dependent code. For the moment, SIMD/vector units are out of scope for this common API. This allows us to remove the ifdeffery and duplicated Makefile logic at each FPU user. It then implements the common API on RISC-V, and converts a couple of users to the new API: the AMDGPU DRM driver, and the FPU self test. The underlying goal of this series is to allow using newer AMD GPUs (e.g. Navi) on RISC-V boards such as SiFive's HiFive Unmatched. Those GPUs need CONFIG_DRM_AMD_DC_FP to initialize, which requires kernel-mode FPU support. This patch (of 15): The include guard should match the filename, or it will conflict with the newly-added asm/fpu.h. Link: https://lkml.kernel.org/r/20240329072441.591471-1-samuel.holland@sifive.com Link: https://lkml.kernel.org/r/20240329072441.591471-10-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Dave Hansen Acked-by: Christian König Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: Alex Deucher Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: WANG Xuerui Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- arch/x86/include/asm/fpu/types.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index fd5fb43d920b..77dbccfbe878 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -2,8 +2,8 @@ /* * FPU data structures: */ -#ifndef _ASM_X86_FPU_H -#define _ASM_X86_FPU_H +#ifndef _ASM_X86_FPU_TYPES_H +#define _ASM_X86_FPU_TYPES_H /* * The legacy x87 FPU state format, as saved by FSAVE and @@ -601,4 +601,4 @@ struct fpu_state_config { /* FPU state configuration information */ extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg; -#endif /* _ASM_X86_FPU_H */ +#endif /* _ASM_X86_FPU_TYPES_H */ -- Gitee From 15863786224ab453150dfe6fa5ac07ea79a5731a Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:28 -0700 Subject: [PATCH 15/17] drm/amd/display: use ARCH_HAS_KERNEL_FPU_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit a28e4b672f042eb38d9b09f9d1fdf58c07052da4 upstream. Now that all previously-supported architectures select ARCH_HAS_KERNEL_FPU_SUPPORT, this code can depend on that symbol instead of the existing list of architectures. It can also take advantage of the common kernel-mode FPU API and method of adjusting CFLAGS. Link: https://lkml.kernel.org/r/20240329072441.591471-14-samuel.holland@sifive.com Signed-off-by: Samuel Holland Acked-by: Alex Deucher Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton [Ruidong: fix conflict] Signed-off-by: Ruidong Tian --- drivers/gpu/drm/amd/display/Kconfig | 2 +- .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 35 ++---------------- drivers/gpu/drm/amd/display/dc/dml/Makefile | 36 ++----------------- 3 files changed, 5 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 901d1961b739..5fcd4f778dc3 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -8,7 +8,7 @@ config DRM_AMD_DC depends on BROKEN || !CC_IS_CLANG || ARM64 || RISCV || SPARC64 || X86_64 select SND_HDA_COMPONENT if SND_HDA_CORE # !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752 - select DRM_AMD_DC_FP if (X86 || LOONGARCH || (PPC64 && ALTIVEC) || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG)) + select DRM_AMD_DC_FP if ARCH_HAS_KERNEL_FPU_SUPPORT && (!ARM64 || !CC_IS_CLANG) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c index 4ae4720535a5..e46f8ce41d87 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c @@ -26,16 +26,7 @@ #include "dc_trace.h" -#if defined(CONFIG_X86) -#include -#elif defined(CONFIG_PPC64) -#include -#include -#elif defined(CONFIG_ARM64) -#include -#elif defined(CONFIG_LOONGARCH) -#include -#endif +#include /** * DOC: DC FPU manipulation overview @@ -87,20 +78,9 @@ void dc_fpu_begin(const char *function_name, const int line) WARN_ON_ONCE(!in_task()); preempt_disable(); depth = __this_cpu_inc_return(fpu_recursion_depth); - if (depth == 1) { -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) + BUG_ON(!kernel_fpu_available()); kernel_fpu_begin(); -#elif defined(CONFIG_PPC64) - if (cpu_has_feature(CPU_FTR_VSX_COMP)) - enable_kernel_vsx(); - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) - enable_kernel_altivec(); - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) - enable_kernel_fp(); -#elif defined(CONFIG_ARM64) - kernel_neon_begin(); -#endif } TRACE_DCN_FPU(true, function_name, line, depth); @@ -122,18 +102,7 @@ void dc_fpu_end(const char *function_name, const int line) depth = __this_cpu_dec_return(fpu_recursion_depth); if (depth == 0) { -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) kernel_fpu_end(); -#elif defined(CONFIG_PPC64) - if (cpu_has_feature(CPU_FTR_VSX_COMP)) - disable_kernel_vsx(); - else if (cpu_has_feature(CPU_FTR_ALTIVEC_COMP)) - disable_kernel_altivec(); - else if (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE)) - disable_kernel_fp(); -#elif defined(CONFIG_ARM64) - kernel_neon_end(); -#endif } else { WARN_ON_ONCE(depth < 0); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 0ba9a7997d56..2d04de698dd6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -25,40 +25,8 @@ # It provides the general basic services required by other DAL # subcomponents. -ifdef CONFIG_X86 -dml_ccflags-$(CONFIG_CC_IS_GCC) := -mhard-float -dml_ccflags := $(dml_ccflags-y) -msse -endif - -ifdef CONFIG_PPC64 -dml_ccflags := -mhard-float -maltivec -endif - -ifdef CONFIG_ARM64 -dml_rcflags := -mgeneral-regs-only -endif - -ifdef CONFIG_LOONGARCH -dml_ccflags := -mfpu=64 -dml_rcflags := -msoft-float -endif - -ifdef CONFIG_CC_IS_GCC -ifneq ($(call gcc-min-version, 70100),y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -dml_ccflags += -mpreferred-stack-boundary=4 -else -dml_ccflags += -msse2 -endif -endif +dml_ccflags := $(CC_FLAGS_FPU) +dml_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -- Gitee From 13a47fdccc048430b7fce584c80ecef6eb1174d7 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:29 -0700 Subject: [PATCH 16/17] selftests/fpu: move FP code to a separate translation unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 9613736d852dce2376a848e6e9af091c422a947e upstream. This ensures no compiler-generated floating-point code can appear outside kernel_fpu_{begin,end}() sections, and some architectures enforce this separation. Link: https://lkml.kernel.org/r/20240329072441.591471-15-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- lib/Makefile | 3 ++- lib/test_fpu.h | 8 +++++++ lib/{test_fpu.c => test_fpu_glue.c} | 32 +------------------------ lib/test_fpu_impl.c | 37 +++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 lib/test_fpu.h rename lib/{test_fpu.c => test_fpu_glue.c} (71%) create mode 100644 lib/test_fpu_impl.c diff --git a/lib/Makefile b/lib/Makefile index 7ab8f09de8ec..048b0b1a6291 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -130,7 +130,8 @@ FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-st endif obj-$(CONFIG_TEST_FPU) += test_fpu.o -CFLAGS_test_fpu.o += $(FPU_CFLAGS) +test_fpu-y := test_fpu_glue.o test_fpu_impl.o +CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS) obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ diff --git a/lib/test_fpu.h b/lib/test_fpu.h new file mode 100644 index 000000000000..4459807084bc --- /dev/null +++ b/lib/test_fpu.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _LIB_TEST_FPU_H +#define _LIB_TEST_FPU_H + +int test_fpu(void); + +#endif diff --git a/lib/test_fpu.c b/lib/test_fpu_glue.c similarity index 71% rename from lib/test_fpu.c rename to lib/test_fpu_glue.c index e82db19fed84..85963d7be826 100644 --- a/lib/test_fpu.c +++ b/lib/test_fpu_glue.c @@ -19,37 +19,7 @@ #include #include -static int test_fpu(void) -{ - /* - * This sequence of operations tests that rounding mode is - * to nearest and that denormal numbers are supported. - * Volatile variables are used to avoid compiler optimizing - * the calculations away. - */ - volatile double a, b, c, d, e, f, g; - - a = 4.0; - b = 1e-15; - c = 1e-310; - - /* Sets precision flag */ - d = a + b; - - /* Result depends on rounding mode */ - e = a + b / 2; - - /* Denormal and very large values */ - f = b / c; - - /* Depends on denormal support */ - g = a + c * f; - - if (d > a && e > a && g > a) - return 0; - else - return -EINVAL; -} +#include "test_fpu.h" static int test_fpu_get(void *data, u64 *val) { diff --git a/lib/test_fpu_impl.c b/lib/test_fpu_impl.c new file mode 100644 index 000000000000..777894dbbe86 --- /dev/null +++ b/lib/test_fpu_impl.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include + +#include "test_fpu.h" + +int test_fpu(void) +{ + /* + * This sequence of operations tests that rounding mode is + * to nearest and that denormal numbers are supported. + * Volatile variables are used to avoid compiler optimizing + * the calculations away. + */ + volatile double a, b, c, d, e, f, g; + + a = 4.0; + b = 1e-15; + c = 1e-310; + + /* Sets precision flag */ + d = a + b; + + /* Result depends on rounding mode */ + e = a + b / 2; + + /* Denormal and very large values */ + f = b / c; + + /* Depends on denormal support */ + g = a + c * f; + + if (d > a && e > a && g > a) + return 0; + else + return -EINVAL; +} -- Gitee From 31f64901d802c61e193624c3e782fb2d2382c2ab Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Fri, 29 Mar 2024 00:18:30 -0700 Subject: [PATCH 17/17] selftests/fpu: allow building on other architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #22602 commit 790a4a3dd1039ee07e7bb5854dcc7ccfbb40c876 upstream. Now that ARCH_HAS_KERNEL_FPU_SUPPORT provides a common way to compile and run floating-point code, this test is no longer x86-specific. Link: https://lkml.kernel.org/r/20240329072441.591471-16-samuel.holland@sifive.com Signed-off-by: Samuel Holland Reviewed-by: Christoph Hellwig Acked-by: Christian König Cc: Alex Deucher Cc: Borislav Petkov (AMD) Cc: Catalin Marinas Cc: Dave Hansen Cc: Huacai Chen Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Palmer Dabbelt Cc: Russell King Cc: Thomas Gleixner Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Ruidong Tian --- lib/Kconfig.debug | 2 +- lib/Makefile | 24 ++---------------------- lib/test_fpu_glue.c | 5 ++++- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d86ee52da391..adc5c8fffa4a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2939,7 +2939,7 @@ config TEST_FREE_PAGES config TEST_FPU tristate "Test floating point operations in kernel space" - depends on X86 && !KCOV_INSTRUMENT_ALL + depends on ARCH_HAS_KERNEL_FPU_SUPPORT && !KCOV_INSTRUMENT_ALL help Enable this option to add /sys/kernel/debug/selftest_helpers/test_fpu which will trigger a sequence of floating point operations. This is used diff --git a/lib/Makefile b/lib/Makefile index 048b0b1a6291..759a8a870980 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -107,31 +107,11 @@ obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE) obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o -# -# CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns -# off the generation of FPU/SSE* instructions for kernel proper but FPU_FLAGS -# get appended last to CFLAGS and thus override those previous compiler options. -# -FPU_CFLAGS := -msse -msse2 -ifdef CONFIG_CC_IS_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -# See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 -# -# The "-msse" in the first argument is there so that the -# -mpreferred-stack-boundary=3 build error: -# -# -mpreferred-stack-boundary=3 is not between 4 and 12 -# -# can be triggered. Otherwise gcc doesn't complain. -FPU_CFLAGS += -mhard-float -FPU_CFLAGS += $(call cc-option,-msse -mpreferred-stack-boundary=3,-mpreferred-stack-boundary=4) -endif obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o -CFLAGS_test_fpu_impl.o += $(FPU_CFLAGS) +CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU) +CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU) obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ diff --git a/lib/test_fpu_glue.c b/lib/test_fpu_glue.c index 85963d7be826..eef282a2715f 100644 --- a/lib/test_fpu_glue.c +++ b/lib/test_fpu_glue.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "test_fpu.h" @@ -38,6 +38,9 @@ static struct dentry *selftest_dir; static int __init test_fpu_init(void) { + if (!kernel_fpu_available()) + return -EINVAL; + selftest_dir = debugfs_create_dir("selftest_helpers", NULL); if (!selftest_dir) return -ENOMEM; -- Gitee