From 233b1c4305fc8a653f08bbf92d93c70f5995d1f0 Mon Sep 17 00:00:00 2001
From: Kefeng Wang
Date: Fri, 15 Jan 2021 13:46:04 +0800
Subject: [PATCH 001/241] initramfs: Provide a common initrd reserve function

Some architectures (e.g., ARM and RISC-V) have similar logic to check and
reserve the memory of initrd, so provide a common function
reserve_initrd_mem() to reduce duplicated code.

Change-Id: Ia207d4509af97c3673bbcbb1252a91bc1d202af0
Signed-off-by: Kefeng Wang
Signed-off-by: Palmer Dabbelt
Signed-off-by: Huacai Chen
---
 include/linux/initrd.h |  6 ++++++
 init/initramfs.c       | 45 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 8db6f8c8030b..07af5ebc1eaa 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -15,6 +15,12 @@ extern int initrd_below_start_ok;
 extern unsigned long initrd_start, initrd_end;
 extern void free_initrd_mem(unsigned long, unsigned long);
 
+#ifdef CONFIG_BLK_DEV_INITRD
+extern void __init reserve_initrd_mem(void);
+#else
+static inline void __init reserve_initrd_mem(void) {}
+#endif
+
 extern phys_addr_t phys_initrd_start;
 extern unsigned long phys_initrd_size;
 
diff --git a/init/initramfs.c b/init/initramfs.c
index 55b74d7e5260..f75c89e9d602 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -535,6 +535,51 @@ extern unsigned long __initramfs_size;
 #include <linux/initrd.h>
 #include <asm/unaligned.h>
 
+void __init reserve_initrd_mem(void)
+{
+	phys_addr_t start;
+	unsigned long size;
+
+	/* Ignore the virtual address computed during device tree parsing */
+	initrd_start = initrd_end = 0;
+
+	if (!phys_initrd_size)
+		return;
+	/*
+	 * Round the memory region to page boundaries as per free_initrd_mem()
+	 * This allows us to detect whether the pages overlapping the initrd
+	 * are in use, but more importantly, reserves the entire set of pages
+	 * as we don't want these pages allocated for other purposes.
+	 */
+	start = round_down(phys_initrd_start, PAGE_SIZE);
+	size = phys_initrd_size + (phys_initrd_start - start);
+	size = round_up(size, PAGE_SIZE);
+
+	if (!memblock_is_region_memory(start, size)) {
+		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
+		       (u64)start, size);
+		goto disable;
+	}
+
+	if (memblock_is_region_reserved(start, size)) {
+		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
+		       (u64)start, size);
+		goto disable;
+	}
+
+	memblock_reserve(start, size);
+	/* Now convert initrd to virtual addresses */
+	initrd_start = (unsigned long)__va(phys_initrd_start);
+	initrd_end = initrd_start + phys_initrd_size;
+	initrd_below_start_ok = 1;
+
+	return;
+disable:
+	pr_cont(" - disabling initrd\n");
+	initrd_start = 0;
+	initrd_end = 0;
+}
+
 void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 {
 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK
-- 
Gitee

From fe865bbd065e0b26381071daa779f101b64b80b7 Mon Sep 17 00:00:00 2001
From: Jun Yi
Date: Fri, 12 Mar 2021 09:50:17 +0800
Subject: [PATCH 002/241] fs: Move @f_count to different cacheline with @f_mode

get_file_rcu_many(), which is called by __fget_files(), now uses
atomic_try_cmpxchg(), which touches the shared variable fewer times and
thus performs better than atomic_cmpxchg().

__fget_files() checks @f_mode against a mask and then performs atomic
operations on @f_count, but both members sit on the same cacheline. When
many CPU cores access files concurrently, this causes heavy cacheline
contention on @f_count. Moving the two members to different cachelines
relieves the situation.
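To illustrate the effect, here is a minimal userspace sketch of the false
sharing (a hypothetical demo, not kernel code; struct fake_file and its
fields merely stand in for the relevant members of struct file). With
both fields on one 64-byte line, every increment of the counter
invalidates the reader's cacheline; uncommenting the padding splits them
and the read loop speeds up noticeably:

	/*
	 * Hypothetical demo: one thread hammers an atomic counter that
	 * stands in for @f_count while the main thread repeatedly reads
	 * a field standing in for @f_mode. Build with: cc -O2 -pthread.
	 */
	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	struct fake_file {
		unsigned int mode;	/* read-mostly, like @f_mode */
		/* char pad[64]; */	/* uncomment to split cachelines */
		atomic_long count;	/* write-hot, like @f_count */
	} f;

	static void *getter(void *arg)
	{
		for (long i = 0; i < 10000000; i++)
			atomic_fetch_add(&f.count, 1);	/* like get_file() */
		return NULL;
	}

	int main(void)
	{
		pthread_t t;
		volatile unsigned int sink = 0;

		pthread_create(&t, NULL, getter, NULL);
		for (long i = 0; i < 10000000; i++)
			sink += f.mode;		/* like the FMODE_* check */
		pthread_join(t, NULL);
		printf("done (mode reads: %u)\n", sink);
		return 0;
	}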
We have tested this on ARM64 and X86; the results are as follows.

The syscall test of UnixBench has been run on a Huawei Kunpeng920 with
this patch:

24 x System Call Overhead                    1

System Call Overhead               3160841.4 lps   (10.0 s, 1 samples)

System Benchmarks Partial Index    BASELINE     RESULT    INDEX
System Call Overhead                15000.0  3160841.4   2107.2
                                                        ========
System Benchmarks Index Score (Partial Only)             2107.2

Without this patch:

24 x System Call Overhead                    1

System Call Overhead               2222456.0 lps   (10.0 s, 1 samples)

System Benchmarks Partial Index    BASELINE     RESULT    INDEX
System Call Overhead                15000.0  2222456.0   1481.6
                                                        ========
System Benchmarks Index Score (Partial Only)             1481.6

And on an Intel Xeon Gold 6248 platform with this patch:

40 CPUs in system; running 24 parallel copies of tests

System Call Overhead               4288509.1 lps   (10.0 s, 1 samples)

System Benchmarks Partial Index    BASELINE     RESULT    INDEX
System Call Overhead                15000.0  4288509.1   2859.0
                                                        ========
System Benchmarks Index Score (Partial Only)             2859.0

Without this patch:

40 CPUs in system; running 24 parallel copies of tests

System Call Overhead               3666313.0 lps   (10.0 s, 1 samples)

System Benchmarks Partial Index    BASELINE     RESULT    INDEX
System Call Overhead                15000.0  3666313.0   2444.2
                                                        ========
System Benchmarks Index Score (Partial Only)             2444.2

Change-Id: If983af9f65a801880bc2ae1a899e46cb778a7e5c
Cc: Will Deacon
Cc: Mark Rutland
Cc: Peter Zijlstra
Cc: Alexander Viro
Cc: Boqun Feng
Signed-off-by: Yuqi Jin
Signed-off-by: Shaokun Zhang
Signed-off-by: Jun Yi
Signed-off-by: Huacai Chen
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a0c79a5de2a8..fc2921b61aa8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -931,7 +931,6 @@ struct file {
 	 */
 	spinlock_t		f_lock;
 	enum rw_hint		f_write_hint;
-	atomic_long_t		f_count;
 	unsigned int		f_flags;
 	fmode_t			f_mode;
 	struct mutex		f_pos_lock;
@@ -939,6 +938,7 @@
 	struct fown_struct	f_owner;
 	const struct cred	*f_cred;
 	struct file_ra_state	f_ra;
+	atomic_long_t		f_count;
 
 	u64			f_version;
 #ifdef CONFIG_SECURITY
-- 
Gitee

From 52e84d55f09e65e739b5250915746f302ba9db04 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 003/241] ACPI: Add LoongArch support for ACPI_PROCESSOR/ACPI_NUMA

We are preparing to add new Loongson (based on LoongArch, not MIPS)
support. LoongArch uses ACPI rather than DT as its boot protocol, so add
LoongArch support to ACPI_PROCESSOR/ACPI_NUMA.

Change-Id: Ic72fe9d8dfd44ab26489568817c0e375a051e24f
Signed-off-by: Huacai Chen
---
 drivers/acpi/Kconfig      | 4 ++--
 drivers/acpi/numa/Kconfig | 2 +-
 drivers/acpi/numa/srat.c  | 2 +-
 include/linux/acpi.h      | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index b5ea34c340cc..b9753969b0db 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -272,9 +272,9 @@ config ACPI_CPPC_LIB
 
 config ACPI_PROCESSOR
 	tristate "Processor"
-	depends on X86 || IA64 || ARM64
+	depends on X86 || IA64 || ARM64 || LOONGARCH
 	select ACPI_PROCESSOR_IDLE
-	select ACPI_CPU_FREQ_PSS if X86 || IA64
+	select ACPI_CPU_FREQ_PSS if X86 || IA64 || LOONGARCH
 	default y
 	help
 	  This driver adds support for the ACPI Processor package.
It is required diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig index fcf2e556d69d..39b1f34c21df 100644 --- a/drivers/acpi/numa/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -2,7 +2,7 @@ config ACPI_NUMA bool "NUMA support" depends on NUMA - depends on (X86 || IA64 || ARM64) + depends on (X86 || IA64 || ARM64 || LOONGARCH) default y if IA64 || ARM64 config ACPI_HMAT diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 6021a1013442..b8795fc49097 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -206,7 +206,7 @@ int __init srat_disabled(void) return acpi_numa < 0; } -#if defined(CONFIG_X86) || defined(CONFIG_ARM64) +#if defined(CONFIG_X86) || defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) /* * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for * I/O localities since SRAT does not list them. I/O localities are diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9c184dbceba4..fb86b13b54a6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -248,7 +248,7 @@ void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void -- Gitee From 17c5cb66d90ac05398bd7033a6c581fa920b0a04 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 25 Apr 2021 15:31:04 +0800 Subject: [PATCH 004/241] ACPICA: MADT: Add LoongArch APICs support Change-Id: Id006deb49b45b7a03ec77430cb961e511f2c71a8 Signed-off-by: Huacai Chen --- drivers/acpi/tables.c | 10 ++++ include/acpi/actbl2.h | 125 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 9d581045acff..b36d9a22a07a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -207,6 +207,16 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } break; + case ACPI_MADT_TYPE_CORE_PIC: + { + struct acpi_madt_core_pic *p = + (struct acpi_madt_core_pic *)header; + pr_debug("CORE PIC (processor_id[0x%02x] core_id[0x%02x] %s)\n", + p->processor_id, p->core_id, + (p->flags & ACPI_MADT_ENABLED) ? 
"enabled" : "disabled"); + } + break; + default: pr_warn("Found unsupported MADT entry (type = 0x%x)\n", header->type); diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index ec66779cb193..79a97a9c4f69 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -517,7 +517,14 @@ enum acpi_madt_type { ACPI_MADT_TYPE_GENERIC_MSI_FRAME = 13, ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR = 14, ACPI_MADT_TYPE_GENERIC_TRANSLATOR = 15, - ACPI_MADT_TYPE_RESERVED = 16 /* 16 and greater are reserved */ + ACPI_MADT_TYPE_CORE_PIC = 17, + ACPI_MADT_TYPE_LIO_PIC = 18, + ACPI_MADT_TYPE_HT_PIC = 19, + ACPI_MADT_TYPE_EIO_PIC = 20, + ACPI_MADT_TYPE_MSI_PIC = 21, + ACPI_MADT_TYPE_BIO_PIC = 22, + ACPI_MADT_TYPE_LPC_PIC = 23, + ACPI_MADT_TYPE_RESERVED = 24 /* 24 and greater are reserved */ }; /* @@ -724,6 +731,122 @@ struct acpi_madt_generic_translator { u32 reserved2; }; +/* Values for Version field above */ + +enum acpi_madt_core_pic_version { + ACPI_MADT_CORE_PIC_VERSION_NONE = 0, + ACPI_MADT_CORE_PIC_VERSION_V1 = 1, + ACPI_MADT_CORE_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_lio_pic_version { + ACPI_MADT_LIO_PIC_VERSION_NONE = 0, + ACPI_MADT_LIO_PIC_VERSION_V1 = 1, + ACPI_MADT_LIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_eio_pic_version { + ACPI_MADT_EIO_PIC_VERSION_NONE = 0, + ACPI_MADT_EIO_PIC_VERSION_V1 = 1, + ACPI_MADT_EIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_ht_pic_version { + ACPI_MADT_HT_PIC_VERSION_NONE = 0, + ACPI_MADT_HT_PIC_VERSION_V1 = 1, + ACPI_MADT_HT_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_bio_pic_version { + ACPI_MADT_BIO_PIC_VERSION_NONE = 0, + ACPI_MADT_BIO_PIC_VERSION_V1 = 1, + ACPI_MADT_BIO_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_msi_pic_version { + ACPI_MADT_MSI_PIC_VERSION_NONE = 0, + ACPI_MADT_MSI_PIC_VERSION_V1 = 1, + ACPI_MADT_MSI_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +enum acpi_madt_lpc_pic_version { + ACPI_MADT_LPC_PIC_VERSION_NONE = 0, + ACPI_MADT_LPC_PIC_VERSION_V1 = 1, + ACPI_MADT_LPC_PIC_VERSION_RESERVED = 2 /* 2 and greater are reserved */ +}; + +/* Core Interrupt Controller */ + +struct acpi_madt_core_pic { + struct acpi_subtable_header header; + u8 version; + u32 processor_id; + u32 core_id; + u32 flags; +}; + +/* Legacy I/O Interrupt Controller */ + +struct acpi_madt_lio_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u8 cascade[2]; + u32 cascade_map[2]; +}; + +/* Extend I/O Interrupt Controller */ + +struct acpi_madt_eio_pic { + struct acpi_subtable_header header; + u8 version; + u8 cascade; + u8 node; + u64 node_map; +}; + +/* HT Interrupt Controller */ + +struct acpi_madt_ht_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u8 cascade[8]; +}; + +/* Bridge I/O Interrupt Controller */ + +struct acpi_madt_bio_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u16 id; + u16 gsi_base; +}; + +/* MSI Interrupt Controller */ + +struct acpi_madt_msi_pic { + struct acpi_subtable_header header; + u8 version; + u64 msg_address; + u32 start; + u32 count; +}; + +/* LPC Interrupt Controller */ + +struct acpi_madt_lpc_pic { + struct acpi_subtable_header header; + u8 version; + u64 address; + u16 size; + u8 cascade; +}; + /* * Common flags fields for MADT subtables */ -- Gitee From 2422269be9acaef98934d47d18e9eb9df93ae982 Mon Sep 17 00:00:00 2001 
From: Huacai Chen Date: Wed, 7 Apr 2021 14:11:06 +0800 Subject: [PATCH 005/241] ACPICA: Events: Support fixed pcie wake event Some chipsets support fixed pcie wake event which is defined in the PM1 block, such as LS7A1000 of Loongson company, so we add code to handle it. Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen Change-Id: I342b44a6494f3627eb1cb5d83c2f63bc1e65d2c4 --- drivers/acpi/acpica/evevent.c | 12 ++++++++---- drivers/acpi/acpica/hwsleep.c | 12 ++++++++++++ drivers/acpi/acpica/utglobal.c | 4 ++++ include/acpi/actypes.h | 3 ++- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c index 9efca54c51ac..50c0e2f1e758 100644 --- a/drivers/acpi/acpica/evevent.c +++ b/drivers/acpi/acpica/evevent.c @@ -140,9 +140,8 @@ static acpi_status acpi_ev_fixed_event_initialize(void) if (acpi_gbl_fixed_event_info[i].enable_register_id != 0xFF) { status = - acpi_write_bit_register(acpi_gbl_fixed_event_info - [i].enable_register_id, - ACPI_DISABLE_EVENT); + acpi_write_bit_register(acpi_gbl_fixed_event_info[i].enable_register_id, + (i == ACPI_EVENT_PCIE_WAKE) ? ACPI_ENABLE_EVENT : ACPI_DISABLE_EVENT); if (ACPI_FAILURE(status)) { return (status); } @@ -185,6 +184,11 @@ u32 acpi_ev_fixed_event_detect(void) return (int_status); } + if (fixed_enable & ACPI_BITMASK_PCIEXP_WAKE_DISABLE) + fixed_enable &= ~ACPI_BITMASK_PCIEXP_WAKE_DISABLE; + else + fixed_enable |= ACPI_BITMASK_PCIEXP_WAKE_DISABLE; + ACPI_DEBUG_PRINT((ACPI_DB_INTERRUPTS, "Fixed Event Block: Enable %08X Status %08X\n", fixed_enable, fixed_status)); @@ -250,7 +254,7 @@ static u32 acpi_ev_fixed_event_dispatch(u32 event) if (!acpi_gbl_fixed_event_handlers[event].handler) { (void)acpi_write_bit_register(acpi_gbl_fixed_event_info[event]. enable_register_id, - ACPI_DISABLE_EVENT); + event == ACPI_EVENT_PCIE_WAKE ? ACPI_ENABLE_EVENT : ACPI_DISABLE_EVENT); ACPI_ERROR((AE_INFO, "No installed handler for fixed event - %s (%u), disabling", diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c index 6a20bb5059c1..e89895622a72 100644 --- a/drivers/acpi/acpica/hwsleep.c +++ b/drivers/acpi/acpica/hwsleep.c @@ -311,6 +311,18 @@ acpi_status acpi_hw_legacy_wake(u8 sleep_state) [ACPI_EVENT_SLEEP_BUTTON]. status_register_id, ACPI_CLEAR_STATUS); + /* Enable pcie wake event if support */ + if ((acpi_gbl_FADT.flags & ACPI_FADT_PCI_EXPRESS_WAKE)) { + (void) + acpi_write_bit_register(acpi_gbl_fixed_event_info + [ACPI_EVENT_PCIE_WAKE]. + enable_register_id, ACPI_DISABLE_EVENT); + (void) + acpi_write_bit_register(acpi_gbl_fixed_event_info + [ACPI_EVENT_PCIE_WAKE]. 
+ status_register_id, ACPI_CLEAR_STATUS); + } + acpi_hw_execute_sleep_method(METHOD_PATHNAME__SST, ACPI_SST_WORKING); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index e6dcbdc3fc6e..0dc81b85112c 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -186,6 +186,10 @@ struct acpi_fixed_event_info acpi_gbl_fixed_event_info[ACPI_NUM_FIXED_EVENTS] = ACPI_BITREG_RT_CLOCK_ENABLE, ACPI_BITMASK_RT_CLOCK_STATUS, ACPI_BITMASK_RT_CLOCK_ENABLE}, + /* ACPI_EVENT_PCIE_WAKE */ {ACPI_BITREG_PCIEXP_WAKE_STATUS, + ACPI_BITREG_PCIEXP_WAKE_DISABLE, + ACPI_BITMASK_PCIEXP_WAKE_STATUS, + ACPI_BITMASK_PCIEXP_WAKE_DISABLE}, }; #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 7334037624c5..3fb9aee1f716 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -720,7 +720,8 @@ typedef u32 acpi_event_type; #define ACPI_EVENT_POWER_BUTTON 2 #define ACPI_EVENT_SLEEP_BUTTON 3 #define ACPI_EVENT_RTC 4 -#define ACPI_EVENT_MAX 4 +#define ACPI_EVENT_PCIE_WAKE 5 +#define ACPI_EVENT_MAX 5 #define ACPI_NUM_FIXED_EVENTS ACPI_EVENT_MAX + 1 /* -- Gitee From 1c980d9ec3a549abe773e31f330543325f3818bb Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 3 Jan 2023 12:02:42 +0800 Subject: [PATCH 006/241] ACPI: Define ACPI_MACHINE_WIDTH to 64 for LoongArch Add 64 bit LoongArch architecture by defining ACPI_MACHINE_WIDTH to 64. Useful for acpica tools and incorporating ACPICA into the Firmware Test Suite. Change-Id: Ic4b90f3c3beb297bb6a2187829bb626a6e456b96 Signed-off-by: Huacai Chen --- include/acpi/platform/aclinux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 72f52a1342a0..49f5560505cb 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -194,7 +194,7 @@ #if defined(__ia64__) || (defined(__x86_64__) && !defined(__ILP32__)) ||\ defined(__aarch64__) || defined(__PPC64__) ||\ - defined(__s390x__) ||\ + defined(__s390x__) || defined(__loongarch__) ||\ (defined(__riscv) && (defined(__LP64__) || defined(_LP64))) #define ACPI_MACHINE_WIDTH 64 #define COMPILER_DEPENDENT_INT64 long -- Gitee From 361b27e89ba923228019753ae9ab1770af3520c0 Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Thu, 29 Oct 2020 16:29:11 +0800 Subject: [PATCH 007/241] serial: 8250_pnp: Support configurable clock frequency Loongson boards need configurable clock frequency for serial ports. 
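For context, a hedged sketch of the pattern this patch applies in
serial_pnp_probe() (the 100 MHz rate below is a made-up example, not
taken from this patch): device_property_read_u32() returns 0 only when
the firmware (DT or an ACPI _DSD) actually supplies "clock-frequency",
so the legacy PNP default is kept otherwise:

	u32 clk;

	/* Prefer a firmware-provided rate; fall back to the historical
	 * 1.8432 MHz PNP default when the property is absent. */
	if (device_property_read_u32(&dev->dev, "clock-frequency", &clk))
		clk = 1843200;		/* property missing or malformed */
	uart.port.uartclk = clk;	/* e.g. 100000000 on a hypothetical board */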
Change-Id: I605edc9eec964e0e414a771c92b554162398615a
Signed-off-by: Huacai Chen
---
 drivers/tty/serial/8250/8250_pnp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c
index de90d681b64c..e51421f97281 100644
--- a/drivers/tty/serial/8250/8250_pnp.c
+++ b/drivers/tty/serial/8250/8250_pnp.c
@@ -474,7 +474,9 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 	uart.port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF;
 	if (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE)
 		uart.port.flags |= UPF_SHARE_IRQ;
-	uart.port.uartclk = 1843200;
+	if (device_property_read_u32(&dev->dev, "clock-frequency", &uart.port.uartclk)) {
+		uart.port.uartclk = 1843200;
+	}
 	uart.port.dev = &dev->dev;
 
 	line = serial8250_register_8250_port(&uart);
-- 
Gitee

From f7cad7c5ad08253686d9da4cda4c2c803a815c3c Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Fri, 26 May 2023 12:50:13 +0800
Subject: [PATCH 008/241] genirq/msi, platform-msi: Adjust return value of
 msi_domain_prepare_irqs()

Adjust the return value semantics of msi_domain_prepare_irqs(), which
allows us to modify the input nvec by overriding
msi_domain_ops::msi_prepare(). This is necessary for a later patch.

Before: 0 on success, others on error.
After:  = 0: Success;
        > 0: The modified nvec;
        < 0: Error code.

Callers are also updated.

Change-Id: I9c43dff38e4ce6b737bf77baeb1b4fcb1b73e37b
Signed-off-by: Huacai Chen
---
 drivers/base/platform-msi.c |  2 +-
 kernel/irq/msi.c            | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index c4a17e5edf8b..1dc4c59899d4 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -344,7 +344,7 @@ __platform_msi_create_device_domain(struct device *dev,
 		goto free_priv;
 
 	err = msi_domain_prepare_irqs(domain->parent, dev, nvec, &data->arg);
-	if (err)
+	if (err < 0)
 		goto free_domain;
 
 	return domain;
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 4457f3e966d0..548cee01144c 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -301,6 +301,12 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
 	return domain;
 }
 
+/*
+ * Return Val:
+ * = 0: Success;
+ * > 0: The modified nvec;
+ * < 0: Error code.
+ */
 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
 			    int nvec, msi_alloc_info_t *arg)
 {
@@ -407,8 +413,10 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 	bool can_reserve;
 
 	ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
-	if (ret)
+	if (ret < 0)
 		return ret;
+	if (ret > 0)
+		nvec = ret;
 
 	for_each_msi_entry(desc, dev) {
 		ops->set_desc(&arg, desc);
-- 
Gitee

From 5b4f612b839595a497f46cec0544979859fbaa9b Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 009/241] irqchip: Adjust Kconfig for Loongson

We are preparing to add new Loongson (based on LoongArch, not MIPS)
support. HTVEC will be used by more Loongson processors, so we adjust
its description.
HTPIC is bound to MIPS-based Loongson, so we add a MIPS dependency, Change-Id: Ie032a299ccbf7c768ca0fc50bad25a77ab2ff5e2 Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 3c24bf45263c..58a321adcb1f 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -556,7 +556,7 @@ config LOONGSON_LIOINTC config LOONGSON_HTPIC bool "Loongson3 HyperTransport PIC Controller" - depends on MACH_LOONGSON64 + depends on (MACH_LOONGSON64 && MIPS) default y select IRQ_DOMAIN select GENERIC_IRQ_CHIP @@ -564,12 +564,12 @@ config LOONGSON_HTPIC Support for the Loongson-3 HyperTransport PIC Controller. config LOONGSON_HTVEC - bool "Loongson3 HyperTransport Interrupt Vector Controller" + bool "Loongson HyperTransport Interrupt Vector Controller" depends on MACH_LOONGSON64 default MACH_LOONGSON64 select IRQ_DOMAIN_HIERARCHY help - Support for the Loongson3 HyperTransport Interrupt Vector Controller. + Support for the Loongson HyperTransport Interrupt Vector Controller. config LOONGSON_PCH_PIC bool "Loongson PCH PIC Controller" -- Gitee From 866e6ef1d0ca1108eeaf5efa8d2bf50250ee436b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 010/241] irqchip/loongson-pch-pic: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. Change-Id: Ie5df0522bc76313390647b4ef96497169409b5de Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-pch-pic.c | 158 ++++++++++++++++++++----- 1 file changed, 131 insertions(+), 27 deletions(-) diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index a4eb8a2181c7..3d9b1c53ebd9 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv + * Huacai Chen * Loongson PCH PIC support */ @@ -15,6 +17,7 @@ #include #include #include +#include /* Registers */ #define PCH_PIC_MASK 0x20 @@ -32,14 +35,23 @@ #define PIC_COUNT (PIC_COUNT_PER_REG * PIC_REG_COUNT) #define PIC_REG_IDX(irq_id) ((irq_id) / PIC_COUNT_PER_REG) #define PIC_REG_BIT(irq_id) ((irq_id) % PIC_COUNT_PER_REG) +#define PCH_PIC_SIZE 0x400 + +static int nr_pics; struct pch_pic { void __iomem *base; struct irq_domain *pic_domain; + struct fwnode_handle *domain_handle; u32 ht_vec_base; raw_spinlock_t pic_lock; + u32 saved_vec_en[PIC_REG_COUNT]; + u32 saved_vec_pol[PIC_REG_COUNT]; + u32 saved_vec_edge[PIC_REG_COUNT]; }; +static struct pch_pic *pch_pic_priv[MAX_IO_PICS]; + static void pch_pic_bitset(struct pch_pic *priv, int offset, int bit) { u32 reg; @@ -137,6 +149,7 @@ static struct irq_chip pch_pic_irq_chip = { .irq_ack = pch_pic_ack_irq, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_set_type = pch_pic_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int pch_pic_alloc(struct irq_domain *domain, unsigned int virq, @@ -180,7 +193,7 @@ static void pch_pic_reset(struct pch_pic *priv) int i; for (i = 0; i < PIC_COUNT; i++) { - /* Write vectored ID */ + /* Write vector ID */ writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i)); /* Hardcode route to HT0 Lo */ writeb(1, priv->base + PCH_INT_ROUTE(i)); @@ -198,50 +211,79 @@ static void pch_pic_reset(struct pch_pic *priv) } } -static int pch_pic_of_init(struct device_node *node, - 
struct device_node *parent) +static int pch_pic_suspend(void) +{ + int i, j; + + for (i = 0; i < nr_pics; i++) { + for (j = 0; j < PIC_REG_COUNT; j++) { + pch_pic_priv[i]->saved_vec_pol[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_POL + 4 * j); + pch_pic_priv[i]->saved_vec_edge[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_EDGE + 4 * j); + pch_pic_priv[i]->saved_vec_en[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_MASK + 4 * j); + } + } + + return 0; +} + +static void pch_pic_resume(void) +{ + int i, j; + + for (i = 0; i < nr_pics; i++) { + pch_pic_reset(pch_pic_priv[i]); + for (j = 0; j < PIC_REG_COUNT; j++) { + writel(pch_pic_priv[i]->saved_vec_pol[j], + pch_pic_priv[i]->base + PCH_PIC_POL + 4 * j); + writel(pch_pic_priv[i]->saved_vec_edge[j], + pch_pic_priv[i]->base + PCH_PIC_EDGE + 4 * j); + writel(pch_pic_priv[i]->saved_vec_en[j], + pch_pic_priv[i]->base + PCH_PIC_MASK + 4 * j); + } + } +} + +static struct syscore_ops pch_pic_syscore_ops = { + .suspend = pch_pic_suspend, + .resume = pch_pic_resume, +}; + +static int pch_pic_init(phys_addr_t addr, unsigned long size, int vec_base, + struct irq_domain *parent_domain, struct fwnode_handle *domain_handle) { + int vec_count; struct pch_pic *priv; - struct irq_domain *parent_domain; - int err; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; raw_spin_lock_init(&priv->pic_lock); - priv->base = of_iomap(node, 0); - if (!priv->base) { - err = -ENOMEM; + priv->base = ioremap(addr, size); + if (!priv->base) goto free_priv; - } - parent_domain = irq_find_host(parent); - if (!parent_domain) { - pr_err("Failed to find the parent domain\n"); - err = -ENXIO; - goto iounmap_base; - } + priv->domain_handle = domain_handle; - if (of_property_read_u32(node, "loongson,pic-base-vec", - &priv->ht_vec_base)) { - pr_err("Failed to determine pic-base-vec\n"); - err = -EINVAL; - goto iounmap_base; - } + priv->ht_vec_base = vec_base; + vec_count = ((readq(priv->base) >> 48) & 0xff) + 1; priv->pic_domain = irq_domain_create_hierarchy(parent_domain, 0, - PIC_COUNT, - of_node_to_fwnode(node), - &pch_pic_domain_ops, - priv); + vec_count, priv->domain_handle, + &pch_pic_domain_ops, priv); + if (!priv->pic_domain) { pr_err("Failed to create IRQ domain\n"); - err = -ENOMEM; goto iounmap_base; } pch_pic_reset(priv); + pch_pic_priv[nr_pics++] = priv; + + register_syscore_ops(&pch_pic_syscore_ops); return 0; @@ -250,7 +292,69 @@ static int pch_pic_of_init(struct device_node *node, free_priv: kfree(priv); - return err; + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int pch_pic_of_init(struct device_node *node, + struct device_node *parent) +{ + int err, vec_base; + struct resource res; + struct irq_domain *parent_domain; + + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + pr_err("Failed to find the parent domain\n"); + return -ENXIO; + } + + if (of_property_read_u32(node, "loongson,pic-base-vec", &vec_base)) { + pr_err("Failed to determine pic-base-vec\n"); + return -EINVAL; + } + + err = pch_pic_init(res.start, resource_size(&res), vec_base, + parent_domain, of_node_to_fwnode(node)); + if (err < 0) + return err; + + return 0; } IRQCHIP_DECLARE(pch_pic, "loongson,pch-pic-1.0", pch_pic_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *pch_pic_acpi_init(struct irq_domain *parent, + struct acpi_madt_bio_pic *acpi_pchpic) +{ + int ret, vec_base; + struct fwnode_handle *domain_handle; + + if (!acpi_pchpic) + return NULL; + + vec_base = acpi_pchpic->gsi_base - 
GSI_MIN_PCH_IRQ; + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchpic); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return NULL; + } + + ret = pch_pic_init(acpi_pchpic->address, acpi_pchpic->size, + vec_base, parent, domain_handle); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_ANY); +} + +#endif -- Gitee From 415a85bbbe4c8852f50db833256d3c815d5adfd2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 011/241] irqchip/loongson-pch-msi: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. Change-Id: I8c97e03b4500d869bf8d59da9d23b0648492e02f Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-pch-msi.c | 130 +++++++++++++++++-------- 1 file changed, 88 insertions(+), 42 deletions(-) diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index 32562b7e681b..d148ea8e2d68 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv + * Huacai Chen * Loongson PCH MSI support */ @@ -15,14 +17,19 @@ #include #include +static int nr_pics; + struct pch_msi_data { struct mutex msi_map_lock; phys_addr_t doorbell; u32 irq_first; /* The vector number that MSIs starts */ u32 num_irqs; /* The number of vectors for MSIs */ unsigned long *msi_map; + struct fwnode_handle *domain_handle; }; +static struct pch_msi_data *pch_msi_priv[MAX_IO_PICS]; + static void pch_msi_mask_msi_irq(struct irq_data *d) { pci_msi_mask_irq(d); @@ -154,12 +161,14 @@ static const struct irq_domain_ops pch_msi_middle_domain_ops = { }; static int pch_msi_init_domains(struct pch_msi_data *priv, - struct device_node *node, - struct irq_domain *parent) + struct irq_domain *parent, + struct fwnode_handle *domain_handle) { struct irq_domain *middle_domain, *msi_domain; - middle_domain = irq_domain_create_linear(of_node_to_fwnode(node), + priv->domain_handle = domain_handle; + + middle_domain = irq_domain_create_linear(priv->domain_handle, priv->num_irqs, &pch_msi_middle_domain_ops, priv); @@ -171,7 +180,7 @@ static int pch_msi_init_domains(struct pch_msi_data *priv, middle_domain->parent = parent; irq_domain_update_bus_token(middle_domain, DOMAIN_BUS_NEXUS); - msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node), + msi_domain = pci_msi_create_irq_domain(priv->domain_handle, &pch_msi_domain_info, middle_domain); if (!msi_domain) { @@ -183,19 +192,11 @@ static int pch_msi_init_domains(struct pch_msi_data *priv, return 0; } -static int pch_msi_init(struct device_node *node, - struct device_node *parent) +static int pch_msi_init(phys_addr_t msg_address, int irq_base, int irq_count, + struct irq_domain *parent_domain, struct fwnode_handle *domain_handle) { - struct pch_msi_data *priv; - struct irq_domain *parent_domain; - struct resource res; int ret; - - parent_domain = irq_find_host(parent); - if (!parent_domain) { - pr_err("Failed to find the parent domain\n"); - return -ENXIO; - } + struct pch_msi_data *priv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) @@ -203,48 +204,93 @@ static int pch_msi_init(struct device_node *node, mutex_init(&priv->msi_map_lock); - ret = of_address_to_resource(node, 0, &res); - if (ret) { - pr_err("Failed to allocate resource\n"); - goto err_priv; - } 
- - priv->doorbell = res.start; - - if (of_property_read_u32(node, "loongson,msi-base-vec", - &priv->irq_first)) { - pr_err("Unable to parse MSI vec base\n"); - ret = -EINVAL; - goto err_priv; - } - - if (of_property_read_u32(node, "loongson,msi-num-vecs", - &priv->num_irqs)) { - pr_err("Unable to parse MSI vec number\n"); - ret = -EINVAL; - goto err_priv; - } + priv->doorbell = msg_address; + priv->irq_first = irq_base; + priv->num_irqs = irq_count; priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL); - if (!priv->msi_map) { - ret = -ENOMEM; + if (!priv->msi_map) goto err_priv; - } pr_debug("Registering %d MSIs, starting at %d\n", priv->num_irqs, priv->irq_first); - ret = pch_msi_init_domains(priv, node, parent_domain); + ret = pch_msi_init_domains(priv, parent_domain, domain_handle); if (ret) goto err_map; + pch_msi_priv[nr_pics++] = priv; + return 0; err_map: kfree(priv->msi_map); err_priv: kfree(priv); - return ret; + + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int pch_msi_of_init(struct device_node *node, struct device_node *parent) +{ + int err; + int irq_base, irq_count; + struct resource res; + struct irq_domain *parent_domain; + + parent_domain = irq_find_host(parent); + if (!parent_domain) { + pr_err("Failed to find the parent domain\n"); + return -ENXIO; + } + + if (of_address_to_resource(node, 0, &res)) { + pr_err("Failed to allocate resource\n"); + return -EINVAL; + } + + if (of_property_read_u32(node, "loongson,msi-base-vec", &irq_base)) { + pr_err("Unable to parse MSI vec base\n"); + return -EINVAL; + } + + if (of_property_read_u32(node, "loongson,msi-num-vecs", &irq_count)) { + pr_err("Unable to parse MSI vec number\n"); + return -EINVAL; + } + + err = pch_msi_init(res.start, irq_base, irq_count, parent_domain, of_node_to_fwnode(node)); + if (err < 0) + return err; + + return 0; +} + +IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *pch_msi_acpi_init(struct irq_domain *parent, + struct acpi_madt_msi_pic *acpi_pchmsi) +{ + int ret; + struct fwnode_handle *domain_handle; + + if (!acpi_pchmsi) + return NULL; + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchmsi); + + ret = pch_msi_init(acpi_pchmsi->msg_address, acpi_pchmsi->start, + acpi_pchmsi->count, parent, domain_handle); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_PCI_MSI); } -IRQCHIP_DECLARE(pch_msi, "loongson,pch-msi-1.0", pch_msi_init); +#endif -- Gitee From cc295ae034b83bedb04ec9b51caa2d44e49c6ce8 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 012/241] irqchip/loongson-htvec: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. 
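To make the new entry point concrete, the architecture code is expected
to walk the MADT and hand each HT PIC subtable to htvec_acpi_init(); a
hedged sketch of such glue follows (parse_madt_htvec() and cpu_domain
are illustrative names, not part of this patch):

	static struct irq_domain *htvec_domain;

	static int __init parse_madt_htvec(union acpi_subtable_headers *header,
					   const unsigned long end)
	{
		struct acpi_madt_ht_pic *htvec = (struct acpi_madt_ht_pic *)header;

		/* cpu_domain: the CPU-level parent domain (assumed) */
		htvec_domain = htvec_acpi_init(cpu_domain, htvec);
		return htvec_domain ? 0 : -ENODEV;
	}

	/* ...invoked once during arch interrupt setup: */
	acpi_table_parse_madt(ACPI_MADT_TYPE_HT_PIC, parse_madt_htvec, 1);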
Change-Id: If7cfd7706375fa620c14f3560d71e88b5d48b6ee Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-htvec.c | 148 +++++++++++++++++++++------ 1 file changed, 114 insertions(+), 34 deletions(-) diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c index 6392aafb9a63..97c2584a9d23 100644 --- a/drivers/irqchip/irq-loongson-htvec.c +++ b/drivers/irqchip/irq-loongson-htvec.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv + * Huacai Chen * Loongson HyperTransport Interrupt Vector support */ @@ -16,22 +18,27 @@ #include #include #include +#include /* Registers */ #define HTVEC_EN_OFF 0x20 #define HTVEC_MAX_PARENT_IRQ 8 - #define VEC_COUNT_PER_REG 32 #define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG) #define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG) +#define HTVEC_SIZE 0x400 struct htvec { int num_parents; void __iomem *base; struct irq_domain *htvec_domain; raw_spinlock_t htvec_lock; + struct fwnode_handle *domain_handle; + u32 saved_vec_en[HTVEC_MAX_PARENT_IRQ]; }; +static struct htvec *htvec_priv; + static void htvec_irq_dispatch(struct irq_desc *desc) { int i; @@ -155,64 +162,137 @@ static void htvec_reset(struct htvec *priv) } } -static int htvec_of_init(struct device_node *node, - struct device_node *parent) +static int htvec_suspend(void) +{ + int i; + for (i = 0; i < htvec_priv->num_parents; i++) { + htvec_priv->saved_vec_en[i] = readl(htvec_priv->base + HTVEC_EN_OFF + 4 * i); + } + return 0; +} + +static void htvec_resume(void) +{ + int i; + for (i = 0; i < htvec_priv->num_parents; i++) { + writel(htvec_priv->saved_vec_en[i], htvec_priv->base + HTVEC_EN_OFF + 4 * i); + } +} + +static struct syscore_ops htvec_syscore_ops = { + .suspend = htvec_suspend, + .resume = htvec_resume, +}; + +static int htvec_init(phys_addr_t addr, unsigned long size, + int num_parents, int parent_irq[], struct fwnode_handle *domain_handle) { + int i; struct htvec *priv; - int err, parent_irq[8], i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + priv->num_parents = num_parents; + priv->base = ioremap(addr, size); + priv->domain_handle = domain_handle; raw_spin_lock_init(&priv->htvec_lock); - priv->base = of_iomap(node, 0); - if (!priv->base) { - err = -ENOMEM; - goto free_priv; - } - /* Interrupt may come from any of the 8 interrupt lines */ - for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) { - parent_irq[i] = irq_of_parse_and_map(node, i); - if (parent_irq[i] <= 0) - break; - - priv->num_parents++; - } - - if (!priv->num_parents) { - pr_err("Failed to get parent irqs\n"); - err = -ENODEV; - goto iounmap_base; - } - - priv->htvec_domain = irq_domain_create_linear(of_node_to_fwnode(node), + /* Setup IRQ domain */ + priv->htvec_domain = irq_domain_create_linear(priv->domain_handle, (VEC_COUNT_PER_REG * priv->num_parents), &htvec_domain_ops, priv); if (!priv->htvec_domain) { - pr_err("Failed to create IRQ domain\n"); - err = -ENOMEM; - goto irq_dispose; + pr_err("loongson-htvec: cannot add IRQ domain\n"); + goto iounmap_base; } htvec_reset(priv); - for (i = 0; i < priv->num_parents; i++) + for (i = 0; i < priv->num_parents; i++) { irq_set_chained_handler_and_data(parent_irq[i], htvec_irq_dispatch, priv); + } + + htvec_priv = priv; + + register_syscore_ops(&htvec_syscore_ops); return 0; -irq_dispose: - for (; i > 0; i--) - irq_dispose_mapping(parent_irq[i - 1]); iounmap_base: iounmap(priv->base); -free_priv: + priv->domain_handle = NULL; kfree(priv); - return err; + return 
-EINVAL; +} + +#ifdef CONFIG_OF + +static int htvec_of_init(struct device_node *node, + struct device_node *parent) +{ + int i, err; + int parent_irq[8]; + int num_parents = 0; + struct resource res; + + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + + /* Interrupt may come from any of the 8 interrupt lines */ + for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) { + parent_irq[i] = irq_of_parse_and_map(node, i); + if (parent_irq[i] <= 0) + break; + + num_parents++; + } + + err = htvec_init(res.start, resource_size(&res), + num_parents, parent_irq, of_node_to_fwnode(node)); + if (err < 0) + return err; + + return 0; } IRQCHIP_DECLARE(htvec, "loongson,htvec-1.0", htvec_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec) +{ + int i, ret; + int num_parents, parent_irq[8]; + struct fwnode_handle *domain_handle; + + if (!acpi_htvec) + return NULL; + + num_parents = HTVEC_MAX_PARENT_IRQ; + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_htvec); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return NULL; + } + + /* Interrupt may come from any of the 8 interrupt lines */ + for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) + parent_irq[i] = irq_create_mapping(parent, acpi_htvec->cascade[i]); + + ret = htvec_init(acpi_htvec->address, acpi_htvec->size, + num_parents, parent_irq, domain_handle); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_ANY); +} + +#endif -- Gitee From aa63f3d0f83942a4d0c271d6b1e9786da2b4d71c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 013/241] irqchip/loongson-liointc: Add ACPI init support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. LoongArch use ACPI other than DT as its boot protocol, so add ACPI init support. 
Change-Id: Idd634c401fcab956e03e81081d027046ab98693a Signed-off-by: Huacai Chen --- drivers/irqchip/irq-loongson-liointc.c | 151 +++++++++++++++++-------- 1 file changed, 106 insertions(+), 45 deletions(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 9ed1bc473663..ef15b638a8c0 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2020, Jiaxun Yang + * Jianmin Lv * Loongson Local IO Interrupt Controller support */ @@ -16,13 +17,13 @@ #include #include -#include +#include #define LIOINTC_CHIP_IRQ 32 -#define LIOINTC_NUM_PARENT 4 +#define LIOINTC_NUM_PARENT 4 #define LIOINTC_INTC_CHIP_START 0x20 - +#define LIOINTC_MEM_SIZE 0x80 #define LIOINTC_REG_INTC_STATUS (LIOINTC_INTC_CHIP_START + 0x20) #define LIOINTC_REG_INTC_EN_STATUS (LIOINTC_INTC_CHIP_START + 0x04) #define LIOINTC_REG_INTC_ENABLE (LIOINTC_INTC_CHIP_START + 0x08) @@ -40,6 +41,7 @@ struct liointc_handler_data { }; struct liointc_priv { + struct fwnode_handle *domain_handle; struct irq_chip_generic *gc; struct liointc_handler_data handler[LIOINTC_NUM_PARENT]; u8 map_cache[LIOINTC_CHIP_IRQ]; @@ -51,11 +53,11 @@ static void liointc_chained_handle_irq(struct irq_desc *desc) struct liointc_handler_data *handler = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_chip_generic *gc = handler->priv->gc; - u32 pending; + u32 pending, offset = cpu_logical_map(smp_processor_id()) * 8; chained_irq_enter(chip, desc); - pending = readl(gc->reg_base + LIOINTC_REG_INTC_STATUS); + pending = readl(gc->reg_base + LIOINTC_REG_INTC_STATUS + offset); if (!pending) { /* Always blame LPC IRQ if we have that bug */ @@ -140,67 +142,44 @@ static void liointc_resume(struct irq_chip_generic *gc) irq_gc_unlock_irqrestore(gc, flags); } -static const char * const parent_names[] = {"int0", "int1", "int2", "int3"}; +static int parent_irq[LIOINTC_NUM_PARENT]; +static u32 parent_int_map[LIOINTC_NUM_PARENT]; +static const char *const parent_names[] = {"int0", "int1", "int2", "int3"}; -int __init liointc_of_init(struct device_node *node, - struct device_node *parent) +static int liointc_init(phys_addr_t addr, unsigned long size, int revision, + struct fwnode_handle *domain_handle, struct device_node *node) { + int i, err; + void __iomem *base; + struct irq_chip_type *ct; struct irq_chip_generic *gc; struct irq_domain *domain; - struct irq_chip_type *ct; struct liointc_priv *priv; - void __iomem *base; - u32 of_parent_int_map[LIOINTC_NUM_PARENT]; - int parent_irq[LIOINTC_NUM_PARENT]; - bool have_parent = FALSE; - int sz, i, err = 0; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - base = of_iomap(node, 0); - if (!base) { - err = -ENODEV; + base = ioremap(addr, size); + if (!base) goto out_free_priv; - } - for (i = 0; i < LIOINTC_NUM_PARENT; i++) { - parent_irq[i] = of_irq_get_byname(node, parent_names[i]); - if (parent_irq[i] > 0) - have_parent = TRUE; - } - if (!have_parent) { - err = -ENODEV; - goto out_iounmap; - } - - sz = of_property_read_variable_u32_array(node, - "loongson,parent_int_map", - &of_parent_int_map[0], - LIOINTC_NUM_PARENT, - LIOINTC_NUM_PARENT); - if (sz < 4) { - pr_err("loongson-liointc: No parent_int_map\n"); - err = -ENODEV; - goto out_iounmap; - } + priv->domain_handle = domain_handle; for (i = 0; i < LIOINTC_NUM_PARENT; i++) - priv->handler[i].parent_int_map = of_parent_int_map[i]; + priv->handler[i].parent_int_map = 
parent_int_map[i]; /* Setup IRQ domain */ - domain = irq_domain_add_linear(node, 32, + domain = irq_domain_create_linear(domain_handle, LIOINTC_CHIP_IRQ, &irq_generic_chip_ops, priv); if (!domain) { pr_err("loongson-liointc: cannot add IRQ domain\n"); - err = -EINVAL; goto out_iounmap; } - err = irq_alloc_domain_generic_chips(domain, 32, 1, - node->full_name, handle_level_irq, - IRQ_NOPROBE, 0, 0); + err = irq_alloc_domain_generic_chips(domain, LIOINTC_CHIP_IRQ, 1, + (node ? node->full_name : "LIOINTC"), + handle_level_irq, 0, IRQ_NOPROBE, 0); if (err) { pr_err("loongson-liointc: unable to register IRQ domain\n"); goto out_free_domain; @@ -265,8 +244,90 @@ int __init liointc_of_init(struct device_node *node, out_free_priv: kfree(priv); - return err; + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int __init liointc_of_init(struct device_node *node, + struct device_node *parent) +{ + bool have_parent = FALSE; + int sz, i, index, revision, err = 0; + struct resource res; + + if (!of_device_is_compatible(node, "loongson,liointc-2.0")) { + index = 0; + revision = 1; + } else { + index = of_property_match_string(node, "reg-names", "main"); + revision = 2; + } + + if (of_address_to_resource(node, index, &res)) + return -EINVAL; + + for (i = 0; i < LIOINTC_NUM_PARENT; i++) { + parent_irq[i] = of_irq_get_byname(node, parent_names[i]); + if (parent_irq[i] > 0) + have_parent = TRUE; + } + if (!have_parent) + return -ENODEV; + + sz = of_property_read_variable_u32_array(node, + "loongson,parent_int_map", + &parent_int_map[0], + LIOINTC_NUM_PARENT, + LIOINTC_NUM_PARENT); + if (sz < 4) { + pr_err("loongson-liointc: No parent_int_map\n"); + return -ENODEV; + } + + err = liointc_init(res.start, resource_size(&res), + revision, of_node_to_fwnode(node), node); + if (err < 0) + return err; + + return 0; } IRQCHIP_DECLARE(loongson_liointc_1_0, "loongson,liointc-1.0", liointc_of_init); IRQCHIP_DECLARE(loongson_liointc_1_0a, "loongson,liointc-1.0a", liointc_of_init); + +#endif + +#ifdef CONFIG_ACPI + +struct irq_domain *liointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lio_pic *acpi_liointc) +{ + int ret; + struct fwnode_handle *domain_handle; + + if (!acpi_liointc) + return NULL; + + parent_int_map[0] = acpi_liointc->cascade_map[0]; + parent_int_map[1] = acpi_liointc->cascade_map[1]; + + parent_irq[0] = irq_create_mapping(parent, acpi_liointc->cascade[0]); + if (!cpu_has_extioi) + parent_irq[1] = irq_create_mapping(parent, acpi_liointc->cascade[1]); + + domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_liointc); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return NULL; + } + + ret = liointc_init(acpi_liointc->address, acpi_liointc->size, + 1, domain_handle, NULL); + if (ret < 0) + return NULL; + + return irq_find_matching_fwnode(domain_handle, DOMAIN_BUS_ANY); +} + +#endif -- Gitee From d92fc6b9ca9fb66aa0d6dac57640c225ffac1150 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 014/241] irqchip: Add LoongArch CPU interrupt controller support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. This patch add LoongArch CPU interrupt controller support. 
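For context, consumers are expected to obtain their vectors through the
helpers this controller exports; a hedged sketch of a hypothetical timer
setup (the handler name is made up for illustration):

	int irq = get_timer_irq();	/* maps EXCCODE_TIMER in this domain */

	if (irq <= 0)
		panic("no CPU timer interrupt");
	if (request_irq(irq, constant_timer_interrupt,
			IRQF_PERCPU | IRQF_TIMER, "timer", NULL))
		pr_err("timer IRQ request failed\n");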
Change-Id: I898c467d5b63481cd31901e90f2777900c6916b7 Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 6 ++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-loongarch-cpu.c | 92 +++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 drivers/irqchip/irq-loongarch-cpu.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 58a321adcb1f..13d3fd9b3e4e 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -545,6 +545,12 @@ config EXYNOS_IRQ_COMBINER Say yes here to add support for the IRQ combiner devices embedded in Samsung Exynos chips. +config IRQ_LOONGARCH_CPU + bool + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + select GENERIC_IRQ_EFFECTIVE_AFF_MASK + config LOONGSON_LIOINTC bool "Loongson Local I/O Interrupt Controller" depends on MACH_LOONGSON64 diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 94c2885882ee..fa8df90e12fc 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -107,6 +107,7 @@ obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o +obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c new file mode 100644 index 000000000000..ea4841ffc470 --- /dev/null +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technologies, Inc. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static struct irq_domain *irq_domain; + +static void mask_loongarch_irq(struct irq_data *d) +{ + clear_csr_ecfg(ECFGF(d->hwirq)); +} + +static void unmask_loongarch_irq(struct irq_data *d) +{ + set_csr_ecfg(ECFGF(d->hwirq)); +} + +static struct irq_chip cpu_irq_controller = { + .name = "LoongArch", + .irq_mask = mask_loongarch_irq, + .irq_unmask = unmask_loongarch_irq, +}; + +static void handle_cpu_irq(struct pt_regs *regs) +{ + int hwirq; + unsigned int estat = read_csr_estat() & CSR_ESTAT_IS; + + while ((hwirq = ffs(estat))) { + estat &= ~BIT(hwirq - 1); + handle_domain_irq(irq_domain, hwirq - 1, regs); + } +} + +int get_ipi_irq(void) +{ + return irq_create_mapping(irq_domain, EXCCODE_IPI - EXCCODE_INT_START); +} + +int get_pmc_irq(void) +{ + return irq_create_mapping(irq_domain, EXCCODE_PMC - EXCCODE_INT_START); +} + +int get_timer_irq(void) +{ + return irq_create_mapping(irq_domain, EXCCODE_TIMER - EXCCODE_INT_START); +} + +static int loongarch_cpu_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + irq_set_noprobe(irq); + irq_set_chip_and_handler(irq, &cpu_irq_controller, handle_percpu_irq); + + return 0; +} + +static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = { + .map = loongarch_cpu_intc_map, + .xlate = irq_domain_xlate_onecell, +}; + +struct irq_domain * __init loongarch_cpu_irq_init(void) +{ + struct fwnode_handle *domain_handle; + + /* Mask interrupts. 
*/ + clear_csr_ecfg(ECFG0_IM); + clear_csr_estat(ESTATF_IP); + + domain_handle = irq_domain_alloc_fwnode(NULL); + irq_domain = irq_domain_create_linear(domain_handle, EXCCODE_INT_NUM, + &loongarch_cpu_intc_irq_domain_ops, NULL); + + if (!irq_domain) + panic("Failed to add irqdomain for LoongArch CPU"); + + set_handle_irq(&handle_cpu_irq); + + return irq_domain; +} -- Gitee From d295cae7285c1f13d50c370523aa4fcfb0969b6f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 015/241] irqchip: Add Loongson Extended I/O interrupt controller support We are preparing to add new Loongson (based on LoongArch, not MIPS) support. This patch add Loongson Extended I/O CPU interrupt controller support. Change-Id: I01436c77285429ea2385e9b91584e99e0f7bdc35 Signed-off-by: Huacai Chen --- drivers/irqchip/Kconfig | 10 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-loongson-eiointc.c | 380 +++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 392 insertions(+) create mode 100644 drivers/irqchip/irq-loongson-eiointc.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 13d3fd9b3e4e..36bc644b80ad 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -560,6 +560,16 @@ config LOONGSON_LIOINTC help Support for the Loongson Local I/O Interrupt Controller. +config LOONGSON_EIOINTC + bool "Loongson Extend I/O Interrupt Controller" + depends on LOONGARCH + depends on MACH_LOONGSON64 + default MACH_LOONGSON64 + select IRQ_DOMAIN_HIERARCHY + select GENERIC_IRQ_CHIP + help + Support for the Loongson3 Extend I/O Interrupt Vector Controller. + config LOONGSON_HTPIC bool "Loongson3 HyperTransport PIC Controller" depends on (MACH_LOONGSON64 && MIPS) diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index fa8df90e12fc..66fb3c560deb 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -109,6 +109,7 @@ obj-$(CONFIG_TI_SCI_INTA_IRQCHIP) += irq-ti-sci-inta.o obj-$(CONFIG_TI_PRUSS_INTC) += irq-pruss-intc.o obj-$(CONFIG_IRQ_LOONGARCH_CPU) += irq-loongarch-cpu.o obj-$(CONFIG_LOONGSON_LIOINTC) += irq-loongson-liointc.o +obj-$(CONFIG_LOONGSON_EIOINTC) += irq-loongson-eiointc.o obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o obj-$(CONFIG_LOONGSON_PCH_PIC) += irq-loongson-pch-pic.o diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c new file mode 100644 index 000000000000..9ddd6e4550ce --- /dev/null +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -0,0 +1,380 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Jianmin Lv + * Loongson Extend I/O Interrupt Vector support + */ + +#define pr_fmt(fmt) "eiointc: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EIOINTC_REG_NODEMAP 0x14a0 +#define EIOINTC_REG_IPMAP 0x14c0 +#define EIOINTC_REG_ENABLE 0x1600 +#define EIOINTC_REG_BOUNCE 0x1680 +#define EIOINTC_REG_ISR 0x1800 +#define EIOINTC_REG_ROUTE 0x1c00 + +#define VEC_REG_COUNT 4 +#define VEC_COUNT_PER_REG 64 +#define VEC_COUNT (VEC_REG_COUNT * VEC_COUNT_PER_REG) +#define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG) +#define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG) +#define EIOINTC_ALL_ENABLE 0xffffffff + +#define MAX_EIO_NODES (NR_CPUS / CORES_PER_EIO_NODE) + +static int nr_pics; + +struct eiointc_priv { + u32 node; + nodemask_t node_map; + cpumask_t cpuspan_map; + struct fwnode_handle *domain_handle; 
+ struct irq_domain *eiointc_domain; +}; + +static struct eiointc_priv *eiointc_priv[MAX_IO_PICS]; + +int eiointc_get_node(int id) +{ + return eiointc_priv[id]->node; +} + +static int cpu_to_eio_node(int cpu) +{ + return cpu_logical_map(cpu) / CORES_PER_EIO_NODE; +} + +static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, nodemask_t *node_map) +{ + int i, node, cpu_node, route_node; + unsigned char coremap[MAX_EIO_NODES]; + uint32_t pos_off, data, data_byte, data_mask; + + pos_off = pos & ~3; + data_byte = pos & 3; + data_mask = ~BIT_MASK(data_byte) & 0xf; + + memset(coremap, 0, sizeof(unsigned char) * MAX_EIO_NODES); + + /* Calculate node and coremap of target irq */ + cpu_node = cpu_logical_map(cpu) / CORES_PER_EIO_NODE; + coremap[cpu_node] |= BIT(cpu_logical_map(cpu) % CORES_PER_EIO_NODE); + + for_each_online_cpu(i) { + node = cpu_to_eio_node(i); + if (!node_isset(node, *node_map)) + continue; + + /* EIO node 0 is in charge of inter-node interrupt dispatch */ + route_node = (node == mnode) ? cpu_node : node; + data = ((coremap[node] | (route_node << 4)) << (data_byte * 8)); + csr_any_send(EIOINTC_REG_ROUTE + pos_off, data, data_mask, node * CORES_PER_EIO_NODE); + } +} + +static DEFINE_RAW_SPINLOCK(affinity_lock); + +static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force) +{ + unsigned int cpu; + unsigned long flags; + uint32_t vector, regaddr; + struct cpumask intersect_affinity; + struct eiointc_priv *priv = d->domain->host_data; + + if (!IS_ENABLED(CONFIG_SMP)) + return -EPERM; + + raw_spin_lock_irqsave(&affinity_lock, flags); + + cpumask_and(&intersect_affinity, affinity, cpu_online_mask); + cpumask_and(&intersect_affinity, &intersect_affinity, &priv->cpuspan_map); + + if (cpumask_empty(&intersect_affinity)) { + raw_spin_unlock_irqrestore(&affinity_lock, flags); + return -EINVAL; + } + cpu = cpumask_first(&intersect_affinity); + + if (!d->parent_data) + vector = d->hwirq; + else + vector = d->parent_data->hwirq; + + regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2); + + /* Mask target vector */ + csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)), + 0x0, priv->node * CORES_PER_EIO_NODE); + + /* Set route for target vector */ + eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map); + + /* Unmask target vector */ + csr_any_send(regaddr, EIOINTC_ALL_ENABLE, + 0x0, priv->node * CORES_PER_EIO_NODE); + + irq_data_update_effective_affinity(d, cpumask_of(cpu)); + + raw_spin_unlock_irqrestore(&affinity_lock, flags); + + return IRQ_SET_MASK_OK; +} + +static int eiointc_index(int node) +{ + int i; + + for (i = 0; i < nr_pics; i++) { + if (node_isset(node, eiointc_priv[i]->node_map)) + return i; + } + + return -1; +} + +static int eiointc_router_init(unsigned int cpu) +{ + int i, bit; + int node = cpu_to_eio_node(cpu); + int index = eiointc_index(node); + uint32_t data; + + if (index < 0) { + pr_err("Error: invalid nodemap!\n"); + return -1; + } + + if ((cpu_logical_map(cpu) % CORES_PER_EIO_NODE) == 0) { + eiointc_enable(); + + for (i = 0; i < VEC_COUNT / 32; i++) { + data = (((1 << (i * 2 + 1)) << 16) | (1 << (i * 2))); + iocsr_write32(data, EIOINTC_REG_NODEMAP + i * 4); + } + + for (i = 0; i < VEC_COUNT / 32 / 4; i++) { + bit = BIT(1 + index); /* Route to IP[1 + index] */ + data = bit | (bit << 8) | (bit << 16) | (bit << 24); + iocsr_write32(data, EIOINTC_REG_IPMAP + i * 4); + } + + for (i = 0; i < VEC_COUNT / 4; i++) { + /* Route to Node-0 Core-0 */ + if (index == 0) + bit = 
BIT(cpu_logical_map(0)); + else + bit = (eiointc_priv[index]->node << 4) | 1; + + data = bit | (bit << 8) | (bit << 16) | (bit << 24); + iocsr_write32(data, EIOINTC_REG_ROUTE + i * 4); + } + + for (i = 0; i < VEC_COUNT / 32; i++) { + data = 0xffffffff; + iocsr_write32(data, EIOINTC_REG_ENABLE + i * 4); + iocsr_write32(data, EIOINTC_REG_BOUNCE + i * 4); + } + } + + return 0; +} + +static void eiointc_irq_dispatch(struct irq_desc *desc) +{ + int i; + u64 pending; + bool handled = false; + struct irq_chip *chip = irq_desc_get_chip(desc); + struct eiointc_priv *priv = irq_desc_get_handler_data(desc); + + chained_irq_enter(chip, desc); + + for (i = 0; i < VEC_REG_COUNT; i++) { + pending = iocsr_read64(EIOINTC_REG_ISR + (i << 3)); + iocsr_write64(pending, EIOINTC_REG_ISR + (i << 3)); + while (pending) { + int bit = __ffs(pending); + int virq = irq_linear_revmap(priv->eiointc_domain, bit + VEC_COUNT_PER_REG * i); + + generic_handle_irq(virq); + pending &= ~BIT(bit); + handled = true; + } + } + + if (!handled) + spurious_interrupt(); + + chained_irq_exit(chip, desc); +} + +static void eiointc_ack_irq(struct irq_data *d) +{ + if (d->parent_data) + irq_chip_ack_parent(d); +} + +static void eiointc_mask_irq(struct irq_data *d) +{ + if (d->parent_data) + irq_chip_mask_parent(d); +} + +static void eiointc_unmask_irq(struct irq_data *d) +{ + if (d->parent_data) + irq_chip_unmask_parent(d); +} + +static struct irq_chip eiointc_irq_chip = { + .name = "EIOINTC", + .irq_ack = eiointc_ack_irq, + .irq_mask = eiointc_mask_irq, + .irq_unmask = eiointc_unmask_irq, + .irq_set_affinity = eiointc_set_irq_affinity, +}; + +static int eiointc_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int ret; + unsigned int i, type; + unsigned long hwirq = 0; + struct eiointc *priv = domain->host_data; + + ret = irq_domain_translate_onecell(domain, arg, &hwirq, &type); + if (ret < 0) + return -EINVAL; + + if (hwirq >= IOCSR_EXTIOI_VECTOR_NUM) + return -EINVAL; + + for (i = 0; i < nr_irqs; i++) { + irq_domain_set_info(domain, virq + i, hwirq + i, &eiointc_irq_chip, + priv, handle_edge_irq, NULL, NULL); + } + + return 0; +} + +static void eiointc_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); + + irq_set_handler(virq + i, NULL); + irq_domain_reset_irq_data(d); + } +} + +static const struct irq_domain_ops eiointc_domain_ops = { + .translate = irq_domain_translate_onecell, + .alloc = eiointc_domain_alloc, + .free = eiointc_domain_free, +}; + +static int eiointc_suspend(void) +{ + return 0; +} + +static void eiointc_resume(void) +{ + int i, j; + struct irq_desc *desc; + struct irq_data *irq_data; + + eiointc_router_init(0); + + for (i = 0; i < nr_pics; i++) { + for (j = 0; j < VEC_COUNT; j++) { + desc = irq_to_desc(irq_find_mapping(eiointc_priv[i]->eiointc_domain, j)); + if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) { + raw_spin_lock(&desc->lock); + irq_data = &desc->irq_data; + eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0); + raw_spin_unlock(&desc->lock); + } + } + } +} + +static struct syscore_ops eiointc_syscore_ops = { + .suspend = eiointc_suspend, + .resume = eiointc_resume, +}; + +struct irq_domain *eiointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_eio_pic *acpi_eiointc) +{ + int i, parent_irq; + unsigned long node_map; + struct eiointc_priv *priv; + + if (!acpi_eiointc) + 
return NULL;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	priv->domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_eiointc);
+	if (!priv->domain_handle) {
+		pr_err("Unable to allocate domain handle\n");
+		goto out_free_priv;
+	}
+
+	priv->node = acpi_eiointc->node;
+	node_map = acpi_eiointc->node_map ? : -1ULL;
+
+	for_each_possible_cpu(i) {
+		if (node_map & (1ULL << cpu_to_eio_node(i))) {
+			node_set(cpu_to_eio_node(i), priv->node_map);
+			cpumask_or(&priv->cpuspan_map, &priv->cpuspan_map, cpumask_of(i));
+		}
+	}
+
+	/* Setup IRQ domain */
+	priv->eiointc_domain = irq_domain_create_linear(priv->domain_handle, VEC_COUNT,
+					&eiointc_domain_ops, priv);
+	if (!priv->eiointc_domain) {
+		pr_err("loongson-eiointc: cannot add IRQ domain\n");
+		goto out_free_priv;
+	}
+
+	eiointc_priv[nr_pics++] = priv;
+
+	eiointc_router_init(0);
+
+	parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade);
+	irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv);
+
+	register_syscore_ops(&eiointc_syscore_ops);
+	cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING,
+				  "irqchip/loongarch/intc:starting",
+				  eiointc_router_init, NULL);
+
+	return irq_find_matching_fwnode(priv->domain_handle, DOMAIN_BUS_ANY);
+
+out_free_priv:
+	priv->domain_handle = NULL;
+	kfree(priv);
+
+	return NULL;
+}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 00205c2fcb5a..16122da6f950 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -108,6 +108,7 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_IRQ_MIPS_GIC_STARTING,
 	CPUHP_AP_IRQ_RISCV_STARTING,
+	CPUHP_AP_IRQ_LOONGARCH_STARTING,
 	CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
 	CPUHP_AP_MICROCODE_LOADER,
--
Gitee

From c9d8a8303cb65a303524e3877297d2ff17399e1f Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 016/241] irqchip: Add Loongson PCH LPC controller support

We are preparing to add new Loongson (based on LoongArch, not MIPS)
support. This patch adds Loongson PCH LPC interrupt controller support.

Change-Id: I2c5e0616f548755c8d8b4773a74c9f1f206bf0c1
Signed-off-by: Huacai Chen
---
 drivers/irqchip/Kconfig                |   8 +
 drivers/irqchip/Makefile               |   1 +
 drivers/irqchip/irq-loongson-pch-lpc.c | 223 +++++++++++++++++++++++++
 3 files changed, 232 insertions(+)
 create mode 100644 drivers/irqchip/irq-loongson-pch-lpc.c

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 36bc644b80ad..641d206326ab 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -606,6 +606,14 @@ config LOONGSON_PCH_MSI
 	help
 	  Support for the Loongson PCH MSI Controller.
 
+config LOONGSON_PCH_LPC
+	bool "Loongson PCH LPC Controller"
+	depends on MACH_LOONGSON64 || COMPILE_TEST
+	default (MACH_LOONGSON64 && LOONGARCH)
+	select IRQ_DOMAIN_HIERARCHY
+	help
+	  Support for the Loongson PCH LPC Controller.
+ config MST_IRQ bool "MStar Interrupt Controller" depends on ARCH_MEDIATEK || ARCH_MSTARV7 || COMPILE_TEST diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 66fb3c560deb..44794b04e82a 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -114,5 +114,6 @@ obj-$(CONFIG_LOONGSON_HTPIC) += irq-loongson-htpic.o obj-$(CONFIG_LOONGSON_HTVEC) += irq-loongson-htvec.o obj-$(CONFIG_LOONGSON_PCH_PIC) += irq-loongson-pch-pic.o obj-$(CONFIG_LOONGSON_PCH_MSI) += irq-loongson-pch-msi.o +obj-$(CONFIG_LOONGSON_PCH_LPC) += irq-loongson-pch-lpc.o obj-$(CONFIG_MST_IRQ) += irq-mst-intc.o obj-$(CONFIG_SL28CPLD_INTC) += irq-sl28cpld.o diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c new file mode 100644 index 000000000000..c63f65095163 --- /dev/null +++ b/drivers/irqchip/irq-loongson-pch-lpc.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Jianmin Lv + * Loongson LPC support + */ + +#define pr_fmt(fmt) "lpc: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* Registers */ +#define LPC_INT_CTL 0x00 +#define LPC_INT_ENA 0x04 +#define LPC_INT_STS 0x08 +#define LPC_INT_CLR 0x0c +#define LPC_INT_POL 0x10 +#define LPC_COUNT 16 + +struct pch_lpc { + void __iomem *base; + struct irq_domain *lpc_domain; + struct fwnode_handle *domain_handle; + raw_spinlock_t lpc_lock; + u32 saved_reg_ctl; + u32 saved_reg_ena; + u32 saved_reg_pol; +}; + +static struct pch_lpc *pch_lpc_priv; + +static void ack_lpc_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pch_lpc_priv->lpc_lock, flags); + writel(0x1 << d->irq, pch_lpc_priv->base + LPC_INT_CLR); + raw_spin_unlock_irqrestore(&pch_lpc_priv->lpc_lock, flags); +} +static void mask_lpc_irq(struct irq_data *d) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&pch_lpc_priv->lpc_lock, flags); + writel(readl(pch_lpc_priv->base + LPC_INT_ENA) & (~(0x1 << (d->irq))), + pch_lpc_priv->base + LPC_INT_ENA); + raw_spin_unlock_irqrestore(&pch_lpc_priv->lpc_lock, flags); +} + +static void mask_ack_lpc_irq(struct irq_data *d) +{ +} + +static void unmask_lpc_irq(struct irq_data *d) +{ + unsigned long flags; + raw_spin_lock_irqsave(&pch_lpc_priv->lpc_lock, flags); + writel(readl(pch_lpc_priv->base + LPC_INT_ENA) | (0x1 << (d->irq)), + pch_lpc_priv->base + LPC_INT_ENA); + raw_spin_unlock_irqrestore(&pch_lpc_priv->lpc_lock, flags); +} + +static int lpc_set_type(struct irq_data *d, unsigned int type) +{ + u32 val; + u32 mask = 0x1 << (d->hwirq); + + if (!(type & IRQ_TYPE_LEVEL_MASK)) + return 0; + + val = readl(pch_lpc_priv->base + LPC_INT_POL); + + if (type == IRQ_TYPE_LEVEL_HIGH) + val |= mask; + else + val &= ~mask; + + writel(val, pch_lpc_priv->base + LPC_INT_POL); + + return 0; +} + +static struct irq_chip pch_lpc_irq_chip = { + .name = "PCH LPC", + .irq_mask = mask_lpc_irq, + .irq_unmask = unmask_lpc_irq, + .irq_ack = ack_lpc_irq, + .irq_mask_ack = mask_ack_lpc_irq, + .irq_eoi = unmask_lpc_irq, + .irq_set_type = lpc_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static void lpc_irq_dispatch(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + u32 pending; + + chained_irq_enter(chip, desc); + + pending = readl(pch_lpc_priv->base + LPC_INT_ENA); + pending &= readl(pch_lpc_priv->base + LPC_INT_STS); + if (!pending) + spurious_interrupt(); + + while (pending) { + int bit = __ffs(pending); + generic_handle_irq(bit); + pending &= ~BIT(bit); + } + chained_irq_exit(chip, desc); +} + +static int 
pch_lpc_map(struct irq_domain *d, unsigned int irq,
+			irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(irq, &pch_lpc_irq_chip, handle_level_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops pch_lpc_domain_ops = {
+	.map		= pch_lpc_map,
+	.translate	= irq_domain_translate_twocell,
+};
+
+static void pch_lpc_reset(struct pch_lpc *priv)
+{
+	/* Enable the LPC interrupt, bit31: en bit30: edge */
+	writel(0x80000000, priv->base + LPC_INT_CTL);
+	writel(0, priv->base + LPC_INT_ENA);
+	/* Clear all 18 interrupt bits */
+	writel(0x3ffff, priv->base + LPC_INT_CLR);
+}
+
+static int pch_lpc_disabled(struct pch_lpc *priv)
+{
+	return (readl(priv->base + LPC_INT_ENA) == 0xffffffff) &&
+			(readl(priv->base + LPC_INT_STS) == 0xffffffff);
+}
+
+static int pch_lpc_suspend(void)
+{
+	pch_lpc_priv->saved_reg_ctl = readl(pch_lpc_priv->base + LPC_INT_CTL);
+	pch_lpc_priv->saved_reg_ena = readl(pch_lpc_priv->base + LPC_INT_ENA);
+	pch_lpc_priv->saved_reg_pol = readl(pch_lpc_priv->base + LPC_INT_POL);
+	return 0;
+}
+
+static void pch_lpc_resume(void)
+{
+	writel(pch_lpc_priv->saved_reg_ctl, pch_lpc_priv->base + LPC_INT_CTL);
+	writel(pch_lpc_priv->saved_reg_ena, pch_lpc_priv->base + LPC_INT_ENA);
+	writel(pch_lpc_priv->saved_reg_pol, pch_lpc_priv->base + LPC_INT_POL);
+}
+
+static struct syscore_ops pch_lpc_syscore_ops = {
+	.suspend = pch_lpc_suspend,
+	.resume = pch_lpc_resume,
+};
+
+struct irq_domain *pch_lpc_acpi_init(struct irq_domain *parent,
+					struct acpi_madt_lpc_pic *acpi_pchlpc)
+{
+	int parent_irq;
+	struct pch_lpc *priv;
+	struct irq_fwspec fwspec;
+
+	if (!acpi_pchlpc)
+		return NULL;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	raw_spin_lock_init(&priv->lpc_lock);
+
+	priv->base = ioremap(acpi_pchlpc->address, acpi_pchlpc->size);
+	if (!priv->base) {
+		goto free_priv;
+	}
+
+	if (pch_lpc_disabled(priv)) {
+		pr_err("Failed to get LPC status\n");
+		goto iounmap_base;
+	}
+
+	priv->domain_handle = irq_domain_alloc_fwnode((phys_addr_t *)acpi_pchlpc);
+	if (!priv->domain_handle) {
+		pr_err("Unable to allocate domain handle\n");
+		goto iounmap_base;
+	}
+	priv->lpc_domain = irq_domain_add_legacy(NULL, LPC_COUNT, 0, 0,
+					&pch_lpc_domain_ops, priv);
+	if (!priv->lpc_domain) {
+		pr_err("Failed to create IRQ domain\n");
+		goto iounmap_base;
+	}
+	pch_lpc_reset(priv);
+
+	fwspec.fwnode = parent->fwnode;
+	fwspec.param[0] = acpi_pchlpc->cascade;
+	fwspec.param[1] = IRQ_TYPE_LEVEL_HIGH;
+	fwspec.param_count = 2;
+	parent_irq = irq_create_fwspec_mapping(&fwspec);
+	irq_set_chained_handler_and_data(parent_irq, lpc_irq_dispatch, priv);
+	pch_lpc_priv = priv;
+
+	register_syscore_ops(&pch_lpc_syscore_ops);
+
+	return irq_find_matching_fwnode(priv->domain_handle, DOMAIN_BUS_ANY);
+
+iounmap_base:
+	iounmap(priv->base);
+free_priv:
+	kfree(priv);
+
+	return NULL;
+}
--
Gitee

From dafbefa4091a336f6d3b227cf1380ba315a7bd91 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 7 Dec 2022 20:26:34 +0800
Subject: [PATCH 017/241] irqchip/loongson-liointc: Save/restore
 int_edge/int_pol registers during S3/S4

If the int_edge/int_pol registers are configured to non-default values,
we should save/restore them during S3/S4.
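
For context, gc->suspend/gc->resume are hooks on struct irq_chip_generic
that the core invokes from its own syscore suspend/resume path, roughly
like this (paraphrased and abridged from kernel/irq/generic-chip.c, not
part of this patch):

	static int irq_gc_suspend(void)
	{
		struct irq_chip_generic *gc;

		/* Walk every registered generic chip and let it snapshot state */
		list_for_each_entry(gc, &gc_list, list) {
			if (gc->suspend)
				gc->suspend(gc);
		}
		return 0;
	}

So wiring up gc->suspend in the hunk below is all that is needed for the
register snapshot to happen at the right point in the S3/S4 sequence.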
Change-Id: I270db2cd7499f91f8e26a2ce16ac0c942639cb94
Signed-off-by: Yingkun Meng
Signed-off-by: Huacai Chen
---
 drivers/irqchip/irq-loongson-liointc.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index ef15b638a8c0..9643e18efcc3 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -45,6 +45,8 @@ struct liointc_priv {
 	struct irq_chip_generic	*gc;
 	struct liointc_handler_data	handler[LIOINTC_NUM_PARENT];
 	u8			map_cache[LIOINTC_CHIP_IRQ];
+	u32			int_pol;
+	u32			int_edge;
 	bool			has_lpc_irq_errata;
 };
 
@@ -125,6 +127,14 @@ static int liointc_set_type(struct irq_data *data, unsigned int type)
 	return 0;
 }
 
+static void liointc_suspend(struct irq_chip_generic *gc)
+{
+	struct liointc_priv *priv = gc->private;
+
+	priv->int_pol = readl(gc->reg_base + LIOINTC_REG_INTC_POL);
+	priv->int_edge = readl(gc->reg_base + LIOINTC_REG_INTC_EDGE);
+}
+
 static void liointc_resume(struct irq_chip_generic *gc)
 {
 	struct liointc_priv *priv = gc->private;
@@ -137,6 +147,8 @@ static void liointc_resume(struct irq_chip_generic *gc)
 	/* Restore map cache */
 	for (i = 0; i < LIOINTC_CHIP_IRQ; i++)
 		writeb(priv->map_cache[i], gc->reg_base + i);
+	writel(priv->int_pol, gc->reg_base + LIOINTC_REG_INTC_POL);
+	writel(priv->int_edge, gc->reg_base + LIOINTC_REG_INTC_EDGE);
 	/* Restore mask cache */
 	writel(gc->mask_cache, gc->reg_base + LIOINTC_REG_INTC_ENABLE);
 	irq_gc_unlock_irqrestore(gc, flags);
@@ -213,6 +225,7 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision,
 	gc->private = priv;
 	gc->reg_base = base;
 	gc->domain = domain;
+	gc->suspend = liointc_suspend;
 	gc->resume = liointc_resume;
 
 	ct = gc->chip_types;
--
Gitee

From 8bf9b6d7a25bf7f55425ff4f837ba543da6217f3 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Fri, 26 May 2023 14:57:47 +0800
Subject: [PATCH 018/241] irqchip/loongson-pch-msi: Add mechanism to limit msi
 allocation

Loongson machines can have as many as 256 logical cpus, but the maximum
number of msi vectors in one irqchip is also 256 (in practice less than
256, because pch-pic consumes some of them). Even on a 64-core machine,
256 irqs can be easily exhausted if there are several NICs (NICs usually
allocate msi irqs depending on the number of online cpus). So we want to
limit the msi allocation.

In this patch we add a mechanism to limit msi allocation:

1, Modify input "nvec" by overriding the msi_domain_ops::msi_prepare();
2, The default limit is 256, which is compatible with the old behavior;
3, Add a cmdline parameter "loongson_msi_limit=xxx" to control the limit.
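
For example, booting with (an illustrative value):

	loongson_msi_limit=64

caps each pch-msi domain at 64 vectors: a NIC requesting 128 vectors on
a 128-CPU machine is then prepared with clamp_val(128, 0, 64) == 64, as
computed by the msi_prepare() override in the hunk below.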
Change-Id: Ic336fe02a4366abe8e5a7a69e8423fe76745444a
Signed-off-by: Juxin Gao
Signed-off-by: Huacai Chen
---
 drivers/irqchip/irq-loongson-pch-msi.c | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c
index d148ea8e2d68..a333dce8ec9c 100644
--- a/drivers/irqchip/irq-loongson-pch-msi.c
+++ b/drivers/irqchip/irq-loongson-pch-msi.c
@@ -88,9 +88,36 @@ static void pch_msi_compose_msi_msg(struct irq_data *data,
 	msg->data = data->hwirq;
 }
 
+#define DEFAULT_MSI_LIMITS 256
+
+static int pch_msi_limits = DEFAULT_MSI_LIMITS;
+
+static int __init pch_msi_limit(char *str)
+{
+	get_option(&str, &pch_msi_limits);
+
+	if (pch_msi_limits <= 0)
+		pch_msi_limits = DEFAULT_MSI_LIMITS;
+
+	return 0;
+}
+
+early_param("loongson_msi_limit", pch_msi_limit);
+
+static int pch_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg)
+{
+	memset(arg, 0, sizeof(*arg));
+	return clamp_val(nvec, 0, pch_msi_limits);
+}
+
+static struct msi_domain_ops pch_msi_ops = {
+	.msi_prepare	= pch_msi_prepare,
+};
+
 static struct msi_domain_info pch_msi_domain_info = {
 	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
 		  MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+	.ops	= &pch_msi_ops,
 	.chip	= &pch_msi_irq_chip,
 };
--
Gitee

From 4ec80fb9d0eef02ba2d0e9b85a40a1870d6bc3c0 Mon Sep 17 00:00:00 2001
From: Jianmin Lv
Date: Wed, 14 Jun 2023 19:59:36 +0800
Subject: [PATCH 019/241] irqchip/loongson-eiointc: Fix irq affinity setting
 during resume

The hierarchy of PCH PIC, PCH PCI MSI and EIOINTC is as follows:

PCH PIC ------->|
                |---->EIOINTC
PCH PCI MSI --->|

so the irq_data list of irq_desc for IRQs on PCH PIC and PCH PCI MSI
is like this:

irq_desc->irq_data(domain: PCH PIC)->parent_data(domain: EIOINTC)
irq_desc->irq_data(domain: PCH PCI MSI)->parent_data(domain: EIOINTC)

In eiointc_resume(), the irq_data passed into eiointc_set_irq_affinity()
should be the one matched to the EIOINTC domain, not to the PCH PIC or
PCH PCI MSI domain, so fix it.

Fixes: a90335c2dfb4 ("irqchip/loongson-eiointc: Add suspend/resume support")
Change-Id: I3b157ffacf88c08c44391a2b03c0f151d1e15a14
Reported-by: Qiming Yang
Signed-off-by: Jianmin Lv
---
 drivers/irqchip/irq-loongson-eiointc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 9ddd6e4550ce..429b67c7843d 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -307,8 +307,8 @@ static void eiointc_resume(void)
 			desc = irq_to_desc(irq_find_mapping(eiointc_priv[i]->eiointc_domain, j));
 			if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) {
 				raw_spin_lock(&desc->lock);
-				irq_data = &desc->irq_data;
-				eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0);
+				irq_data = irq_domain_get_irq_data(eiointc_priv[i]->eiointc_domain, irq_desc_get_irq(desc));
+				if (irq_data) eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0);
 				raw_spin_unlock(&desc->lock);
 			}
 		}
--
Gitee

From e8ef90e85cdc9e3ff38e0b4ce02dbc9e57b7e913 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 9 Nov 2022 17:24:11 +0800
Subject: [PATCH 020/241] irqchip/loongson-eiointc: Improve IRQ affinity
 setting

On multi-bridge platforms, an MSI-X vector is often affine to only one
cpu, and the number of MSI-X vectors is determined by the number of
cpus in the system.
Unfortunately, the cpu group related to a bridge is only allowed to
handle interrupts from devices behind that bridge, which breaks the
normal affinity setting for multiple MSI-X vectors, causing the
following affinity setting:

IRQ: 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
CPU: 0, 1, 2, 3, 4, 0,  0,  0,  0,  0

To balance the affinity, we improve the setting to assign a cpu for
each IRQ as follows:

IRQ: 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
CPU: 0, 1, 2, 3, 4, 0,  1,  2,  3,  4

Change-Id: I903261c76ed9cb11092ce6be02e6fd3223bd761f
Signed-off-by: Jianmin Lv
Signed-off-by: Huacai Chen
---
 drivers/irqchip/irq-loongson-eiointc.c | 39 ++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 429b67c7843d..23a4fcf8e30f 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -56,6 +56,17 @@ static int cpu_to_eio_node(int cpu)
 	return cpu_logical_map(cpu) / CORES_PER_EIO_NODE;
 }
 
+static unsigned int cpumask_nth(unsigned int idx, const struct cpumask *srcp)
+{
+	int cpu;
+
+	for_each_cpu(cpu, srcp)
+		if (idx-- == 0)
+			return cpu;
+
+	BUG();
+}
+
 static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode, nodemask_t *node_map)
 {
 	int i, node, cpu_node, route_node;
@@ -91,6 +102,7 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af
 	unsigned int cpu;
 	unsigned long flags;
 	uint32_t vector, regaddr;
+	struct cpumask online_affinity;
 	struct cpumask intersect_affinity;
 	struct eiointc_priv *priv = d->domain->host_data;
 
@@ -99,14 +111,31 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af
 
 	raw_spin_lock_irqsave(&affinity_lock, flags);
 
-	cpumask_and(&intersect_affinity, affinity, cpu_online_mask);
-	cpumask_and(&intersect_affinity, &intersect_affinity, &priv->cpuspan_map);
-
-	if (cpumask_empty(&intersect_affinity)) {
+	cpumask_and(&online_affinity, affinity, cpu_online_mask);
+	if (cpumask_empty(&online_affinity)) {
 		raw_spin_unlock_irqrestore(&affinity_lock, flags);
 		return -EINVAL;
 	}
-	cpu = cpumask_first(&intersect_affinity);
+	cpumask_and(&intersect_affinity, &online_affinity, &priv->cpuspan_map);
+
+	if (!cpumask_empty(&intersect_affinity))
+		cpu = cpumask_first(&intersect_affinity);
+	else {
+		int c, idx = 0;
+		struct cpumask complement_map;
+		struct cpumask cpuspan_online_map;
+
+		cpu = cpumask_first(&online_affinity);
+		cpumask_complement(&complement_map, &priv->cpuspan_map);
+		cpumask_and(&cpuspan_online_map, &priv->cpuspan_map, cpu_online_mask);
+
+		for_each_cpu(c, &complement_map) {
+			if (c == cpu) break; idx++;
+		}
+
+		idx = idx % cpumask_weight(&cpuspan_online_map);
+		cpu = cpumask_nth(idx, &cpuspan_online_map);
+	}
 
 	if (!d->parent_data)
 		vector = d->hwirq;
--
Gitee

From 17f112cb8ddf0b36e93bf9050d9b9b95c4b10bb9 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sat, 11 Feb 2023 10:33:21 +0800
Subject: [PATCH 021/241] PCI: loongson: Add more devices that need MRRS quirk

[ Upstream commit c768f8c5f40fcdc6f058cc2f02592163d6c6716c ]

Loongson-2K SOC and LS7A2000 chipset add new PCI IDs that need MRRS
quirk. Add them.
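
The hunk itself is not reproduced here. For reference, the MRRS quirk
these IDs hook into is shaped roughly as follows (a simplified sketch
only, not the in-tree body, which additionally walks up to the
offending bridge first; the DEV_LS7A_PCIE_PORT6 hookup mirrors an
existing fixup visible in a later hunk of this series):

	static void loongson_mrrs_quirk(struct pci_dev *pdev)
	{
		/* Devices behind these ports must not use MRRS above 256 bytes */
		if (pcie_get_readrq(pdev) > 256)
			pcie_set_readrq(pdev, 256);
	}
	DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON,
			DEV_LS7A_PCIE_PORT6, loongson_mrrs_quirk);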
Change-Id: I9e2dd88e67d3fa94bf40e7238b6006eba1d0d2ef
Link: https://lore.kernel.org/r/20230211023321.3530080-1-chenhuacai@loongson.cn
Signed-off-by: Huacai Chen
Signed-off-by: Bjorn Helgaas
Signed-off-by: Sasha Levin
--
Gitee

From 09bf063504d1cf41548ac133407d804732c5fc15 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sat, 5 Sep 2020 15:14:50 +0800
Subject: [PATCH 022/241] PCI: loongson: Use correct pci config access
 operations

LS2K/LS7A support 8/16/32-bit pci config access operations, so we can
safely use pci_generic_config_read()/pci_generic_config_write() instead
of pci_generic_config_read32()/pci_generic_config_write32().

Change-Id: I679311ad22b55b2ee09845b92fa759358364ec55
Signed-off-by: Huacai Chen
---
 drivers/pci/controller/pci-loongson.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c
index e73e18a73833..0d21c370471a 100644
--- a/drivers/pci/controller/pci-loongson.c
+++ b/drivers/pci/controller/pci-loongson.c
@@ -160,8 +160,15 @@ static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 	return val;
 }
 
-/* H/w only accept 32-bit PCI operations */
+/* LS2K/LS7A accept 8/16/32-bit PCI operations */
 static struct pci_ops loongson_pci_ops = {
+	.map_bus = pci_loongson_map_bus,
+	.read	= pci_generic_config_read,
+	.write	= pci_generic_config_write,
+};
+
+/* RS780/SR5690 only accept 32-bit PCI operations */
+static struct pci_ops loongson_pci_ops32 = {
 	.map_bus = pci_loongson_map_bus,
 	.read	= pci_generic_config_read32,
 	.write	= pci_generic_config_write32,
@@ -219,8 +226,11 @@ static int loongson_pci_probe(struct platform_device *pdev)
 	}
 
 	bridge->sysdata = priv;
-	bridge->ops = &loongson_pci_ops;
 	bridge->map_irq = loongson_map_irq;
+	if (!of_device_is_compatible(node, "loongson,rs780e-pci"))
+		bridge->ops = &loongson_pci_ops;
+	else
+		bridge->ops = &loongson_pci_ops32;
 
 	return pci_host_probe(bridge);
 }
--
Gitee

From cbd11c6d2d8dd66bcb508e461bb0aedd4defde57 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sat, 5 Sep 2020 15:14:50 +0800
Subject: [PATCH 023/241] PCI/portdrv: Don't disable pci device during
 shutdown

Use separate remove()/shutdown() callbacks, and don't disable the pci
device during shutdown. This can avoid some poweroff/reboot failures.

The poweroff/reboot failures can be easily reproduced on Loongson
platforms. I think this is not a Loongson-specific problem; instead, it
is a problem related to some specific PCI hosts. On some x86 platforms,
radeon/amdgpu devices can cause the same problem, and commit
faefba95c9e8ca3a523831c2e ("drm/amdgpu: just suspend the hw on pci
shutdown") can resolve it.

As Tiezhu said, this occasional shutdown or reboot failure is due to
clearing PCI_COMMAND_MASTER on the device in do_pci_disable_device().

drivers/pci/pci.c
static void do_pci_disable_device(struct pci_dev *dev)
{
	u16 pci_command;

	pci_read_config_word(dev, PCI_COMMAND, &pci_command);
	if (pci_command & PCI_COMMAND_MASTER) {
		pci_command &= ~PCI_COMMAND_MASTER;
		pci_write_config_word(dev, PCI_COMMAND, pci_command);
	}

	pcibios_disable_device(dev);
}

When "pci_command &= ~PCI_COMMAND_MASTER;" is removed, shutdown and
reboot work well. This may imply that there is still DMA activity on
the device during shutdown.

The radeon driver is more difficult than amdgpu due to its confusing
symbol names, and I have maintained an out-of-tree patch for it for a
long time [1].
Recently, we have found that more and more devices can cause the same
problem, and it is very difficult to modify all problematic drivers the
way radeon/amdgpu does (the .shutdown callback should make sure there
is no DMA activity). So I think modifying the PCIe port driver is a
simple and effective way. And as discussed earlier, kexec still works
after this patch.

[1] https://github.com/chenhuacai/linux/commit/6612f9c1fc290d42a14618ce9a7d03014d8ebb1a

Change-Id: Iee406bb8fe955044962821006934a1ee1fffdaad
Signed-off-by: Huacai Chen
Signed-off-by: Tiezhu Yang
---
 drivers/pci/pcie/portdrv.h      |  2 +-
 drivers/pci/pcie/portdrv_core.c |  6 ++++--
 drivers/pci/pcie/portdrv_pci.c  | 15 +++++++++++++--
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 2ff5724b8f13..358d7281f6e8 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -117,7 +117,7 @@ int pcie_port_device_resume(struct device *dev);
 int pcie_port_device_runtime_suspend(struct device *dev);
 int pcie_port_device_runtime_resume(struct device *dev);
 #endif
-void pcie_port_device_remove(struct pci_dev *dev);
+void pcie_port_device_remove(struct pci_dev *dev, bool disable);
 int __must_check pcie_port_bus_register(void);
 void pcie_port_bus_unregister(void);
 
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 3779b264dbec..682c1cce5965 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -492,11 +492,13 @@ EXPORT_SYMBOL_GPL(pcie_port_find_device);
 * Remove PCI Express port service devices associated with given port and
 * disable MSI-X or MSI for the port.
 */
-void pcie_port_device_remove(struct pci_dev *dev)
+void pcie_port_device_remove(struct pci_dev *dev, bool disable)
 {
 	device_for_each_child(&dev->dev, NULL, remove_iter);
 	pci_free_irq_vectors(dev);
-	pci_disable_device(dev);
+
+	if (disable)
+		pci_disable_device(dev);
 }
 
 /**
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index aac1a6828b4f..f46411507f9c 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -144,7 +144,18 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
 		pm_runtime_dont_use_autosuspend(&dev->dev);
 	}
 
-	pcie_port_device_remove(dev);
+	pcie_port_device_remove(dev, true);
+}
+
+static void pcie_portdrv_shutdown(struct pci_dev *dev)
+{
+	if (pci_bridge_d3_possible(dev)) {
+		pm_runtime_forbid(&dev->dev);
+		pm_runtime_get_noresume(&dev->dev);
+		pm_runtime_dont_use_autosuspend(&dev->dev);
+	}
+
+	pcie_port_device_remove(dev, false);
 }
 
 static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
@@ -215,7 +226,7 @@ static struct pci_driver pcie_portdriver = {
 
 	.probe		= pcie_portdrv_probe,
 	.remove		= pcie_portdrv_remove,
-	.shutdown	= pcie_portdrv_remove,
+	.shutdown	= pcie_portdrv_shutdown,
 
 	.err_handler	= &pcie_portdrv_err_handler,
--
Gitee

From 54d1ea7a7a9f821033cc68610af887e918c20285 Mon Sep 17 00:00:00 2001
From: lvjianmin
Date: Thu, 29 Oct 2020 16:29:11 +0800
Subject: [PATCH 024/241] PCI: Add quirk for multifunction devices of LS7A

In LS7A, multifunction devices use the same PCI PIN but different IRQs
for different functions, so fix this up for standard PCI PIN usage.
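
As a worked example of the fixup added below (not part of the patch):
PCI_FUNC() extracts the function number from devfn, so

	pdev->pin = 1 + (PCI_FUNC(pdev->devfn) & 3);

maps function 0 to pin 1 (INTA), function 1 to pin 2 (INTB), function 2
to pin 3 (INTC) and function 3 to pin 4 (INTD), wrapping around for
higher function numbers (e.g. function 5 gives 1 + (5 & 3) = 2, INTB).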
Change-Id: I50fc449d8fdce8efd5b6331d4ea477e3df2a4e86 Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen --- drivers/pci/controller/pci-loongson.c | 23 +++++++++++++++++++++++ include/linux/pci_ids.h | 14 ++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index 0d21c370471a..1273838487f4 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -93,6 +93,29 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, DEV_LS7A_PCIE_PORT6, loongson_mrrs_quirk); +static void loongson_pci_pin_quirk(struct pci_dev *pdev) +{ + pdev->pin = 1 + (PCI_FUNC(pdev->devfn) & 3); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_AHCI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_EHCI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_OHCI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_DC1, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_DC2, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_GPU, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_HDMI, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_GMAC, loongson_pci_pin_quirk); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, + PCI_DEVICE_ID_LOONGSON_GNET, loongson_pci_pin_quirk); + static void __iomem *cfg1_map(struct loongson_pci *priv, int bus, unsigned int devfn, int where) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4b34a5c12599..88e22189f572 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -152,6 +152,20 @@ /* Vendors and devices. Sort key: vendor first, device next. */ #define PCI_VENDOR_ID_LOONGSON 0x0014 +#define PCI_DEVICE_ID_LOONGSON_APB 0x7a02 +#define PCI_DEVICE_ID_LOONGSON_GMAC 0x7a03 +#define PCI_DEVICE_ID_LOONGSON_GNET 0x7a13 +#define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06 +#define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36 +#define PCI_DEVICE_ID_LOONGSON_HDA 0x7a07 +#define PCI_DEVICE_ID_LOONGSON_GPU 0x7a15 +#define PCI_DEVICE_ID_LOONGSON_AHCI 0x7a08 +#define PCI_DEVICE_ID_LOONGSON_EHCI 0x7a14 +#define PCI_DEVICE_ID_LOONGSON_OHCI 0x7a24 +#define PCI_DEVICE_ID_LOONGSON_LPC 0x7a0c +#define PCI_DEVICE_ID_LOONGSON_DMA 0x7a0f +#define PCI_DEVICE_ID_LOONGSON_IOMMU 0x7a1f +#define PCI_DEVICE_ID_LOONGSON_HDMI 0x7a37 #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a -- Gitee From f63a903908f6cf4b02e38ed6776ee46a177e3a18 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 25 Apr 2021 15:57:56 +0800 Subject: [PATCH 025/241] PCI: Support ASpeed VGA cards behind a misbehaving bridge According to PCI-to-PCI bridge spec, bit 3 of Bridge Control Register is VGA Enable bit which modifies the response by the bridge to VGA compatible addresses. The Bridge Control register provides extensions to the Command register that are specific to a bridge. The Bridge Control register provides many of the same controls for the secondary interface that are provided by the Command register for the primary interface. There are some bits that affect the operation of both interfaces of the bridge. 
If the VGA Enable bit is set, the bridge will decode and forward the
following accesses on the primary interface to the secondary interface.
Forwarding of these accesses is qualified by the I/O Enable and Memory
Enable bits in the Command register, and the VGA Enable bit modifies
the response by the bridge to VGA compatible addresses.

when 0: do not forward VGA compatible memory and I/O addresses from the
primary to secondary interface (addresses defined below) unless they
are enabled for forwarding by the defined I/O and memory address ranges

when 1: forward VGA compatible memory and I/O addresses (addresses
defined below) from the primary interface to the secondary interface
(if the I/O Enable and Memory Enable bits are set) independent of the
I/O and memory address ranges and independent of the ISA Enable bit

* memory accesses in the range 000A 0000h to 000B FFFFh

* I/O addresses in the first 64 KB of the I/O address space (AD[31:16]
  are 0000h) where AD[9::0] are in the ranges 3B0h to 3BBh and 3C0h to
  3DFh (inclusive of ISA address aliases - AD[15::10] are not decoded)

If the VGA Enable bit is set, forwarding of these accesses is
independent of the I/O address range and memory address ranges defined
by the I/O Base and Limit registers, the Memory Base and Limit
registers, and the Prefetchable Memory Base and Limit registers of the
bridge. Forwarding of these accesses is also independent of the
settings of the ISA Enable bit (in the Bridge Control register) or the
VGA Palette Snoop bits (in the Command register).

The AST2500 hardware does not set the VGA Enable bit in its bridge
control register, which causes the vgaarb subsystem to not treat the
VGA card behind this bridge as a valid boot vga device. So we provide
a quirk to fix Xorg auto-detection.

See similar bug:
https://patchwork.kernel.org/project/linux-pci/patch/20170619023528.11532-1-dja@axtens.net/

Change-Id: Icc9590b0088edd86cf93da184e46441f0fc09eec
Signed-off-by: Huacai Chen
Signed-off-by: Jingfeng Sui
---
 drivers/pci/quirks.c | 45 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index c0d113481191..c8ddcf001d26 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include
 #include	/* isa_dma_bridge_buggy */
 #include "pci.h"
 
@@ -206,6 +207,50 @@ static void quirk_mmio_always_on(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_ANY_ID, PCI_ANY_ID,
			      PCI_CLASS_BRIDGE_HOST, 8, quirk_mmio_always_on);
 
+static void aspeed_fixup_vgaarb(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge;
+	struct pci_bus *bus;
+	struct pci_dev *vdevp = NULL;
+	u16 config;
+
+	bus = pdev->bus;
+	bridge = bus->self;
+
+	/* Is VGA routed to us? */
+	if (bridge && (pci_is_bridge(bridge))) {
+		pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &config);
+
+		/* Yes, this bridge is PCI bridge-to-bridge spec compliant,
+		 * just return!
+		 */
+		if (config & PCI_BRIDGE_CTL_VGA)
+			return;
+
+		dev_warn(&pdev->dev, "VGA bridge control is not enabled\n");
+	}
+
+	/* Just return if the system already has a default device */
+	if (vga_default_device())
+		return;
+
+	/* No default vga device */
+	while ((vdevp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, vdevp))) {
+		if (vdevp->vendor != 0x1a03) {
+			/* Have another vga device in the system, do nothing */
+			dev_info(&pdev->dev, "Another boot vga device: 0x%x:0x%x\n",
+				vdevp->vendor, vdevp->device);
+			return;
+		}
+	}
+
+	vga_set_default_device(pdev);
+
+	dev_info(&pdev->dev, "Boot vga device set as 0x%x:0x%x\n",
+			pdev->vendor, pdev->device);
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(0x1a03, 0x2000, PCI_CLASS_DISPLAY_VGA, 8, aspeed_fixup_vgaarb);
+
 /*
 * The Mellanox Tavor device gives false positive parity errors. Mark this
 * device with a broken_parity_status to allow PCI scanning code to "skip"
--
Gitee

From ff79af4682b9d3264305d2c972e1a8e2595cd611 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 15 Sep 2022 19:45:35 +0200
Subject: [PATCH 026/241] efi: libstub: avoid efi_get_memory_map() for
 allocating the virt map

The virt map is a set of efi_memory_desc_t descriptors that are passed
to SetVirtualAddressMap() to inform the firmware about the desired
virtual mapping of the regions marked as EFI_MEMORY_RUNTIME. The only
reason we currently call the efi_get_memory_map() helper is that it
gives us an allocation that is guaranteed to be of sufficient size.

However, efi_get_memory_map() has grown some additional complexity over
the years, and today, we're actually better off calling the EFI boot
service directly with a zero size, which tells us how much memory should
be enough for the virt map.

While at it, avoid creating the VA map allocation if we will not be
using it anyway, i.e., if efi_novamap is true.

Change-Id: Ie86dbdb7463d93e6774697c5bed60695f2ee885d
Signed-off-by: Ard Biesheuvel
---
 drivers/firmware/efi/libstub/efi-stub.c | 31 +++++++++++++++++++++
 drivers/firmware/efi/libstub/efistub.h  |  2 ++
 drivers/firmware/efi/libstub/fdt.c      | 36 ++++++++++---------------
 3 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 0ab439c53eee..960903d36403 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -318,6 +318,35 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 	return status;
 }
 
+/*
+ * efi_alloc_virtmap() - create a pool allocation for the virtmap
+ *
+ * Create an allocation that is of sufficient size to hold all the memory
+ * descriptors that will be passed to SetVirtualAddressMap() to inform the
+ * firmware about the virtual mapping that will be used under the OS to call
+ * into the firmware.
+ */
+efi_status_t efi_alloc_virtmap(efi_memory_desc_t **virtmap,
+			       unsigned long *desc_size, u32 *desc_ver)
+{
+	unsigned long size, mmap_key;
+	efi_status_t status;
+
+	/*
+	 * Use the size of the current memory map as an upper bound for the
+	 * size of the buffer we need to pass to SetVirtualAddressMap() to
+	 * cover all EFI_MEMORY_RUNTIME regions.
+ */ + size = 0; + status = efi_bs_call(get_memory_map, &size, NULL, &mmap_key, desc_size, + desc_ver); + if (status != EFI_BUFFER_TOO_SMALL) + return EFI_LOAD_ERROR; + + return efi_bs_call(allocate_pool, EFI_LOADER_DATA, size, + (void **)virtmap); +} + /* * efi_get_virtmap() - create a virtual mapping for the EFI memory map * @@ -333,6 +362,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, efi_memory_desc_t *in, *out = runtime_map; int l; + *count = 0; + for (l = 0; l < map_size; l += desc_size) { u64 paddr, size; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index c5e0c6e99d20..463b5a3e55fb 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -758,6 +758,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, void *get_fdt(unsigned long *fdt_size); +efi_status_t efi_alloc_virtmap(efi_memory_desc_t **virtmap, + unsigned long *desc_size, u32 *desc_ver); void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, int *count); diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index d48b0de05b62..df699de805f3 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -199,7 +199,7 @@ static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) struct exit_boot_struct { efi_memory_desc_t *runtime_map; - int *runtime_entry_count; + int runtime_entry_count; void *new_fdt_addr; }; @@ -213,7 +213,7 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, * entries so that we can pass it straight to SetVirtualAddressMap() */ efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, - p->runtime_map, p->runtime_entry_count); + p->runtime_map, &p->runtime_entry_count); return update_fdt_memmap(p->new_fdt_addr, map); } @@ -247,29 +247,24 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long map_size, desc_size, buff_size; u32 desc_ver; unsigned long mmap_key; - efi_memory_desc_t *memory_map, *runtime_map; + efi_memory_desc_t *memory_map; efi_status_t status; - int runtime_entry_count; struct efi_boot_memmap map; struct exit_boot_struct priv; - map.map = &runtime_map; map.map_size = &map_size; map.desc_size = &desc_size; map.desc_ver = &desc_ver; map.key_ptr = &mmap_key; map.buff_size = &buff_size; - /* - * Get a copy of the current memory map that we will use to prepare - * the input for SetVirtualAddressMap(). We don't have to worry about - * subsequent allocations adding entries, since they could not affect - * the number of EFI_MEMORY_RUNTIME regions. 
- */ - status = efi_get_memory_map(&map); - if (status != EFI_SUCCESS) { - efi_err("Unable to retrieve UEFI memory map.\n"); - return status; + if (!efi_novamap) { + status = efi_alloc_virtmap(&priv.runtime_map, &desc_size, + &desc_ver); + if (status != EFI_SUCCESS) { + efi_err("Unable to retrieve UEFI memory map.\n"); + return status; + } } efi_info("Exiting boot services and installing virtual address map...\n"); @@ -290,10 +285,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, goto fail_free_new_fdt; } - runtime_entry_count = 0; - priv.runtime_map = runtime_map; - priv.runtime_entry_count = &runtime_entry_count; - priv.new_fdt_addr = (void *)*new_fdt_addr; + priv.new_fdt_addr = (void *)*new_fdt_addr; status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); @@ -305,8 +297,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, /* Install the new virtual address map */ svam = efi_system_table->runtime->set_virtual_address_map; - status = svam(runtime_entry_count * desc_size, desc_size, - desc_ver, runtime_map); + status = svam(priv.runtime_entry_count * desc_size, desc_size, + desc_ver, priv.runtime_map); /* * We are beyond the point of no return here, so if the call to @@ -338,7 +330,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, efi_free(MAX_FDT_SIZE, *new_fdt_addr); fail: - efi_system_table->boottime->free_pool(runtime_map); + efi_bs_call(free_pool, priv.runtime_map); return EFI_LOAD_ERROR; } -- Gitee From 5393c11c9bca9c8c282d8f97e43040dd8374fcff Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Jun 2022 15:29:22 +0200 Subject: [PATCH 027/241] efi: libstub: simplify efi_get_memory_map() and struct efi_boot_memmap Currently, struct efi_boot_memmap is a struct that is passed around between callers of efi_get_memory_map() and the users of the resulting data, and which carries pointers to various variables whose values are provided by the EFI GetMemoryMap() boot service. This is overly complex, and it is much easier to carry these values in the struct itself. So turn the struct into one that carries these data items directly, including a flex array for the variable number of EFI memory descriptors that the boot service may return. 
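
Concretely, the calling convention after this change looks as follows
(a sketch assembled from the hunks below, abridged):

	struct efi_boot_memmap *map;
	efi_status_t status;
	int i;

	status = efi_get_memory_map(&map);	/* one pool allocation */
	if (status != EFI_SUCCESS)
		return status;

	/* descriptors are carried inline in the trailing flex array */
	for (i = 0; i < map->map_size / map->desc_size; i++) {
		efi_memory_desc_t *md = (void *)map->map + i * map->desc_size;
		/* ... inspect md ... */
	}

	efi_bs_call(free_pool, map);	/* frees map and descriptors together */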
Change-Id: I27ce9221e8aef3f8b53bbdc5695b6c42dff3afeb Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/arm64-stub.c | 17 +---- .../firmware/efi/libstub/efi-stub-helper.c | 26 +++---- drivers/firmware/efi/libstub/efistub.h | 15 +--- drivers/firmware/efi/libstub/fdt.c | 37 ++++------ drivers/firmware/efi/libstub/mem.c | 74 ++++++------------- drivers/firmware/efi/libstub/randomalloc.c | 23 ++---- drivers/firmware/efi/libstub/relocate.c | 21 ++---- drivers/firmware/efi/libstub/x86-stub.c | 20 ++--- include/linux/efi.h | 9 +++ 9 files changed, 85 insertions(+), 157 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 7f4bafcd9d33..43ddf338d09c 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -42,26 +42,17 @@ efi_status_t check_platform_features(void) */ static bool check_image_region(u64 base, u64 size) { - unsigned long map_size, desc_size, buff_size; - efi_memory_desc_t *memory_map; - struct efi_boot_memmap map; + struct efi_boot_memmap *map; efi_status_t status; bool ret = false; int map_offset; - map.map = &memory_map; - map.map_size = &map_size; - map.desc_size = &desc_size; - map.desc_ver = NULL; - map.key_ptr = NULL; - map.buff_size = &buff_size; - status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) return false; - for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { - efi_memory_desc_t *md = (void *)memory_map + map_offset; + for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { + efi_memory_desc_t *md = (void *)map->map + map_offset; u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE; /* @@ -74,7 +65,7 @@ static bool check_image_region(u64 base, u64 size) } } - efi_bs_call(free_pool, memory_map); + efi_bs_call(free_pool, map); return ret; } diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aa8da0a49829..62858261e072 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -419,7 +419,6 @@ char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len) /** * efi_exit_boot_services() - Exit boot services * @handle: handle of the exiting image - * @map: pointer to receive the memory map * @priv: argument to be passed to @priv_func * @priv_func: function to process the memory map before exiting boot services * @@ -432,14 +431,13 @@ char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len) * * Return: status code */ -efi_status_t efi_exit_boot_services(void *handle, - struct efi_boot_memmap *map, - void *priv, +efi_status_t efi_exit_boot_services(void *handle, void *priv, efi_exit_boot_map_processing priv_func) { + struct efi_boot_memmap *map; efi_status_t status; - status = efi_get_memory_map(map); + status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) goto fail; @@ -451,7 +449,7 @@ efi_status_t efi_exit_boot_services(void *handle, if (efi_disable_pci_dma) efi_pci_disable_bridge_busmaster(); - status = efi_bs_call(exit_boot_services, handle, *map->key_ptr); + status = efi_bs_call(exit_boot_services, handle, map->map_key); if (status == EFI_INVALID_PARAMETER) { /* @@ -467,13 +465,13 @@ efi_status_t efi_exit_boot_services(void *handle, * buffer should account for any changes in the map so the call * to get_memory_map() is expected to succeed here. 
*/ - *map->map_size = *map->buff_size; + map->map_size = map->buff_size; status = efi_bs_call(get_memory_map, - map->map_size, - *map->map, - map->key_ptr, - map->desc_size, - map->desc_ver); + &map->map_size, + &map->map, + &map->map_key, + &map->desc_size, + &map->desc_ver); /* exit_boot_services() was called, thus cannot free */ if (status != EFI_SUCCESS) @@ -484,7 +482,7 @@ efi_status_t efi_exit_boot_services(void *handle, if (status != EFI_SUCCESS) goto fail; - status = efi_bs_call(exit_boot_services, handle, *map->key_ptr); + status = efi_bs_call(exit_boot_services, handle, map->map_key); } /* exit_boot_services() was called, thus cannot free */ @@ -494,7 +492,7 @@ efi_status_t efi_exit_boot_services(void *handle, return EFI_SUCCESS; free_map: - efi_bs_call(free_pool, *map->map); + efi_bs_call(free_pool, map); fail: return status; } diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 463b5a3e55fb..540545296cf2 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -157,15 +157,6 @@ void efi_set_u64_split(u64 data, u32 *lo, u32 *hi) */ #define EFI_MMAP_NR_SLACK_SLOTS 8 -struct efi_boot_memmap { - efi_memory_desc_t **map; - unsigned long *map_size; - unsigned long *desc_size; - u32 *desc_ver; - unsigned long *key_ptr; - unsigned long *buff_size; -}; - typedef struct efi_generic_dev_path efi_device_path_protocol_t; typedef void *efi_event_t; @@ -743,9 +734,7 @@ typedef efi_status_t (*efi_exit_boot_map_processing)( struct efi_boot_memmap *map, void *priv); -efi_status_t efi_exit_boot_services(void *handle, - struct efi_boot_memmap *map, - void *priv, +efi_status_t efi_exit_boot_services(void *handle, void *priv, efi_exit_boot_map_processing priv_func); efi_status_t allocate_new_fdt_and_exit_boot(void *handle, @@ -787,7 +776,7 @@ void efi_apply_loadoptions_quirk(const void **load_options, int *load_options_si char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len); -efi_status_t efi_get_memory_map(struct efi_boot_memmap *map); +efi_status_t efi_get_memory_map(struct efi_boot_memmap **map); efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, unsigned long max); diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index df699de805f3..dd62b2a9fd6f 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -170,25 +170,25 @@ static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) if (node < 0) return EFI_LOAD_ERROR; - fdt_val64 = cpu_to_fdt64((unsigned long)*map->map); + fdt_val64 = cpu_to_fdt64((unsigned long)map->map); err = fdt_setprop_inplace_var(fdt, node, "linux,uefi-mmap-start", fdt_val64); if (err) return EFI_LOAD_ERROR; - fdt_val32 = cpu_to_fdt32(*map->map_size); + fdt_val32 = cpu_to_fdt32(map->map_size); err = fdt_setprop_inplace_var(fdt, node, "linux,uefi-mmap-size", fdt_val32); if (err) return EFI_LOAD_ERROR; - fdt_val32 = cpu_to_fdt32(*map->desc_size); + fdt_val32 = cpu_to_fdt32(map->desc_size); err = fdt_setprop_inplace_var(fdt, node, "linux,uefi-mmap-desc-size", fdt_val32); if (err) return EFI_LOAD_ERROR; - fdt_val32 = cpu_to_fdt32(*map->desc_ver); + fdt_val32 = cpu_to_fdt32(map->desc_ver); err = fdt_setprop_inplace_var(fdt, node, "linux,uefi-mmap-desc-ver", fdt_val32); if (err) @@ -198,21 +198,24 @@ static efi_status_t update_fdt_memmap(void *fdt, struct efi_boot_memmap *map) } struct exit_boot_struct { + struct efi_boot_memmap *boot_memmap; efi_memory_desc_t 
*runtime_map; int runtime_entry_count; void *new_fdt_addr; }; -static efi_status_t exit_boot_func(struct efi_boot_memmap *map, - void *priv) +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) { struct exit_boot_struct *p = priv; + + p->boot_memmap = map; + /* * Update the memory map with virtual addresses. The function will also * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME * entries so that we can pass it straight to SetVirtualAddressMap() */ - efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, + efi_get_virtmap(map->map, map->map_size, map->desc_size, p->runtime_map, &p->runtime_entry_count); return update_fdt_memmap(p->new_fdt_addr, map); @@ -244,20 +247,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long fdt_addr, unsigned long fdt_size) { - unsigned long map_size, desc_size, buff_size; + unsigned long desc_size; u32 desc_ver; - unsigned long mmap_key; - efi_memory_desc_t *memory_map; efi_status_t status; - struct efi_boot_memmap map; struct exit_boot_struct priv; - map.map_size = &map_size; - map.desc_size = &desc_size; - map.desc_ver = &desc_ver; - map.key_ptr = &mmap_key; - map.buff_size = &buff_size; - if (!efi_novamap) { status = efi_alloc_virtmap(&priv.runtime_map, &desc_size, &desc_ver); @@ -269,7 +263,6 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, efi_info("Exiting boot services and installing virtual address map...\n"); - map.map = &memory_map; status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, max_addr); if (status != EFI_SUCCESS) { efi_err("Unable to allocate memory for new device tree.\n"); @@ -287,7 +280,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, priv.new_fdt_addr = (void *)*new_fdt_addr; - status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func); + status = efi_exit_boot_services(handle, &priv, exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; @@ -306,6 +299,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, * incoming kernel but proceed normally otherwise. */ if (status != EFI_SUCCESS) { + efi_memory_desc_t *p; int l; /* @@ -314,8 +308,9 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, * the incoming kernel that no virtual translation has * been installed. */ - for (l = 0; l < map_size; l += desc_size) { - efi_memory_desc_t *p = (void *)memory_map + l; + for (l = 0; l < priv.boot_memmap->map_size; + l += priv.boot_memmap->desc_size) { + p = (void *)priv.boot_memmap->map + l; if (p->attribute & EFI_MEMORY_RUNTIME) p->virt_addr = 0; diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index feef8d4be113..c92b7dbc6dfe 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -5,71 +5,45 @@ #include "efistub.h" -static inline bool mmap_has_headroom(unsigned long buff_size, - unsigned long map_size, - unsigned long desc_size) -{ - unsigned long slack = buff_size - map_size; - - return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS; -} - /** * efi_get_memory_map() - get memory map - * @map: on return pointer to memory map + * @map: pointer to memory map pointer to which to assign the + * newly allocated memory map * * Retrieve the UEFI memory map. The allocated memory leaves room for * up to EFI_MMAP_NR_SLACK_SLOTS additional memory map entries. 
* * Return: status code */ -efi_status_t efi_get_memory_map(struct efi_boot_memmap *map) +efi_status_t efi_get_memory_map(struct efi_boot_memmap **map) { - efi_memory_desc_t *m = NULL; + struct efi_boot_memmap *m, tmp; efi_status_t status; - unsigned long key; - u32 desc_version; + unsigned long size; - *map->desc_size = sizeof(*m); - *map->map_size = *map->desc_size * 32; - *map->buff_size = *map->map_size; -again: - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, - *map->map_size, (void **)&m); + tmp.map_size = 0; + status = efi_bs_call(get_memory_map, &tmp.map_size, NULL, &tmp.map_key, + &tmp.desc_size, &tmp.desc_ver); + if (status != EFI_BUFFER_TOO_SMALL) + return EFI_LOAD_ERROR; + + size = tmp.map_size + tmp.desc_size * EFI_MMAP_NR_SLACK_SLOTS; + status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*m) + size, + (void **)&m); if (status != EFI_SUCCESS) - goto fail; + return status; - *map->desc_size = 0; - key = 0; - status = efi_bs_call(get_memory_map, map->map_size, m, - &key, map->desc_size, &desc_version); - if (status == EFI_BUFFER_TOO_SMALL || - !mmap_has_headroom(*map->buff_size, *map->map_size, - *map->desc_size)) { - efi_bs_call(free_pool, m); - /* - * Make sure there is some entries of headroom so that the - * buffer can be reused for a new map after allocations are - * no longer permitted. Its unlikely that the map will grow to - * exceed this headroom once we are ready to trigger - * ExitBootServices() - */ - *map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS; - *map->buff_size = *map->map_size; - goto again; - } + m->buff_size = m->map_size = size; + status = efi_bs_call(get_memory_map, &m->map_size, m->map, &m->map_key, + &m->desc_size, &m->desc_ver); + if (status != EFI_SUCCESS) + goto free_map; - if (status == EFI_SUCCESS) { - if (map->key_ptr) - *map->key_ptr = key; - if (map->desc_ver) - *map->desc_ver = desc_version; - } else { - efi_bs_call(free_pool, m); - } + *map = m; + return EFI_SUCCESS; -fail: - *map->map = m; +free_map: + efi_bs_call(free_pool, m); return status; } diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index 724155b9e10d..b04b3a357148 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -55,19 +55,10 @@ efi_status_t efi_random_alloc(unsigned long size, unsigned long *addr, unsigned long random_seed) { - unsigned long map_size, desc_size, total_slots = 0, target_slot; - unsigned long buff_size; + unsigned long total_slots = 0, target_slot; + struct efi_boot_memmap *map; efi_status_t status; - efi_memory_desc_t *memory_map; int map_offset; - struct efi_boot_memmap map; - - map.map = &memory_map; - map.map_size = &map_size; - map.desc_size = &desc_size; - map.desc_ver = NULL; - map.key_ptr = NULL; - map.buff_size = &buff_size; status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) @@ -79,8 +70,8 @@ efi_status_t efi_random_alloc(unsigned long size, size = round_up(size, EFI_ALLOC_ALIGN); /* count the suitable slots in each memory map entry */ - for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { - efi_memory_desc_t *md = (void *)memory_map + map_offset; + for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { + efi_memory_desc_t *md = (void *)map->map + map_offset; unsigned long slots; slots = get_entry_num_slots(md, size, ilog2(align)); @@ -102,8 +93,8 @@ efi_status_t efi_random_alloc(unsigned long size, * to calculate the randomly chosen address, and allocate it directly * 
using EFI_ALLOCATE_ADDRESS. */ - for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { - efi_memory_desc_t *md = (void *)memory_map + map_offset; + for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) { + efi_memory_desc_t *md = (void *)map->map + map_offset; efi_physical_addr_t target; unsigned long pages; @@ -122,7 +113,7 @@ efi_status_t efi_random_alloc(unsigned long size, break; } - efi_bs_call(free_pool, memory_map); + efi_bs_call(free_pool, map); return status; } diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c index 8ee9eb2b9039..cd80db33ab1e 100644 --- a/drivers/firmware/efi/libstub/relocate.c +++ b/drivers/firmware/efi/libstub/relocate.c @@ -23,21 +23,12 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long *addr, unsigned long min) { - unsigned long map_size, desc_size, buff_size; - efi_memory_desc_t *map; + struct efi_boot_memmap *map; efi_status_t status; unsigned long nr_pages; int i; - struct efi_boot_memmap boot_map; - boot_map.map = ↦ - boot_map.map_size = &map_size; - boot_map.desc_size = &desc_size; - boot_map.desc_ver = NULL; - boot_map.key_ptr = NULL; - boot_map.buff_size = &buff_size; - - status = efi_get_memory_map(&boot_map); + status = efi_get_memory_map(&map); if (status != EFI_SUCCESS) goto fail; @@ -52,12 +43,12 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, size = round_up(size, EFI_ALLOC_ALIGN); nr_pages = size / EFI_PAGE_SIZE; - for (i = 0; i < map_size / desc_size; i++) { + for (i = 0; i < map->map_size / map->desc_size; i++) { efi_memory_desc_t *desc; - unsigned long m = (unsigned long)map; + unsigned long m = (unsigned long)map->map; u64 start, end; - desc = efi_early_memdesc_ptr(m, desc_size, i); + desc = efi_early_memdesc_ptr(m, map->desc_size, i); if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; @@ -87,7 +78,7 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, } } - if (i == map_size / desc_size) + if (i == map->map_size / map->desc_size) status = EFI_NOT_FOUND; efi_bs_call(free_pool, map); diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 9f998e6bff95..3f382899bf87 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -620,32 +620,22 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, efi_set_u64_split((unsigned long)efi_system_table, &p->efi->efi_systab, &p->efi->efi_systab_hi); - p->efi->efi_memdesc_size = *map->desc_size; - p->efi->efi_memdesc_version = *map->desc_ver; - efi_set_u64_split((unsigned long)*map->map, + p->efi->efi_memdesc_size = map->desc_size; + p->efi->efi_memdesc_version = map->desc_ver; + efi_set_u64_split((unsigned long)map->map, &p->efi->efi_memmap, &p->efi->efi_memmap_hi); - p->efi->efi_memmap_size = *map->map_size; + p->efi->efi_memmap_size = map->map_size; return EFI_SUCCESS; } static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) { - unsigned long map_sz, key, desc_size, buff_size; - efi_memory_desc_t *mem_map; struct setup_data *e820ext = NULL; __u32 e820ext_size = 0; efi_status_t status; - __u32 desc_version; - struct efi_boot_memmap map; struct exit_boot_struct priv; - map.map = &mem_map; - map.map_size = &map_sz; - map.desc_size = &desc_size; - map.desc_ver = &desc_version; - map.key_ptr = &key; - map.buff_size = &buff_size; priv.boot_params = boot_params; priv.efi = &boot_params->efi_info; @@ -654,7 
+644,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 		return status;
 
 	/* Might as well exit boot services now */
-	status = efi_exit_boot_services(handle, &map, &priv, exit_boot_func);
+	status = efi_exit_boot_services(handle, &priv, exit_boot_func);
 	if (status != EFI_SUCCESS)
 		return status;
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 084990390420..670f0fbc4647 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -456,6 +456,15 @@ typedef union {
 	efi_system_table_32_t mixed_mode;
 } efi_system_table_t;
 
+struct efi_boot_memmap {
+	unsigned long	map_size;
+	unsigned long	desc_size;
+	u32		desc_ver;
+	unsigned long	map_key;
+	unsigned long	buff_size;
+	efi_memory_desc_t map[];
+};
+
 /*
  * Architecture independent structure for describing a memory map for the
  * benefit of efi_memmap_init_early(), and for passing context between
-- 
Gitee

From 37fe08f0a5440be8dfcf6a895142c925bd12001d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Sun, 18 Sep 2022 20:02:44 +0200
Subject: [PATCH 028/241] efi: libstub: remove pointless goto kludge

Remove some goto cruft that serves no purpose and obfuscates the code.

Change-Id: I51ad9ec23be276c7b93b175ca4ffa4543f0d14d4
Signed-off-by: Ard Biesheuvel
---
 .../firmware/efi/libstub/efi-stub-helper.c    | 22 ++++++-------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 62858261e072..a6fa0b5e00f3 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -438,13 +438,14 @@ efi_status_t efi_exit_boot_services(void *handle, void *priv,
 	efi_status_t status;
 
 	status = efi_get_memory_map(&map);
-
 	if (status != EFI_SUCCESS)
-		goto fail;
+		return status;
 
 	status = priv_func(map, priv);
-	if (status != EFI_SUCCESS)
-		goto free_map;
+	if (status != EFI_SUCCESS) {
+		efi_bs_call(free_pool, map);
+		return status;
+	}
 
 	if (efi_disable_pci_dma)
 		efi_pci_disable_bridge_busmaster();
@@ -475,25 +476,16 @@ efi_status_t efi_exit_boot_services(void *handle, void *priv,
 
 		/* exit_boot_services() was called, thus cannot free */
 		if (status != EFI_SUCCESS)
-			goto fail;
+			return status;
 
 		status = priv_func(map, priv);
 		/* exit_boot_services() was called, thus cannot free */
 		if (status != EFI_SUCCESS)
-			goto fail;
+			return status;
 
 		status = efi_bs_call(exit_boot_services, handle, map->map_key);
 	}
 
-	/* exit_boot_services() was called, thus cannot free */
-	if (status != EFI_SUCCESS)
-		goto fail;
-
-	return EFI_SUCCESS;
-
-free_map:
-	efi_bs_call(free_pool, map);
-fail:
 	return status;
 }
-- 
Gitee

From 9f242c734f2d3458bf48d64946ec275a2fe3b4c3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Fri, 16 Sep 2022 14:03:06 +0200
Subject: [PATCH 029/241] efi: libstub: unify initrd loading between architectures

Use an EFI configuration table to pass the initrd to the core kernel,
instead of per-arch methods. This cleans up the code considerably, and
should make it easier for architectures to get rid of their reliance
on DT for doing EFI boot in the future.
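
For loader authors, the handover this enables is deliberately small and
does not require LoadFile2. The following is an illustrative sketch
only: install_initrd_table() is a made-up name, and struct
linux_efi_initrd is restated inline so the snippet is self-contained
(the kernel-side definition is the one this patch adds to
include/linux/efi.h):

    /* Sketch: publish a loaded initrd to the kernel via a config table. */
    struct linux_efi_initrd {
            unsigned long base;     /* physical address of the initrd */
            unsigned long size;     /* size in bytes */
    };

    static efi_status_t install_initrd_table(unsigned long base,
                                             unsigned long size)
    {
            efi_guid_t guid = LINUX_EFI_INITRD_MEDIA_GUID;
            struct linux_efi_initrd *tbl;
            efi_status_t status;

            /* pool memory stays valid until the OS has consumed the table */
            status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*tbl),
                                 (void **)&tbl);
            if (status != EFI_SUCCESS)
                    return status;

            tbl->base = base;
            tbl->size = size;
            return efi_bs_call(install_configuration_table, &guid, tbl);
    }

On the kernel side, efi_config_parse_tables() only has to match the
GUID, early_memremap() the two-word table and seed phys_initrd_start
and phys_initrd_size, which is exactly what the efi.c hunk below does.
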
Change-Id: I0d3885c14ba44ba7327770335cc67913dff89a62 Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 15 ++++ .../firmware/efi/libstub/efi-stub-helper.c | 79 +++++++++++-------- drivers/firmware/efi/libstub/efi-stub.c | 14 ++-- drivers/firmware/efi/libstub/efistub.h | 6 +- drivers/firmware/efi/libstub/fdt.c | 41 +++------- drivers/firmware/efi/libstub/file.c | 3 + drivers/firmware/efi/libstub/x86-stub.c | 11 +-- include/linux/efi.h | 5 ++ 8 files changed, 93 insertions(+), 81 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 332739f3eded..7670fab162ff 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,7 @@ EXPORT_SYMBOL(efi); unsigned long __ro_after_init efi_rng_seed = EFI_INVALID_TABLE_ADDR; static unsigned long __initdata mem_reserve = EFI_INVALID_TABLE_ADDR; static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR; +static unsigned long __initdata initrd = EFI_INVALID_TABLE_ADDR; struct mm_struct efi_mm = { .mm_rb = RB_ROOT, @@ -522,6 +524,7 @@ static const efi_config_table_type_t common_tables[] __initconst = { {LINUX_EFI_TPM_EVENT_LOG_GUID, &efi.tpm_log, "TPMEventLog" }, {LINUX_EFI_TPM_FINAL_LOG_GUID, &efi.tpm_final_log, "TPMFinalLog" }, {LINUX_EFI_MEMRESERVE_TABLE_GUID, &mem_reserve, "MEMRESERVE" }, + {LINUX_EFI_INITRD_MEDIA_GUID, &initrd, "INITRD" }, {EFI_RT_PROPERTIES_TABLE_GUID, &rt_prop, "RTPROP" }, #ifdef CONFIG_EFI_RCI2_TABLE {DELLEMC_EFI_RCI2_TABLE_GUID, &rci2_table_phys }, @@ -661,6 +664,18 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, } } + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && + initrd != EFI_INVALID_TABLE_ADDR && phys_initrd_size == 0) { + struct linux_efi_initrd *tbl; + + tbl = early_memremap(initrd, sizeof(*tbl)); + if (tbl) { + phys_initrd_start = tbl->base; + phys_initrd_size = tbl->size; + early_memunmap(tbl, sizeof(*tbl)); + } + } + return 0; } diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index a6fa0b5e00f3..729277144739 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -550,20 +550,16 @@ static const struct { * * %EFI_SUCCESS if the initrd was loaded successfully, in which * case @load_addr and @load_size are assigned accordingly * * %EFI_NOT_FOUND if no LoadFile2 protocol exists on the initrd device path - * * %EFI_INVALID_PARAMETER if load_addr == NULL or load_size == NULL * * %EFI_OUT_OF_RESOURCES if memory allocation failed * * %EFI_LOAD_ERROR in all other cases */ static -efi_status_t efi_load_initrd_dev_path(unsigned long *load_addr, - unsigned long *load_size, +efi_status_t efi_load_initrd_dev_path(struct linux_efi_initrd *initrd, unsigned long max) { efi_guid_t lf2_proto_guid = EFI_LOAD_FILE2_PROTOCOL_GUID; efi_device_path_protocol_t *dp; efi_load_file2_protocol_t *lf2; - unsigned long initrd_addr; - unsigned long initrd_size; efi_handle_t handle; efi_status_t status; @@ -577,75 +573,94 @@ efi_status_t efi_load_initrd_dev_path(unsigned long *load_addr, if (status != EFI_SUCCESS) return status; - status = efi_call_proto(lf2, load_file, dp, false, &initrd_size, NULL); + initrd->size = 0; + status = efi_call_proto(lf2, load_file, dp, false, &initrd->size, NULL); if (status != EFI_BUFFER_TOO_SMALL) return EFI_LOAD_ERROR; - status = efi_allocate_pages(initrd_size, &initrd_addr, max); + status = 
efi_allocate_pages(initrd->size, &initrd->base, max); if (status != EFI_SUCCESS) return status; - status = efi_call_proto(lf2, load_file, dp, false, &initrd_size, - (void *)initrd_addr); + status = efi_call_proto(lf2, load_file, dp, false, &initrd->size, + (void *)initrd->base); if (status != EFI_SUCCESS) { - efi_free(initrd_size, initrd_addr); + efi_free(initrd->size, initrd->base); return EFI_LOAD_ERROR; } - - *load_addr = initrd_addr; - *load_size = initrd_size; return EFI_SUCCESS; } static efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image, - unsigned long *load_addr, - unsigned long *load_size, + struct linux_efi_initrd *initrd, unsigned long soft_limit, unsigned long hard_limit) { if (!IS_ENABLED(CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER) || - (IS_ENABLED(CONFIG_X86) && (!efi_is_native() || image == NULL))) { - *load_addr = *load_size = 0; - return EFI_SUCCESS; - } + (IS_ENABLED(CONFIG_X86) && (!efi_is_native() || image == NULL))) + return EFI_UNSUPPORTED; return handle_cmdline_files(image, L"initrd=", sizeof(L"initrd=") - 2, soft_limit, hard_limit, - load_addr, load_size); + &initrd->base, &initrd->size); } /** * efi_load_initrd() - Load initial RAM disk * @image: EFI loaded image protocol - * @load_addr: pointer to loaded initrd - * @load_size: size of loaded initrd * @soft_limit: preferred size of allocated memory for loading the initrd * @hard_limit: minimum size of allocated memory * * Return: status code */ efi_status_t efi_load_initrd(efi_loaded_image_t *image, - unsigned long *load_addr, - unsigned long *load_size, unsigned long soft_limit, - unsigned long hard_limit) + unsigned long hard_limit, + const struct linux_efi_initrd **out) { - efi_status_t status; + efi_guid_t tbl_guid = LINUX_EFI_INITRD_MEDIA_GUID; + efi_status_t status = EFI_SUCCESS; + struct linux_efi_initrd initrd, *tbl; - if (!load_addr || !load_size) - return EFI_INVALID_PARAMETER; + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || efi_noinitrd) + return EFI_SUCCESS; - status = efi_load_initrd_dev_path(load_addr, load_size, hard_limit); + status = efi_load_initrd_dev_path(&initrd, hard_limit); if (status == EFI_SUCCESS) { efi_info("Loaded initrd from LINUX_EFI_INITRD_MEDIA_GUID device path\n"); } else if (status == EFI_NOT_FOUND) { - status = efi_load_initrd_cmdline(image, load_addr, load_size, - soft_limit, hard_limit); - if (status == EFI_SUCCESS && *load_size > 0) + status = efi_load_initrd_cmdline(image, &initrd, soft_limit, + hard_limit); + /* command line loader disabled or no initrd= passed? 
*/
+		if (status == EFI_UNSUPPORTED || status == EFI_NOT_READY)
+			return EFI_SUCCESS;
+		if (status == EFI_SUCCESS)
 			efi_info("Loaded initrd from command line option\n");
 	}
 
+	if (status != EFI_SUCCESS)
+		goto failed;
+
+	status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(initrd),
+			     (void **)&tbl);
+	if (status != EFI_SUCCESS)
+		goto free_initrd;
+
+	*tbl = initrd;
+	status = efi_bs_call(install_configuration_table, &tbl_guid, tbl);
+	if (status != EFI_SUCCESS)
+		goto free_tbl;
+
+	if (out)
+		*out = tbl;
+	return EFI_SUCCESS;
+
+free_tbl:
+	efi_bs_call(free_pool, tbl);
+free_initrd:
+	efi_free(initrd.size, initrd.base);
+failed:
+	efi_err("Failed to load initrd: 0x%lx\n", status);
 	return status;
 }
 
diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 960903d36403..b6dca8a21664 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -122,8 +122,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 	unsigned long image_addr;
 	unsigned long image_size = 0;
 	/* addr/point and size pairs for memory management*/
-	unsigned long initrd_addr = 0;
-	unsigned long initrd_size = 0;
 	unsigned long fdt_addr = 0;  /* Original DTB */
 	unsigned long fdt_size = 0;
 	char *cmdline_ptr = NULL;
@@ -222,7 +220,7 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 	} else {
 		status = efi_load_dtb(image, &fdt_addr, &fdt_size);
 
-		if (status != EFI_SUCCESS) {
+		if (status != EFI_SUCCESS && status != EFI_NOT_READY) {
 			efi_err("Failed to load device tree!\n");
 			goto fail_free_image;
 		}
@@ -242,8 +240,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 
 	if (!efi_noinitrd) {
 		max_addr = efi_get_max_initrd_addr(image_addr);
-		status = efi_load_initrd(image, &initrd_addr, &initrd_size,
-					 ULONG_MAX, max_addr);
-		if (status != EFI_SUCCESS)
-			efi_err("Failed to load initrd!\n");
+		efi_load_initrd(image, ULONG_MAX, efi_get_max_initrd_addr(image_addr),
+				NULL);
 	}
@@ -290,10 +286,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 
 	status = allocate_new_fdt_and_exit_boot(handle, &fdt_addr,
 						efi_get_max_fdt_addr(image_addr),
-						initrd_addr, initrd_size,
 						cmdline_ptr, fdt_addr, fdt_size);
 	if (status != EFI_SUCCESS)
-		goto fail_free_initrd;
+		goto fail_free_fdt;
 
 	if (IS_ENABLED(CONFIG_ARM))
 		efi_handle_post_ebs_state();
@@ -301,10 +296,9 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
 	efi_enter_kernel(image_addr, fdt_addr, fdt_totalsize((void *)fdt_addr));
 	/* not reached */
 
-fail_free_initrd:
+fail_free_fdt:
 	efi_err("Failed to update FDT and exit boot services\n");
 
-	efi_free(initrd_size, initrd_addr);
 	efi_free(fdt_size, fdt_addr);
 
 fail_free_image:
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 540545296cf2..28efc8e8b054 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -740,7 +740,6 @@ efi_status_t efi_exit_boot_services(void *handle, void *priv,
 efi_status_t allocate_new_fdt_and_exit_boot(void *handle,
 					    unsigned long *new_fdt_addr,
 					    unsigned long max_addr,
-					    u64 initrd_addr, u64 initrd_size,
 					    char *cmdline_ptr,
 					    unsigned long fdt_addr,
 					    unsigned long fdt_size);
@@ -819,10 +818,9 @@ static inline efi_status_t efi_load_dtb(efi_loaded_image_t *image,
 }
 
 efi_status_t efi_load_initrd(efi_loaded_image_t *image,
-			     unsigned long *load_addr,
-			     unsigned long *load_size,
 			     unsigned long soft_limit,
-			     unsigned long hard_limit);
+			     unsigned long hard_limit,
+			     const struct linux_efi_initrd **out);
 
 /*
 * This function handles the
architcture specific differences between arm and * arm64 regarding where the kernel image must be loaded and any memory that diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index dd62b2a9fd6f..dcf7503d5665 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -28,8 +28,7 @@ static void fdt_update_cell_size(void *fdt) } static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size, - void *fdt, int new_fdt_size, char *cmdline_ptr, - u64 initrd_addr, u64 initrd_size) + void *fdt, int new_fdt_size, char *cmdline_ptr) { int node, num_rsv; int status; @@ -93,21 +92,6 @@ static efi_status_t update_fdt(void *orig_fdt, unsigned long orig_fdt_size, goto fdt_set_fail; } - /* Set initrd address/end in device tree, if present */ - if (initrd_size != 0) { - u64 initrd_image_end; - u64 initrd_image_start = cpu_to_fdt64(initrd_addr); - - status = fdt_setprop_var(fdt, node, "linux,initrd-start", initrd_image_start); - if (status) - goto fdt_set_fail; - - initrd_image_end = cpu_to_fdt64(initrd_addr + initrd_size); - status = fdt_setprop_var(fdt, node, "linux,initrd-end", initrd_image_end); - if (status) - goto fdt_set_fail; - } - /* Add FDT entries for EFI runtime services in chosen node. */ node = fdt_subnode_offset(fdt, 0, "chosen"); fdt_val64 = cpu_to_fdt64((u64)(unsigned long)efi_system_table); @@ -226,23 +210,19 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) #endif /* - * Allocate memory for a new FDT, then add EFI, commandline, and - * initrd related fields to the FDT. This routine increases the - * FDT allocation size until the allocated memory is large - * enough. EFI allocations are in EFI_PAGE_SIZE granules, - * which are fixed at 4K bytes, so in most cases the first - * allocation should succeed. - * EFI boot services are exited at the end of this function. - * There must be no allocations between the get_memory_map() - * call and the exit_boot_services() call, so the exiting of - * boot services is very tightly tied to the creation of the FDT - * with the final memory map in it. + * Allocate memory for a new FDT, then add EFI and commandline related fields + * to the FDT. This routine increases the FDT allocation size until the + * allocated memory is large enough. EFI allocations are in EFI_PAGE_SIZE + * granules, which are fixed at 4K bytes, so in most cases the first allocation + * should succeed. EFI boot services are exited at the end of this function. + * There must be no allocations between the get_memory_map() call and the + * exit_boot_services() call, so the exiting of boot services is very tightly + * tied to the creation of the FDT with the final memory map in it. 
*/ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, unsigned long *new_fdt_addr, unsigned long max_addr, - u64 initrd_addr, u64 initrd_size, char *cmdline_ptr, unsigned long fdt_addr, unsigned long fdt_size) @@ -270,8 +250,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, } status = update_fdt((void *)fdt_addr, fdt_size, - (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr, - initrd_addr, initrd_size); + (void *)*new_fdt_addr, MAX_FDT_SIZE, cmdline_ptr); if (status != EFI_SUCCESS) { efi_err("Unable to construct new device tree.\n"); diff --git a/drivers/firmware/efi/libstub/file.c b/drivers/firmware/efi/libstub/file.c index dd95f330fe6e..488a8027518d 100644 --- a/drivers/firmware/efi/libstub/file.c +++ b/drivers/firmware/efi/libstub/file.c @@ -238,6 +238,9 @@ efi_status_t handle_cmdline_files(efi_loaded_image_t *image, if (volume) volume->close(volume); + + if (*load_size == 0) + return EFI_NOT_READY; return EFI_SUCCESS; err_close_file: diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 3f382899bf87..ff960ed7bc7e 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -670,6 +670,7 @@ unsigned long efi_main(efi_handle_t handle, unsigned long bzimage_addr = (unsigned long)startup_32; unsigned long buffer_start, buffer_end; struct setup_header *hdr = &boot_params->hdr; + const struct linux_efi_initrd *initrd = NULL; efi_status_t status; efi_system_table = sys_table_arg; @@ -758,17 +759,17 @@ unsigned long efi_main(efi_handle_t handle, if (!efi_noinitrd) { unsigned long addr, size; - status = efi_load_initrd(image, &addr, &size, - hdr->initrd_addr_max, ULONG_MAX); + status = efi_load_initrd(image, hdr->initrd_addr_max, ULONG_MAX, + &initrd); if (status != EFI_SUCCESS) { efi_err("Failed to load initrd!\n"); goto fail; } - if (size > 0) { - efi_set_u64_split(addr, &hdr->ramdisk_image, + if (initrd && initrd->size > 0) { + efi_set_u64_split(initrd->base, &hdr->ramdisk_image, &boot_params->ext_ramdisk_image); - efi_set_u64_split(size, &hdr->ramdisk_size, + efi_set_u64_split(initrd->size, &hdr->ramdisk_size, &boot_params->ext_ramdisk_size); } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 670f0fbc4647..244d561b6c10 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1287,4 +1287,9 @@ static inline struct efi_mokvar_table_entry *efi_mokvar_entry_find( } #endif +struct linux_efi_initrd { + unsigned long base; + unsigned long size; +}; + #endif /* _LINUX_EFI_H */ -- Gitee From 69b0b7d93c4b638af0329f0c66ee3c011c10ddce Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 16 Sep 2022 18:51:36 +0200 Subject: [PATCH 030/241] efi: libstub: remove DT dependency from generic stub Refactor the generic EFI stub entry code so that all the dependencies on device tree are abstracted and hidden behind a generic efi_boot_kernel() routine that can also be implemented in other ways. This allows users of the generic stub to avoid using FDT for passing information to the core kernel. 
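
To make the new boundary concrete: an architecture that boots via EFI
but without FDT now only has to provide its own efi_boot_kernel() with
this signature, and everything up to the handover stays in generic
code. A hedged sketch of such a non-FDT implementation follows;
process_memmap() and enter_kernel() are illustrative placeholders, not
helpers added by this patch:

    /* Callback invoked by efi_exit_boot_services() with the final memory map */
    static efi_status_t process_memmap(struct efi_boot_memmap *map, void *priv)
    {
            /* snapshot whatever the architecture needs from @map here */
            return EFI_SUCCESS;
    }

    efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image,
                                 unsigned long kernel_addr, char *cmdline_ptr)
    {
            efi_status_t status;

            status = efi_exit_boot_services(handle, NULL, process_memmap);
            if (status != EFI_SUCCESS) {
                    efi_err("Failed to exit boot services\n");
                    return status;
            }

            enter_kernel(kernel_addr, cmdline_ptr);
            /* not reached */
    }

The FDT-based flavour below keeps exactly this contract, with
allocate_new_fdt_and_exit_boot() demoted to a static helper behind it.
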
Change-Id: I46c8004ea076f7e50c5ffad09f46c176cd009745 Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/efi-stub.c | 54 +-------------------- drivers/firmware/efi/libstub/efistub.h | 8 +--- drivers/firmware/efi/libstub/fdt.c | 62 +++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 63 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index b6dca8a21664..84c881e84229 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -10,7 +10,6 @@ */ #include -#include #include #include "efistub.h" @@ -122,14 +121,11 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, unsigned long image_addr; unsigned long image_size = 0; /* addr/point and size pairs for memory management*/ - unsigned long fdt_addr = 0; /* Original DTB */ - unsigned long fdt_size = 0; char *cmdline_ptr = NULL; int cmdline_size = 0; efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID; unsigned long reserve_addr = 0; unsigned long reserve_size = 0; - enum efi_secureboot_mode secure_boot; struct screen_info *si; efi_properties_table_t *prop_tbl; unsigned long max_addr; @@ -206,38 +202,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, /* Ask the firmware to clear memory on unclean shutdown */ efi_enable_reset_attack_mitigation(); - secure_boot = efi_get_secureboot(); - - /* - * Unauthenticated device tree data is a security hazard, so ignore - * 'dtb=' unless UEFI Secure Boot is disabled. We assume that secure - * boot is enabled if we can't determine its state. - */ - if (!IS_ENABLED(CONFIG_EFI_ARMSTUB_DTB_LOADER) || - secure_boot != efi_secureboot_mode_disabled) { - if (strstr(cmdline_ptr, "dtb=")) - efi_err("Ignoring DTB from command line.\n"); - } else { - status = efi_load_dtb(image, &fdt_addr, &fdt_size); - - if (status != EFI_SUCCESS && status != EFI_NOT_READY) { - efi_err("Failed to load device tree!\n"); - goto fail_free_image; - } - } - - if (fdt_addr) { - efi_info("Using DTB from command line\n"); - } else { - /* Look for a device tree configuration table entry. 
*/ - fdt_addr = (uintptr_t)get_fdt(&fdt_size); - if (fdt_addr) - efi_info("Using DTB from configuration table\n"); - } - - if (!fdt_addr) - efi_info("Generating empty DTB\n"); - if (!efi_noinitrd) { max_addr = efi_get_max_initrd_addr(image_addr); efi_load_initrd(image, ULONG_MAX, efi_get_max_initrd_addr(image_addr), @@ -286,24 +250,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, install_memreserve_table(); - status = allocate_new_fdt_and_exit_boot(handle, &fdt_addr, - efi_get_max_fdt_addr(image_addr), - cmdline_ptr, fdt_addr, fdt_size); - if (status != EFI_SUCCESS) - goto fail_free_fdt; - - if (IS_ENABLED(CONFIG_ARM)) - efi_handle_post_ebs_state(); - - efi_enter_kernel(image_addr, fdt_addr, fdt_totalsize((void *)fdt_addr)); - /* not reached */ - -fail_free_fdt: - efi_err("Failed to update FDT and exit boot services\n"); - - efi_free(fdt_size, fdt_addr); + status = efi_boot_kernel(handle, image, image_addr, cmdline_ptr); -fail_free_image: efi_free(image_size, image_addr); efi_free(reserve_size, reserve_addr); fail_free_screeninfo: diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 28efc8e8b054..409575783819 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -737,12 +737,8 @@ typedef efi_status_t (*efi_exit_boot_map_processing)( efi_status_t efi_exit_boot_services(void *handle, void *priv, efi_exit_boot_map_processing priv_func); -efi_status_t allocate_new_fdt_and_exit_boot(void *handle, - unsigned long *new_fdt_addr, - unsigned long max_addr, - char *cmdline_ptr, - unsigned long fdt_addr, - unsigned long fdt_size); +efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, + unsigned long kernel_addr, char *cmdline_ptr); void *get_fdt(unsigned long *fdt_size); diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index dcf7503d5665..2769f2b5d943 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -219,18 +219,19 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) * exit_boot_services() call, so the exiting of boot services is very tightly * tied to the creation of the FDT with the final memory map in it. */ - +static efi_status_t allocate_new_fdt_and_exit_boot(void *handle, + efi_loaded_image_t *image, unsigned long *new_fdt_addr, unsigned long max_addr, - char *cmdline_ptr, - unsigned long fdt_addr, - unsigned long fdt_size) + char *cmdline_ptr) { unsigned long desc_size; u32 desc_ver; efi_status_t status; struct exit_boot_struct priv; + unsigned long fdt_addr = 0; + unsigned long fdt_size = 0; if (!efi_novamap) { status = efi_alloc_virtmap(&priv.runtime_map, &desc_size, @@ -241,6 +242,36 @@ efi_status_t allocate_new_fdt_and_exit_boot(void *handle, } } + /* + * Unauthenticated device tree data is a security hazard, so ignore + * 'dtb=' unless UEFI Secure Boot is disabled. We assume that secure + * boot is enabled if we can't determine its state. + */ + if (!IS_ENABLED(CONFIG_EFI_ARMSTUB_DTB_LOADER) || + efi_get_secureboot() != efi_secureboot_mode_disabled) { + if (strstr(cmdline_ptr, "dtb=")) + efi_err("Ignoring DTB from command line.\n"); + } else { + status = efi_load_dtb(image, &fdt_addr, &fdt_size); + + if (status != EFI_SUCCESS && status != EFI_NOT_READY) { + efi_err("Failed to load device tree!\n"); + goto fail; + } + } + + if (fdt_addr) { + efi_info("Using DTB from command line\n"); + } else { + /* Look for a device tree configuration table entry. 
*/
+		fdt_addr = (uintptr_t)get_fdt(&fdt_size);
+		if (fdt_addr)
+			efi_info("Using DTB from configuration table\n");
+	}
+
+	if (!fdt_addr)
+		efi_info("Generating empty DTB\n");
+
 	efi_info("Exiting boot services and installing virtual address map...\n");
 
 	status = efi_allocate_pages(MAX_FDT_SIZE, new_fdt_addr, max_addr);
@@ -304,11 +335,34 @@
 	efi_free(MAX_FDT_SIZE, *new_fdt_addr);
 
 fail:
+	efi_free(fdt_size, fdt_addr);
+
 	efi_bs_call(free_pool, priv.runtime_map);
 
 	return EFI_LOAD_ERROR;
 }
 
+efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image,
+			     unsigned long kernel_addr, char *cmdline_ptr)
+{
+	unsigned long fdt_addr;
+	efi_status_t status;
+
+	status = allocate_new_fdt_and_exit_boot(handle, image, &fdt_addr,
+						efi_get_max_fdt_addr(kernel_addr),
+						cmdline_ptr);
+	if (status != EFI_SUCCESS) {
+		efi_err("Failed to update FDT and exit boot services\n");
+		return status;
+	}
+
+	if (IS_ENABLED(CONFIG_ARM))
+		efi_handle_post_ebs_state();
+
+	efi_enter_kernel(kernel_addr, fdt_addr, fdt_totalsize((void *)fdt_addr));
+	/* not reached */
+}
+
 void *get_fdt(unsigned long *fdt_size)
 {
 	void *fdt;
-- 
Gitee

From caf2551038a4e7dc347d5a350dcb148b995fac5a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 15 Sep 2022 23:20:06 +0200
Subject: [PATCH 031/241] efi: libstub: install boot-time memory map as config table

Expose the EFI boot time memory map to the kernel via a configuration
table. This is arch agnostic and enables future changes that remove the
dependency on DT on architectures that don't otherwise rely on it.

Change-Id: Iad6a2b5793e395acc163283c3e14373ad8cdbc76
Signed-off-by: Ard Biesheuvel
---
 drivers/firmware/efi/libstub/arm64-stub.c     |  2 +-
 .../firmware/efi/libstub/efi-stub-helper.c    |  2 +-
 drivers/firmware/efi/libstub/efistub.h        |  3 ++-
 drivers/firmware/efi/libstub/mem.c            | 27 ++++++++++++++++---
 drivers/firmware/efi/libstub/randomalloc.c    |  2 +-
 drivers/firmware/efi/libstub/relocate.c       |  2 +-
 include/linux/efi.h                           |  1 +
 7 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 43ddf338d09c..0e721c069781 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -47,7 +47,7 @@ static bool check_image_region(u64 base, u64 size)
 	bool ret = false;
 	int map_offset;
 
-	status = efi_get_memory_map(&map);
+	status = efi_get_memory_map(&map, false);
 	if (status != EFI_SUCCESS)
 		return false;
 
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 729277144739..210f6f8bfa6a 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -437,7 +437,7 @@ efi_status_t efi_exit_boot_services(void *handle, void *priv,
 	struct efi_boot_memmap *map;
 	efi_status_t status;
 
-	status = efi_get_memory_map(&map);
+	status = efi_get_memory_map(&map, true);
 	if (status != EFI_SUCCESS)
 		return status;
 
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 409575783819..881a698f96a7 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -771,7 +771,8 @@ void efi_apply_loadoptions_quirk(const void **load_options, int *load_options_si
 
 char *efi_convert_cmdline(efi_loaded_image_t *image, int *cmd_line_len);
 
-efi_status_t efi_get_memory_map(struct efi_boot_memmap **map);
+efi_status_t efi_get_memory_map(struct efi_boot_memmap
**map, + bool install_cfg_tbl); efi_status_t efi_allocate_pages(unsigned long size, unsigned long *addr, unsigned long max); diff --git a/drivers/firmware/efi/libstub/mem.c b/drivers/firmware/efi/libstub/mem.c index c92b7dbc6dfe..45841ef55a9f 100644 --- a/drivers/firmware/efi/libstub/mem.c +++ b/drivers/firmware/efi/libstub/mem.c @@ -9,14 +9,20 @@ * efi_get_memory_map() - get memory map * @map: pointer to memory map pointer to which to assign the * newly allocated memory map + * @install_cfg_tbl: whether or not to install the boot memory map as a + * configuration table * * Retrieve the UEFI memory map. The allocated memory leaves room for * up to EFI_MMAP_NR_SLACK_SLOTS additional memory map entries. * * Return: status code */ -efi_status_t efi_get_memory_map(struct efi_boot_memmap **map) +efi_status_t efi_get_memory_map(struct efi_boot_memmap **map, + bool install_cfg_tbl) { + int memtype = install_cfg_tbl ? EFI_ACPI_RECLAIM_MEMORY + : EFI_LOADER_DATA; + efi_guid_t tbl_guid = LINUX_EFI_BOOT_MEMMAP_GUID; struct efi_boot_memmap *m, tmp; efi_status_t status; unsigned long size; @@ -28,20 +34,35 @@ efi_status_t efi_get_memory_map(struct efi_boot_memmap **map) return EFI_LOAD_ERROR; size = tmp.map_size + tmp.desc_size * EFI_MMAP_NR_SLACK_SLOTS; - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, sizeof(*m) + size, + status = efi_bs_call(allocate_pool, memtype, sizeof(*m) + size, (void **)&m); if (status != EFI_SUCCESS) return status; + if (install_cfg_tbl) { + /* + * Installing a configuration table might allocate memory, and + * this may modify the memory map. This means we should install + * the configuration table first, and re-install or delete it + * as needed. + */ + status = efi_bs_call(install_configuration_table, &tbl_guid, m); + if (status != EFI_SUCCESS) + goto free_map; + } + m->buff_size = m->map_size = size; status = efi_bs_call(get_memory_map, &m->map_size, m->map, &m->map_key, &m->desc_size, &m->desc_ver); if (status != EFI_SUCCESS) - goto free_map; + goto uninstall_table; *map = m; return EFI_SUCCESS; +uninstall_table: + if (install_cfg_tbl) + efi_bs_call(install_configuration_table, &tbl_guid, NULL); free_map: efi_bs_call(free_pool, m); return status; diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index b04b3a357148..4f15e6103a2e 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -60,7 +60,7 @@ efi_status_t efi_random_alloc(unsigned long size, efi_status_t status; int map_offset; - status = efi_get_memory_map(&map); + status = efi_get_memory_map(&map, false); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c index cd80db33ab1e..bf6fbd5d22a1 100644 --- a/drivers/firmware/efi/libstub/relocate.c +++ b/drivers/firmware/efi/libstub/relocate.c @@ -28,7 +28,7 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, unsigned long nr_pages; int i; - status = efi_get_memory_map(&map); + status = efi_get_memory_map(&map, false); if (status != EFI_SUCCESS) goto fail; diff --git a/include/linux/efi.h b/include/linux/efi.h index 244d561b6c10..ff717832b345 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -363,6 +363,7 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 
0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68)
 #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID	EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89)
+#define LINUX_EFI_BOOT_MEMMAP_GUID		EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4)
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID		EFI_GUID(0x2d9f28a2, 0xa886, 0x456a, 0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
-- 
Gitee

From 5c42ec9d197b433cd4fb75c297dd82e0c19ea130 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 26 Sep 2022 23:23:04 +0200
Subject: [PATCH 032/241] efi: libstub: Undeprecate the command line initrd loader

The initrd= command line loader can be useful for development, but it
was limited to loading files from the same file system as the loaded
kernel (and it didn't work on x86 mixed mode). As both issues have been
fixed, and initrd= can now be used with files residing on any simple
file system exposed by the EFI firmware, let's permit it to be enabled
on RISC-V and LoongArch, which did not support it up to this point.

Note that LoadFile2 remains the preferred option, as it is much simpler
to use and implement. However, generic loaders (including the UEFI
Shell) may not implement it, so the initrd= loader can now be used
there as well (if enabled in the build).

Change-Id: Idc2ea1e977ead6a5efd3308fd45998644e3cccdb
Signed-off-by: Ard Biesheuvel
Signed-off-by: Huacai Chen
---
 drivers/firmware/efi/Kconfig | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index d9895491ff34..3aded32bdfdb 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -123,13 +123,16 @@ config EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER
 	bool "Enable the command line initrd loader" if !X86
 	depends on EFI_STUB && (EFI_GENERIC_STUB || X86)
 	default y
-	depends on !RISCV
 	help
 	  Select this config option to add support for the initrd= command
-	  line parameter, allowing an initrd that resides on the same volume
-	  as the kernel image to be loaded into memory.
-
-	  This method is deprecated.
+	  line parameter, allowing an initrd to be loaded into memory that
+	  resides on a file system backed by an implementation of
+	  EFI_SIMPLE_FILE_SYSTEM_PROTOCOL.
+
+	  This method has been superseded by the simpler LoadFile2 based
+	  initrd loading method, but the initrd= loader is retained as it
+	  can be used from the UEFI Shell or other generic loaders that
+	  don't implement the Linux specific LoadFile2 method.
 
 config EFI_BOOTLOADER_CONTROL
 	tristate "EFI Bootloader Control"
-- 
Gitee

From 8329d7c9356b8824d6aa2382012b86dff0da473a Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 033/241] LoongArch: Add elf-related definitions for LoongArch

Add elf-related definitions for LoongArch, including: EM_LOONGARCH,
KEXEC_ARCH_LOONGARCH, AUDIT_ARCH_LOONGARCH32, AUDIT_ARCH_LOONGARCH64
and NT_LOONGARCH_*.
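
For context, the AUDIT_ARCH_* values below compose mechanically from
the EM_* machine number plus word-size and endianness flags. A small
user-space sketch of the arithmetic (the __AUDIT_ARCH_* flag values are
restated from the existing uapi header purely for illustration):

    #include <inttypes.h>
    #include <stdio.h>

    #define EM_LOONGARCH        258         /* ELF machine number (0x102) */
    #define __AUDIT_ARCH_64BIT  0x80000000  /* bit 31: 64-bit ABI */
    #define __AUDIT_ARCH_LE     0x40000000  /* bit 30: little-endian */

    int main(void)
    {
            uint32_t a64 = EM_LOONGARCH | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE;
            uint32_t a32 = EM_LOONGARCH | __AUDIT_ARCH_LE;

            /* prints c0000102 and 40000102 */
            printf("AUDIT_ARCH_LOONGARCH64 = %" PRIx32 "\n", a64);
            printf("AUDIT_ARCH_LOONGARCH32 = %" PRIx32 "\n", a32);
            return 0;
    }

A seccomp or audit consumer compares this value (seccomp_data->arch)
before interpreting syscall numbers, which is why both the 32-bit and
64-bit variants are defined up front.
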
Change-Id: I06366b06820b483f98584d66401551cea2f987e9 Signed-off-by: Huacai Chen --- include/uapi/linux/audit.h | 2 ++ include/uapi/linux/elf-em.h | 1 + include/uapi/linux/elf.h | 5 +++++ include/uapi/linux/kexec.h | 1 + scripts/sorttable.c | 5 +++++ 5 files changed, 14 insertions(+) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index cb4e8e6e86a9..01f047415ab0 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -434,6 +434,8 @@ enum { #define AUDIT_ARCH_UNICORE (EM_UNICORE|__AUDIT_ARCH_LE) #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_XTENSA (EM_XTENSA) +#define AUDIT_ARCH_LOONGARCH32 (EM_LOONGARCH|__AUDIT_ARCH_LE) +#define AUDIT_ARCH_LOONGARCH64 (EM_LOONGARCH|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_PERM_EXEC 1 #define AUDIT_PERM_WRITE 2 diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index f47e853546fa..ef38c2bc5ab7 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -51,6 +51,7 @@ #define EM_RISCV 243 /* RISC-V */ #define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_CSKY 252 /* C-SKY */ +#define EM_LOONGARCH 258 /* LoongArch */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ /* diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 2ae08e605c3c..85590f879d53 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -434,6 +434,11 @@ typedef struct elf64_shdr { #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */ /* Note types with note name "GNU" */ #define NT_GNU_PROPERTY_TYPE_0 5 diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 05669c87a0af..d55db902fe6e 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -42,6 +42,7 @@ #define KEXEC_ARCH_MIPS_LE (10 << 16) #define KEXEC_ARCH_MIPS ( 8 << 16) #define KEXEC_ARCH_AARCH64 (183 << 16) +#define KEXEC_ARCH_LOONGARCH (258 << 16) /* The artificial cap on the number of segments passed to kexec_load. 
*/
 #define KEXEC_SEGMENT_MAX 16
 
diff --git a/scripts/sorttable.c b/scripts/sorttable.c
index ac93e033b7cb..3edef1c6aaa0 100644
--- a/scripts/sorttable.c
+++ b/scripts/sorttable.c
@@ -54,6 +54,10 @@
 #define EM_ARCV2	195
 #endif
 
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH	258
+#endif
+
 static uint32_t (*r)(const uint32_t *);
 static uint16_t (*r2)(const uint16_t *);
 static uint64_t (*r8)(const uint64_t *);
@@ -347,6 +351,7 @@ static int do_file(char const *const fname, void *addr)
 		break;
 	case EM_ARCOMPACT:
 	case EM_ARCV2:
+	case EM_LOONGARCH:
 	case EM_MICROBLAZE:
 	case EM_MIPS:
 	case EM_XTENSA:
-- 
Gitee

From f01da0514a076ab9d11a26add0dc2e194737860e Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sat, 6 Feb 2021 17:18:08 +0800
Subject: [PATCH 034/241] LoongArch: Add writecombine support for drm

Change-Id: I4917091c1164183964438b0c31d997519011bc8a
Signed-off-by: Huacai Chen
---
 drivers/gpu/drm/drm_vm.c          | 2 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +-
 include/drm/drm_cache.h           | 8 ++++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 1a6369633789..ad10d640fa94 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -74,7 +74,7 @@ static pgprot_t drm_io_prot(struct drm_local_map *map,
 	tmp = pgprot_decrypted(tmp);
 
 #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
-    defined(__mips__)
+    defined(__mips__) || defined(__loongarch__)
 	if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
 		tmp = pgprot_noncached(tmp);
 	else
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 1968df9743fc..293d9d926f96 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -397,7 +397,7 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
 		tmp = pgprot_noncached(tmp);
 #endif
 #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
-    defined(__powerpc__) || defined(__mips__)
+    defined(__powerpc__) || defined(__mips__) || defined(__loongarch__)
 	if (caching_flags & TTM_PL_FLAG_WC)
 		tmp = pgprot_writecombine(tmp);
 	else
diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index e9ad4863d915..9d1c3c8da570 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -65,6 +65,14 @@ static inline bool drm_arch_can_wc_memory(void)
 	 * optimization entirely for ARM and arm64.
 	 */
 	return false;
+#elif defined(CONFIG_LOONGARCH)
+	/*
+	 * LoongArch maintains cache coherency in hardware, but its WUC attribute
+	 * (Weak-ordered UnCached, which is similar to WC) is outside the scope of
+	 * the cache coherency mechanism. This means WUC can only be used for
+	 * write-only memory regions.
+ */ + return false; #else return true; #endif -- Gitee From 4b7bb9893d22e9a594b5efdc5e42bff240c95e26 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 035/241] LoongArch: Add basic booting support Change-Id: Ie5918d09e84638aef4b36e412ffe990bb5fbca6a Signed-off-by: Huacai Chen --- Documentation/admin-guide/sysctl/kernel.rst | 8 +- arch/loongarch/.gitignore | 9 + arch/loongarch/Kbuild | 21 + arch/loongarch/Kbuild.platforms | 6 + arch/loongarch/Kconfig | 470 +++++ arch/loongarch/Kconfig.debug | 0 arch/loongarch/Makefile | 116 ++ arch/loongarch/boot/dts/Makefile | 0 arch/loongarch/include/asm/Kbuild | 30 + arch/loongarch/include/asm/acenv.h | 21 + arch/loongarch/include/asm/acpi.h | 42 + arch/loongarch/include/asm/addrspace.h | 107 ++ arch/loongarch/include/asm/asm-offsets.h | 5 + arch/loongarch/include/asm/asm-prototypes.h | 7 + arch/loongarch/include/asm/asm.h | 191 +++ arch/loongarch/include/asm/asmmacro-64.h | 44 + arch/loongarch/include/asm/asmmacro.h | 272 +++ arch/loongarch/include/asm/atomic.h | 368 ++++ arch/loongarch/include/asm/barrier.h | 53 + arch/loongarch/include/asm/bitops.h | 34 + arch/loongarch/include/asm/bitrev.h | 38 + arch/loongarch/include/asm/bootinfo.h | 50 + arch/loongarch/include/asm/branch.h | 25 + arch/loongarch/include/asm/bug.h | 23 + arch/loongarch/include/asm/cache.h | 13 + arch/loongarch/include/asm/cacheflush.h | 80 + arch/loongarch/include/asm/cacheops.h | 37 + arch/loongarch/include/asm/clocksource.h | 12 + arch/loongarch/include/asm/cmpxchg.h | 136 ++ arch/loongarch/include/asm/compiler.h | 19 + arch/loongarch/include/asm/cpu-features.h | 69 + arch/loongarch/include/asm/cpu-info.h | 117 ++ arch/loongarch/include/asm/cpu.h | 126 ++ arch/loongarch/include/asm/cpufeature.h | 24 + arch/loongarch/include/asm/delay.h | 34 + arch/loongarch/include/asm/dma-direct.h | 16 + arch/loongarch/include/asm/dma.h | 18 + arch/loongarch/include/asm/dmi.h | 24 + arch/loongarch/include/asm/efi.h | 49 + arch/loongarch/include/asm/elf.h | 302 ++++ arch/loongarch/include/asm/entry-common.h | 13 + arch/loongarch/include/asm/exec.h | 19 + arch/loongarch/include/asm/fb.h | 24 + arch/loongarch/include/asm/fixmap.h | 19 + arch/loongarch/include/asm/fpregdef.h | 57 + arch/loongarch/include/asm/fpu.h | 136 ++ arch/loongarch/include/asm/futex.h | 107 ++ arch/loongarch/include/asm/hardirq.h | 24 + arch/loongarch/include/asm/hugetlb.h | 80 + arch/loongarch/include/asm/hw_irq.h | 22 + arch/loongarch/include/asm/idle.h | 9 + arch/loongarch/include/asm/inst.h | 85 + arch/loongarch/include/asm/io.h | 129 ++ arch/loongarch/include/asm/irq.h | 55 + arch/loongarch/include/asm/irq_regs.h | 33 + arch/loongarch/include/asm/irqflags.h | 78 + arch/loongarch/include/asm/kdebug.h | 23 + arch/loongarch/include/asm/kmap_types.h | 7 + arch/loongarch/include/asm/linkage.h | 36 + arch/loongarch/include/asm/local.h | 143 ++ arch/loongarch/include/asm/loongarchregs.h | 1516 +++++++++++++++++ .../include/asm/mach-loongson64/irq.h | 83 + .../include/asm/mach-loongson64/loongson.h | 153 ++ arch/loongarch/include/asm/mmu.h | 16 + arch/loongarch/include/asm/mmu_context.h | 152 ++ arch/loongarch/include/asm/module.h | 80 + arch/loongarch/include/asm/module.lds.h | 7 + arch/loongarch/include/asm/page.h | 119 ++ arch/loongarch/include/asm/pci.h | 55 + arch/loongarch/include/asm/percpu.h | 20 + arch/loongarch/include/asm/perf_event.h | 16 + arch/loongarch/include/asm/pgalloc.h | 104 ++ arch/loongarch/include/asm/pgtable-64.h | 249 +++ 
arch/loongarch/include/asm/pgtable-bits.h | 131 ++ arch/loongarch/include/asm/pgtable.h | 292 ++++ arch/loongarch/include/asm/prefetch.h | 33 + arch/loongarch/include/asm/processor.h | 210 +++ arch/loongarch/include/asm/ptrace.h | 156 ++ arch/loongarch/include/asm/reboot.h | 10 + arch/loongarch/include/asm/regdef.h | 45 + arch/loongarch/include/asm/serial.h | 11 + arch/loongarch/include/asm/setup.h | 21 + arch/loongarch/include/asm/shmparam.h | 13 + arch/loongarch/include/asm/sparsemem.h | 23 + arch/loongarch/include/asm/spinlock.h | 18 + arch/loongarch/include/asm/spinlock_types.h | 15 + arch/loongarch/include/asm/stackframe.h | 212 +++ arch/loongarch/include/asm/stacktrace.h | 74 + arch/loongarch/include/asm/string.h | 21 + arch/loongarch/include/asm/switch_to.h | 37 + arch/loongarch/include/asm/syscall.h | 68 + arch/loongarch/include/asm/thread_info.h | 113 ++ arch/loongarch/include/asm/time.h | 55 + arch/loongarch/include/asm/timex.h | 34 + arch/loongarch/include/asm/tlb.h | 180 ++ arch/loongarch/include/asm/tlbflush.h | 38 + arch/loongarch/include/asm/topology.h | 15 + arch/loongarch/include/asm/types.h | 25 + arch/loongarch/include/asm/uaccess.h | 378 ++++ arch/loongarch/include/asm/unistd.h | 10 + arch/loongarch/include/asm/vdso.h | 43 + arch/loongarch/include/asm/vdso/clocksource.h | 8 + .../loongarch/include/asm/vdso/gettimeofday.h | 104 ++ arch/loongarch/include/asm/vdso/processor.h | 14 + arch/loongarch/include/asm/vdso/vdso.h | 31 + arch/loongarch/include/asm/vdso/vsyscall.h | 27 + arch/loongarch/include/asm/vermagic.h | 19 + arch/loongarch/include/asm/vmalloc.h | 4 + arch/loongarch/include/uapi/asm/Kbuild | 2 + arch/loongarch/include/uapi/asm/auxvec.h | 16 + arch/loongarch/include/uapi/asm/bitfield.h | 17 + arch/loongarch/include/uapi/asm/bitsperlong.h | 9 + arch/loongarch/include/uapi/asm/break.h | 27 + arch/loongarch/include/uapi/asm/byteorder.h | 12 + arch/loongarch/include/uapi/asm/hwcap.h | 20 + arch/loongarch/include/uapi/asm/inst.h | 474 ++++++ arch/loongarch/include/uapi/asm/ptrace.h | 52 + arch/loongarch/include/uapi/asm/reg.h | 63 + arch/loongarch/include/uapi/asm/sigcontext.h | 63 + arch/loongarch/include/uapi/asm/signal.h | 13 + arch/loongarch/include/uapi/asm/swab.h | 54 + arch/loongarch/include/uapi/asm/ucontext.h | 35 + arch/loongarch/include/uapi/asm/unistd.h | 6 + arch/loongarch/kernel/Makefile | 22 + arch/loongarch/kernel/access-helper.h | 13 + arch/loongarch/kernel/acpi.c | 339 ++++ arch/loongarch/kernel/asm-offsets.c | 254 +++ arch/loongarch/kernel/cacheinfo.c | 123 ++ arch/loongarch/kernel/cmpxchg.c | 101 ++ arch/loongarch/kernel/cpu-probe.c | 292 ++++ arch/loongarch/kernel/efi.c | 100 ++ arch/loongarch/kernel/elf.c | 35 + arch/loongarch/kernel/entry.S | 89 + arch/loongarch/kernel/fpu.S | 264 +++ arch/loongarch/kernel/genex.S | 89 + arch/loongarch/kernel/head.S | 70 + arch/loongarch/kernel/idle.c | 16 + arch/loongarch/kernel/io.c | 98 ++ arch/loongarch/kernel/irq.c | 64 + arch/loongarch/kernel/module-sections.c | 121 ++ arch/loongarch/kernel/module.c | 374 ++++ arch/loongarch/kernel/proc.c | 122 ++ arch/loongarch/kernel/process.c | 261 +++ arch/loongarch/kernel/ptrace.c | 424 +++++ arch/loongarch/kernel/reset.c | 51 + arch/loongarch/kernel/setup.c | 243 +++ arch/loongarch/kernel/signal-common.h | 51 + arch/loongarch/kernel/signal.c | 536 ++++++ arch/loongarch/kernel/switch.S | 40 + arch/loongarch/kernel/syscall.c | 65 + arch/loongarch/kernel/time.c | 219 +++ arch/loongarch/kernel/topology.c | 13 + arch/loongarch/kernel/traps.c | 741 ++++++++ 
arch/loongarch/kernel/unaligned.c | 499 ++++++ arch/loongarch/kernel/vdso.c | 138 ++ arch/loongarch/kernel/vmlinux.lds.S | 114 ++ arch/loongarch/lib/Makefile | 7 + arch/loongarch/lib/clear_user.S | 43 + arch/loongarch/lib/copy_user.S | 47 + arch/loongarch/lib/delay.c | 43 + arch/loongarch/lib/dump_tlb.c | 107 ++ arch/loongarch/lib/memcpy.S | 32 + arch/loongarch/lib/memmove.S | 45 + arch/loongarch/lib/memset.S | 30 + arch/loongarch/lib/unaligned.S | 91 + arch/loongarch/loongson64/Makefile | 5 + arch/loongarch/loongson64/Platform | 13 + arch/loongarch/loongson64/env.c | 101 ++ arch/loongarch/loongson64/init.c | 130 ++ arch/loongarch/loongson64/irq.c | 88 + arch/loongarch/loongson64/mem.c | 65 + arch/loongarch/loongson64/reset.c | 53 + arch/loongarch/loongson64/rtc.c | 36 + arch/loongarch/loongson64/setup.c | 41 + arch/loongarch/mm/Makefile | 10 + arch/loongarch/mm/cache.c | 139 ++ arch/loongarch/mm/extable.c | 22 + arch/loongarch/mm/fault.c | 261 +++ arch/loongarch/mm/hugetlbpage.c | 87 + arch/loongarch/mm/init.c | 169 ++ arch/loongarch/mm/ioremap.c | 31 + arch/loongarch/mm/maccess.c | 10 + arch/loongarch/mm/mmap.c | 125 ++ arch/loongarch/mm/page.S | 84 + arch/loongarch/mm/pgtable-64.c | 114 ++ arch/loongarch/mm/pgtable.c | 24 + arch/loongarch/mm/tlb.c | 281 +++ arch/loongarch/mm/tlbex.S | 481 ++++++ arch/loongarch/pci/Makefile | 7 + arch/loongarch/pci/acpi.c | 174 ++ arch/loongarch/pci/pci.c | 93 + arch/loongarch/vdso/Makefile | 96 ++ arch/loongarch/vdso/elf.S | 19 + arch/loongarch/vdso/gen_vdso_offsets.sh | 13 + arch/loongarch/vdso/sigreturn.S | 29 + arch/loongarch/vdso/vdso.S | 22 + arch/loongarch/vdso/vdso.lds.S | 72 + arch/loongarch/vdso/vgettimeofday.c | 27 + drivers/acpi/bus.c | 3 + drivers/acpi/pci_irq.c | 3 +- drivers/acpi/pci_mcfg.c | 10 + drivers/pci/controller/Kconfig | 2 +- drivers/pci/controller/pci-loongson.c | 160 +- include/linux/acpi.h | 1 + include/linux/pci-ecam.h | 1 + scripts/subarch.include | 2 +- 206 files changed, 19519 insertions(+), 47 deletions(-) create mode 100644 arch/loongarch/.gitignore create mode 100644 arch/loongarch/Kbuild create mode 100644 arch/loongarch/Kbuild.platforms create mode 100644 arch/loongarch/Kconfig create mode 100644 arch/loongarch/Kconfig.debug create mode 100644 arch/loongarch/Makefile create mode 100644 arch/loongarch/boot/dts/Makefile create mode 100644 arch/loongarch/include/asm/Kbuild create mode 100644 arch/loongarch/include/asm/acenv.h create mode 100644 arch/loongarch/include/asm/acpi.h create mode 100644 arch/loongarch/include/asm/addrspace.h create mode 100644 arch/loongarch/include/asm/asm-offsets.h create mode 100644 arch/loongarch/include/asm/asm-prototypes.h create mode 100644 arch/loongarch/include/asm/asm.h create mode 100644 arch/loongarch/include/asm/asmmacro-64.h create mode 100644 arch/loongarch/include/asm/asmmacro.h create mode 100644 arch/loongarch/include/asm/atomic.h create mode 100644 arch/loongarch/include/asm/barrier.h create mode 100644 arch/loongarch/include/asm/bitops.h create mode 100644 arch/loongarch/include/asm/bitrev.h create mode 100644 arch/loongarch/include/asm/bootinfo.h create mode 100644 arch/loongarch/include/asm/branch.h create mode 100644 arch/loongarch/include/asm/bug.h create mode 100644 arch/loongarch/include/asm/cache.h create mode 100644 arch/loongarch/include/asm/cacheflush.h create mode 100644 arch/loongarch/include/asm/cacheops.h create mode 100644 arch/loongarch/include/asm/clocksource.h create mode 100644 arch/loongarch/include/asm/cmpxchg.h create mode 100644 
arch/loongarch/include/asm/compiler.h create mode 100644 arch/loongarch/include/asm/cpu-features.h create mode 100644 arch/loongarch/include/asm/cpu-info.h create mode 100644 arch/loongarch/include/asm/cpu.h create mode 100644 arch/loongarch/include/asm/cpufeature.h create mode 100644 arch/loongarch/include/asm/delay.h create mode 100644 arch/loongarch/include/asm/dma-direct.h create mode 100644 arch/loongarch/include/asm/dma.h create mode 100644 arch/loongarch/include/asm/dmi.h create mode 100644 arch/loongarch/include/asm/efi.h create mode 100644 arch/loongarch/include/asm/elf.h create mode 100644 arch/loongarch/include/asm/entry-common.h create mode 100644 arch/loongarch/include/asm/exec.h create mode 100644 arch/loongarch/include/asm/fb.h create mode 100644 arch/loongarch/include/asm/fixmap.h create mode 100644 arch/loongarch/include/asm/fpregdef.h create mode 100644 arch/loongarch/include/asm/fpu.h create mode 100644 arch/loongarch/include/asm/futex.h create mode 100644 arch/loongarch/include/asm/hardirq.h create mode 100644 arch/loongarch/include/asm/hugetlb.h create mode 100644 arch/loongarch/include/asm/hw_irq.h create mode 100644 arch/loongarch/include/asm/idle.h create mode 100644 arch/loongarch/include/asm/inst.h create mode 100644 arch/loongarch/include/asm/io.h create mode 100644 arch/loongarch/include/asm/irq.h create mode 100644 arch/loongarch/include/asm/irq_regs.h create mode 100644 arch/loongarch/include/asm/irqflags.h create mode 100644 arch/loongarch/include/asm/kdebug.h create mode 100644 arch/loongarch/include/asm/kmap_types.h create mode 100644 arch/loongarch/include/asm/linkage.h create mode 100644 arch/loongarch/include/asm/local.h create mode 100644 arch/loongarch/include/asm/loongarchregs.h create mode 100644 arch/loongarch/include/asm/mach-loongson64/irq.h create mode 100644 arch/loongarch/include/asm/mach-loongson64/loongson.h create mode 100644 arch/loongarch/include/asm/mmu.h create mode 100644 arch/loongarch/include/asm/mmu_context.h create mode 100644 arch/loongarch/include/asm/module.h create mode 100644 arch/loongarch/include/asm/module.lds.h create mode 100644 arch/loongarch/include/asm/page.h create mode 100644 arch/loongarch/include/asm/pci.h create mode 100644 arch/loongarch/include/asm/percpu.h create mode 100644 arch/loongarch/include/asm/perf_event.h create mode 100644 arch/loongarch/include/asm/pgalloc.h create mode 100644 arch/loongarch/include/asm/pgtable-64.h create mode 100644 arch/loongarch/include/asm/pgtable-bits.h create mode 100644 arch/loongarch/include/asm/pgtable.h create mode 100644 arch/loongarch/include/asm/prefetch.h create mode 100644 arch/loongarch/include/asm/processor.h create mode 100644 arch/loongarch/include/asm/ptrace.h create mode 100644 arch/loongarch/include/asm/reboot.h create mode 100644 arch/loongarch/include/asm/regdef.h create mode 100644 arch/loongarch/include/asm/serial.h create mode 100644 arch/loongarch/include/asm/setup.h create mode 100644 arch/loongarch/include/asm/shmparam.h create mode 100644 arch/loongarch/include/asm/sparsemem.h create mode 100644 arch/loongarch/include/asm/spinlock.h create mode 100644 arch/loongarch/include/asm/spinlock_types.h create mode 100644 arch/loongarch/include/asm/stackframe.h create mode 100644 arch/loongarch/include/asm/stacktrace.h create mode 100644 arch/loongarch/include/asm/string.h create mode 100644 arch/loongarch/include/asm/switch_to.h create mode 100644 arch/loongarch/include/asm/syscall.h create mode 100644 arch/loongarch/include/asm/thread_info.h create mode 100644 
arch/loongarch/include/asm/time.h create mode 100644 arch/loongarch/include/asm/timex.h create mode 100644 arch/loongarch/include/asm/tlb.h create mode 100644 arch/loongarch/include/asm/tlbflush.h create mode 100644 arch/loongarch/include/asm/topology.h create mode 100644 arch/loongarch/include/asm/types.h create mode 100644 arch/loongarch/include/asm/uaccess.h create mode 100644 arch/loongarch/include/asm/unistd.h create mode 100644 arch/loongarch/include/asm/vdso.h create mode 100644 arch/loongarch/include/asm/vdso/clocksource.h create mode 100644 arch/loongarch/include/asm/vdso/gettimeofday.h create mode 100644 arch/loongarch/include/asm/vdso/processor.h create mode 100644 arch/loongarch/include/asm/vdso/vdso.h create mode 100644 arch/loongarch/include/asm/vdso/vsyscall.h create mode 100644 arch/loongarch/include/asm/vermagic.h create mode 100644 arch/loongarch/include/asm/vmalloc.h create mode 100644 arch/loongarch/include/uapi/asm/Kbuild create mode 100644 arch/loongarch/include/uapi/asm/auxvec.h create mode 100644 arch/loongarch/include/uapi/asm/bitfield.h create mode 100644 arch/loongarch/include/uapi/asm/bitsperlong.h create mode 100644 arch/loongarch/include/uapi/asm/break.h create mode 100644 arch/loongarch/include/uapi/asm/byteorder.h create mode 100644 arch/loongarch/include/uapi/asm/hwcap.h create mode 100644 arch/loongarch/include/uapi/asm/inst.h create mode 100644 arch/loongarch/include/uapi/asm/ptrace.h create mode 100644 arch/loongarch/include/uapi/asm/reg.h create mode 100644 arch/loongarch/include/uapi/asm/sigcontext.h create mode 100644 arch/loongarch/include/uapi/asm/signal.h create mode 100644 arch/loongarch/include/uapi/asm/swab.h create mode 100644 arch/loongarch/include/uapi/asm/ucontext.h create mode 100644 arch/loongarch/include/uapi/asm/unistd.h create mode 100644 arch/loongarch/kernel/Makefile create mode 100644 arch/loongarch/kernel/access-helper.h create mode 100644 arch/loongarch/kernel/acpi.c create mode 100644 arch/loongarch/kernel/asm-offsets.c create mode 100644 arch/loongarch/kernel/cacheinfo.c create mode 100644 arch/loongarch/kernel/cmpxchg.c create mode 100644 arch/loongarch/kernel/cpu-probe.c create mode 100644 arch/loongarch/kernel/efi.c create mode 100644 arch/loongarch/kernel/elf.c create mode 100644 arch/loongarch/kernel/entry.S create mode 100644 arch/loongarch/kernel/fpu.S create mode 100644 arch/loongarch/kernel/genex.S create mode 100644 arch/loongarch/kernel/head.S create mode 100644 arch/loongarch/kernel/idle.c create mode 100644 arch/loongarch/kernel/io.c create mode 100644 arch/loongarch/kernel/irq.c create mode 100644 arch/loongarch/kernel/module-sections.c create mode 100644 arch/loongarch/kernel/module.c create mode 100644 arch/loongarch/kernel/proc.c create mode 100644 arch/loongarch/kernel/process.c create mode 100644 arch/loongarch/kernel/ptrace.c create mode 100644 arch/loongarch/kernel/reset.c create mode 100644 arch/loongarch/kernel/setup.c create mode 100644 arch/loongarch/kernel/signal-common.h create mode 100644 arch/loongarch/kernel/signal.c create mode 100644 arch/loongarch/kernel/switch.S create mode 100644 arch/loongarch/kernel/syscall.c create mode 100644 arch/loongarch/kernel/time.c create mode 100644 arch/loongarch/kernel/topology.c create mode 100644 arch/loongarch/kernel/traps.c create mode 100644 arch/loongarch/kernel/unaligned.c create mode 100644 arch/loongarch/kernel/vdso.c create mode 100644 arch/loongarch/kernel/vmlinux.lds.S create mode 100644 arch/loongarch/lib/Makefile create mode 100644 
arch/loongarch/lib/clear_user.S create mode 100644 arch/loongarch/lib/copy_user.S create mode 100644 arch/loongarch/lib/delay.c create mode 100644 arch/loongarch/lib/dump_tlb.c create mode 100644 arch/loongarch/lib/memcpy.S create mode 100644 arch/loongarch/lib/memmove.S create mode 100644 arch/loongarch/lib/memset.S create mode 100644 arch/loongarch/lib/unaligned.S create mode 100644 arch/loongarch/loongson64/Makefile create mode 100644 arch/loongarch/loongson64/Platform create mode 100644 arch/loongarch/loongson64/env.c create mode 100644 arch/loongarch/loongson64/init.c create mode 100644 arch/loongarch/loongson64/irq.c create mode 100644 arch/loongarch/loongson64/mem.c create mode 100644 arch/loongarch/loongson64/reset.c create mode 100644 arch/loongarch/loongson64/rtc.c create mode 100644 arch/loongarch/loongson64/setup.c create mode 100644 arch/loongarch/mm/Makefile create mode 100644 arch/loongarch/mm/cache.c create mode 100644 arch/loongarch/mm/extable.c create mode 100644 arch/loongarch/mm/fault.c create mode 100644 arch/loongarch/mm/hugetlbpage.c create mode 100644 arch/loongarch/mm/init.c create mode 100644 arch/loongarch/mm/ioremap.c create mode 100644 arch/loongarch/mm/maccess.c create mode 100644 arch/loongarch/mm/mmap.c create mode 100644 arch/loongarch/mm/page.S create mode 100644 arch/loongarch/mm/pgtable-64.c create mode 100644 arch/loongarch/mm/pgtable.c create mode 100644 arch/loongarch/mm/tlb.c create mode 100644 arch/loongarch/mm/tlbex.S create mode 100644 arch/loongarch/pci/Makefile create mode 100644 arch/loongarch/pci/acpi.c create mode 100644 arch/loongarch/pci/pci.c create mode 100644 arch/loongarch/vdso/Makefile create mode 100644 arch/loongarch/vdso/elf.S create mode 100755 arch/loongarch/vdso/gen_vdso_offsets.sh create mode 100644 arch/loongarch/vdso/sigreturn.S create mode 100644 arch/loongarch/vdso/vdso.S create mode 100644 arch/loongarch/vdso/vdso.lds.S create mode 100644 arch/loongarch/vdso/vgettimeofday.c diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 6b0c7b650dea..ec5ca90f3dd4 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -422,8 +422,8 @@ ignore-unaligned-usertrap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``; -currently, ``arc`` and ``ia64``), controls whether all unaligned traps -are logged. +currently, ``arc``, ``ia64`` and ``loongarch``), controls whether all +unaligned traps are logged. = ============================================================= 0 Log all unaligned accesses. @@ -1435,8 +1435,8 @@ unaligned-trap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW``; currently, -``arc`` and ``parisc``), controls whether unaligned traps are caught -and emulated (instead of failing). +``arc``, ``parisc`` and ``loongarch``), controls whether unaligned traps +are caught and emulated (instead of failing). = ======================================================== 0 Do not emulate unaligned accesses. 
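The effect of the two sysctls documented above is easiest to see with a concrete trigger. The following is a minimal userspace sketch, not part of this patch, that forces a misaligned 32-bit load; on cores that trap on unaligned accesses, kernel.unaligned-trap then selects between emulation and failure, and kernel.ignore-unaligned-usertrap selects whether the trap is logged:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* Force 4-byte alignment so buf + 1 is guaranteed misaligned */
		_Alignas(uint32_t) char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
		volatile uint32_t *p = (volatile uint32_t *)(buf + 1);
		uint32_t v = *p;	/* misaligned load; may trap */

		printf("read 0x%08x from %p\n", v, (void *)p);
		return 0;
	}

With kernel.unaligned-trap set to 1 the load is caught and emulated and the program prints a value; with 0 the access fails (typically with SIGBUS). On cores that handle unaligned accesses in hardware the load simply succeeds and neither sysctl comes into play.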
diff --git a/arch/loongarch/.gitignore b/arch/loongarch/.gitignore new file mode 100644 index 000000000000..e9c2dedc1c5e --- /dev/null +++ b/arch/loongarch/.gitignore @@ -0,0 +1,9 @@ +*.lds +*.raw +calc_vmlinuz_load_addr +elf-entry +relocs +vmlinux.* +vmlinuz.* + +!kernel/vmlinux.lds.S diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild new file mode 100644 index 000000000000..e997ba117bd9 --- /dev/null +++ b/arch/loongarch/Kbuild @@ -0,0 +1,21 @@ +# Fail on warnings - also for files referenced in subdirs +# -Werror can be disabled for specific files using: +# CFLAGS_ := -Wno-error +ifeq ($(W),) +subdir-ccflags-y := -Werror +endif + +# platform specific definitions +include arch/loongarch/Kbuild.platforms +obj-y := $(platform-y) + +# make clean traverses $(obj-) without having included .config, so +# everything ends up here +obj- := $(platform-) + +# LoongArch object files +# The object files are linked as core-y files would be linked + +obj-y += kernel/ +obj-y += mm/ +obj-y += vdso/ diff --git a/arch/loongarch/Kbuild.platforms b/arch/loongarch/Kbuild.platforms new file mode 100644 index 000000000000..ad390d5c00f2 --- /dev/null +++ b/arch/loongarch/Kbuild.platforms @@ -0,0 +1,6 @@ +# All platforms listed in alphabetic order + +platforms += loongson64 + +# include the platform specific files +include $(patsubst %, $(srctree)/arch/loongarch/%/Platform, $(platforms)) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig new file mode 100644 index 000000000000..de4ca3a51147 --- /dev/null +++ b/arch/loongarch/Kconfig @@ -0,0 +1,470 @@ +# SPDX-License-Identifier: GPL-2.0 +config LOONGARCH + bool + default y + select ACPI_MCFG if ACPI + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ARCH_BINFMT_ELF_STATE + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_PTE_SPECIAL if !32BIT + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_INLINE_READ_LOCK if !PREEMPTION + select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_SUPPORTS_ACPI + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if 64BIT + select ARCH_USE_QUEUED_RWLOCKS + select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select BUILDTIME_TABLE_SORT + select COMMON_CLK + select GENERIC_ATOMIC64 if !64BIT + select 
GENERIC_CLOCKEVENTS + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_ENTRY + select GENERIC_FIND_FIRST_BIT + select GENERIC_GETTIMEOFDAY + select GENERIC_IRQ_MULTI_HANDLER + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select GENERIC_LIB_ASHLDI3 + select GENERIC_LIB_ASHRDI3 + select GENERIC_LIB_CMPDI2 + select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_UCMPDI2 + select GENERIC_PCI_IOMAP + select GENERIC_SCHED_CLOCK + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL + select HANDLE_DOMAIN_IRQ + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_COMPILER_H + select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ASM_MODVERSIONS + select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_CONTIGUOUS + select HAVE_EXIT_THREAD + select HAVE_FAST_GUP + select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GENERIC_VDSO + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI + select HAVE_PERF_EVENTS + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_TIF_NOHZ + select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT + select IRQ_FORCED_THREADING + select MODULES_USE_ELF_RELA if MODULES + select OF + select OF_EARLY_FLATTREE + select PCI_DOMAINS_GENERIC if PCI + select PCI_ECAM if ACPI + select PCI_MSI_ARCH_FALLBACKS + select PERF_USE_VMALLOC + select RTC_LIB + select SYSCTL_ARCH_UNALIGN_ALLOW + select SYSCTL_ARCH_UNALIGN_NO_WARN + select SYSCTL_EXCEPTION_TRACE + +menu "Machine selection" + +choice + prompt "System type" + default MACH_LOONGSON64 + +config MACH_LOONGSON64 + bool "Loongson 64-bit family of machines" + select ARCH_SPARSEMEM_ENABLE + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select HAVE_PCI + select PCI + select IRQ_LOONGARCH_CPU + select NR_CPUS_DEFAULT_64 + select SPARSE_IRQ + select SYS_HAS_CPU_LOONGSON64 + select SYS_SUPPORTS_64BIT_KERNEL + select ZONE_DMA32 + help + This enables the support of Loongson 64-bit family of machines. These + machines are based on new Loongson-3 processors (Old Loongson is MIPS + compatible, while new Loongson is based on LoongArch ISA). + +endchoice + +endmenu + +config GENERIC_HWEIGHT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config SCHED_OMIT_FRAME_POINTER + bool + default y + +config GENERIC_CSUM + def_bool y + +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config L1_CACHE_SHIFT + int + default "6" + +menu "CPU selection" + +choice + prompt "CPU type" + default CPU_LOONGSON64 + +config CPU_LOONGSON64 + bool "Loongson 64-bit CPU" + depends on SYS_HAS_CPU_LOONGSON64 + select CPU_SUPPORTS_64BIT_KERNEL + select GPIOLIB + select SWIOTLB + select ARCH_SUPPORTS_ATOMIC_RMW + help + The Loongson 64-bit processor implements the LoongArch64 (the 64-bit + version of LoongArch) instruction set. 
+ +endchoice + +config SYS_HAS_CPU_LOONGSON64 + bool + +endmenu + +config SYS_SUPPORTS_32BIT_KERNEL + bool +config SYS_SUPPORTS_64BIT_KERNEL + bool +config CPU_SUPPORTS_32BIT_KERNEL + bool +config CPU_SUPPORTS_64BIT_KERNEL + bool + +menu "Kernel type and options" + +choice + prompt "Kernel code model" + help + You should only select this option if you have a workload that + actually benefits from 64-bit processing or if your machine has + large memory. You will only be presented a single option in this + menu if your system does not support both 32-bit and 64-bit kernels. + +config 32BIT + bool "32-bit kernel" + depends on CPU_SUPPORTS_32BIT_KERNEL && SYS_SUPPORTS_32BIT_KERNEL + help + Select this option if you want to build a 32-bit kernel. + +config 64BIT + bool "64-bit kernel" + depends on CPU_SUPPORTS_64BIT_KERNEL && SYS_SUPPORTS_64BIT_KERNEL + help + Select this option if you want to build a 64-bit kernel. + +endchoice + +config PAGE_SIZE_4KB + bool + +config PAGE_SIZE_16KB + bool + +config PAGE_SIZE_64KB + bool + +config PGTABLE_2LEVEL + bool + +config PGTABLE_3LEVEL + bool + +config PGTABLE_4LEVEL + bool + +config PGTABLE_LEVELS + int + default 2 if PGTABLE_2LEVEL + default 3 if PGTABLE_3LEVEL + default 4 if PGTABLE_4LEVEL + +choice + prompt "Page Table Layout" + default 16KB_2LEVEL if 32BIT + default 16KB_3LEVEL if 64BIT + help + Allows choosing the page table layout, which is a combination + of page size and page table levels. The size of the virtual memory + address space is determined by the page table layout. + +config 4KB_3LEVEL + bool "4KB with 3 levels" + select PAGE_SIZE_4KB + select PGTABLE_3LEVEL + help + This option selects 4KB page size with 3 level page tables, which + support a maximum of 39 bits of application virtual memory. + +config 4KB_4LEVEL + bool "4KB with 4 levels" + select PAGE_SIZE_4KB + select PGTABLE_4LEVEL + help + This option selects 4KB page size with 4 level page tables, which + support a maximum of 48 bits of application virtual memory. + +config 16KB_2LEVEL + bool "16KB with 2 levels" + select PAGE_SIZE_16KB + select PGTABLE_2LEVEL + help + This option selects 16KB page size with 2 level page tables, which + support a maximum of 36 bits of application virtual memory. + +config 16KB_3LEVEL + bool "16KB with 3 levels" + select PAGE_SIZE_16KB + select PGTABLE_3LEVEL + help + This option selects 16KB page size with 3 level page tables, which + support a maximum of 47 bits of application virtual memory. + +config 64KB_2LEVEL + bool "64KB with 2 levels" + select PAGE_SIZE_64KB + select PGTABLE_2LEVEL + help + This option selects 64KB page size with 2 level page tables, which + support a maximum of 42 bits of application virtual memory. + +config 64KB_3LEVEL + bool "64KB with 3 levels" + select PAGE_SIZE_64KB + select PGTABLE_3LEVEL + help + This option selects 64KB page size with 3 level page tables, which + support a maximum of 55 bits of application virtual memory. + +endchoice + +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + range 14 64 if PAGE_SIZE_64KB + default "14" if PAGE_SIZE_64KB + range 12 64 if PAGE_SIZE_16KB + default "12" if PAGE_SIZE_16KB + range 11 64 + default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value.
+ + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + The page size is not necessarily 4KB. Keep this in mind + when choosing a value for this option. + +config CPU_HAS_PREFETCH + bool + default y + +config CPU_HAS_FPU + bool + default y + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + help + Say Y to support efficient handling of sparse physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See for more. + +config CMDLINE + string "Built-in kernel command line" + help + For most platforms, the arguments for the kernel's command line + are provided at run-time, during boot. However, there are cases + where either no arguments are being provided or the provided + arguments are insufficient or even invalid. + + When that occurs, it is possible to define a built-in command + line here and choose how the kernel should use it later on. + +choice + prompt "Kernel command line type" + default CMDLINE_BOOTLOADER + help + Choose how the kernel will handle the provided built-in command + line. + +config CMDLINE_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Prefer the command-line passed by the boot loader if available. + Use the built-in command line as fallback in case we get nothing + during boot. This is the default behaviour. + +config CMDLINE_EXTEND + bool "Use built-in to extend bootloader kernel arguments" + help + The command-line arguments provided during boot will be + appended to the built-in command line. This is useful in + cases where the provided arguments are insufficient and + you don't want to or cannot modify them. + +config CMDLINE_FORCE + bool "Always use the built-in kernel command string" + help + Always use the built-in command line, even if we get one during + boot. This is useful in case you need to override the provided + command line on systems where you don't have or want control + over it. + +endchoice + +config DMI + bool "Enable DMI scanning" + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK + default y + help + This enables SMBIOS/DMI feature for systems, and scanning of + DMI to identify machine quirks. + +config EFI + bool "EFI runtime service support" + select UCS2_STRING + select EFI_RUNTIME_WRAPPERS + help + This enables the kernel to use EFI runtime services that are + available (such as the EFI variable services). + +source "kernel/Kconfig.hz" + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc//seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. 
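For readers unfamiliar with it, strict-mode seccomp as described in the help text above can be exercised from userspace with a few lines. This is an illustrative sketch using the prctl() entry point (the /proc file mentioned above is the historical interface); it deliberately exits through the raw exit(2) syscall, because glibc's _exit() uses exit_group(), which strict mode does not allow:

	#include <unistd.h>
	#include <sys/prctl.h>
	#include <sys/syscall.h>
	#include <linux/seccomp.h>

	int main(void)
	{
		if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0) != 0)
			return 1;	/* seccomp not enabled; normal exit is fine */

		/* Only read(), write(), exit() and sigreturn() are allowed
		 * from here on; any other syscall kills the task with SIGKILL. */
		write(1, "sandboxed\n", 10);
		syscall(SYS_exit, 0);
	}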
+ +endmenu + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on LOONGARCH + +config ARCH_ENABLE_MEMORY_HOTREMOVE + def_bool y + depends on MEMORY_HOTPLUG + +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends on TRANSPARENT_HUGEPAGE + +config ARCH_MEMORY_PROBE + def_bool y + depends on MEMORY_HOTPLUG + +config LOCKDEP_SUPPORT + bool + default y + +config TRACE_IRQFLAGS_SUPPORT + bool + default y + +config MMU + bool + default y + +config ARCH_MMAP_RND_BITS_MIN + default 12 if 64BIT + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 18 if 64BIT + default 15 + +config ZONE_DMA + bool + +config ZONE_DMA32 + bool + +menu "Power management options" + +source "drivers/acpi/Kconfig" + +endmenu + +source "drivers/firmware/Kconfig" diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile new file mode 100644 index 000000000000..5cfcba7e8161 --- /dev/null +++ b/arch/loongarch/Makefile @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Huacai Chen +# Copyright (C) 2020 Loongson Technology Corporation Limited + +# +# Select the object file format to substitute into the linker script. +# +64bit-tool-archpref = loongarch64 +32bit-bfd = elf32-loongarch +64bit-bfd = elf64-loongarch +32bit-emul = elf32loongarch +64bit-emul = elf64loongarch + +ifdef CONFIG_64BIT +tool-archpref = $(64bit-tool-archpref) +UTS_MACHINE := loongarch64 +endif + +ifneq ($(SUBARCH),$(ARCH)) + ifeq ($(CROSS_COMPILE),) + CROSS_COMPILE := $(call cc-cross-prefix, $(tool-archpref)-linux- $(tool-archpref)-linux-gnu- $(tool-archpref)-unknown-linux-gnu-) + endif +endif + +cflags-y += $(call cc-option, -mno-check-zero-division) + +ifdef CONFIG_64BIT +ld-emul = $(64bit-emul) +cflags-y += -mabi=lp64s +endif + +all-y := vmlinux + +cflags-y += -G0 -pipe -msoft-float +LDFLAGS_vmlinux += -G0 -static -n -nostdlib +KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel +KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel +KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs +KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs + +cflags-y += -ffreestanding + +# +# Board-dependent options and extra files +# +include arch/loongarch/Kbuild.platforms + +ifdef CONFIG_PHYSICAL_START +load-y = $(CONFIG_PHYSICAL_START) +endif + +drivers-$(CONFIG_PCI) += arch/loongarch/pci/ + +KBUILD_AFLAGS += $(cflags-y) +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) + +bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) PLATFORM="$(platform-y)" + +ifdef CONFIG_64BIT +bootvars-y += ADDR_BITS=64 +endif + +# This is required to get dwarf unwinding tables into .debug_frame +# instead of .eh_frame so we don't discard them. 
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS += $(call cc-option,-mstrict-align) + +KBUILD_LDFLAGS += -m $(ld-emul) + +ifdef CONFIG_LOONGARCH +CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') +endif + +head-y := arch/loongarch/kernel/head.o + +libs-y += arch/loongarch/lib/ + +# See arch/loongarch/Kbuild for content of core part of the kernel +core-y += arch/loongarch/ + +ifeq ($(KBUILD_EXTMOD),) +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/loongarch/vdso include/generated/vdso-offsets.h +endif + +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/loongarch/vdso $@ + +# boot image targets (arch/loongarch/boot/) +boot-y := vmlinux.bin + +all: $(all-y) + +# boot +$(boot-y): vmlinux FORCE + $(Q)$(MAKE) $(build)=arch/loongarch/boot VMLINUX=vmlinux \ + $(bootvars-y) arch/loongarch/boot/$@ + +CLEAN_FILES += vmlinux + +install: + $(Q)install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) + $(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) + $(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) + +define archhelp + echo ' install - install kernel into $(INSTALL_PATH)' + echo ' vmlinux.bin - Raw binary boot image' + echo +endef diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild new file mode 100644 index 000000000000..0c311f3098eb --- /dev/null +++ b/arch/loongarch/include/asm/Kbuild @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +generic-y += dma-contiguous.h +generic-y += export.h +generic-y += mcs_spinlock.h +generic-y += parport.h +generic-y += early_ioremap.h +generic-y += qrwlock.h +generic-y += qspinlock.h +generic-y += rwsem.h +generic-y += segment.h +generic-y += user.h +generic-y += stat.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ioctls.h +generic-y += mman.h +generic-y += msgbuf.h +generic-y += sembuf.h +generic-y += shmbuf.h +generic-y += statfs.h +generic-y += socket.h +generic-y += sockios.h +generic-y += termios.h +generic-y += termbits.h +generic-y += poll.h +generic-y += param.h +generic-y += posix_types.h +generic-y += resource.h +generic-y += seccomp.h +generic-y += kvm_para.h diff --git a/arch/loongarch/include/asm/acenv.h b/arch/loongarch/include/asm/acenv.h new file mode 100644 index 000000000000..a0353eb2b8c4 --- /dev/null +++ b/arch/loongarch/include/asm/acenv.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch specific ACPICA environments and implementation + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: lvjianmin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_LOONGARCH_ACENV_H +#define _ASM_LOONGARCH_ACENV_H + +/* + * This header is required by ACPI core, but we have nothing to fill in + * right now. Will be updated later when needed. 
+ */ + +#endif /* _ASM_LOONGARCH_ACENV_H */ diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h new file mode 100644 index 000000000000..978ac2419d70 --- /dev/null +++ b/arch/loongarch/include/asm/acpi.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Jianmin Lv + * Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_LOONGARCH_ACPI_H +#define _ASM_LOONGARCH_ACPI_H + +#ifdef CONFIG_ACPI +extern int acpi_strict; +extern int acpi_disabled; +extern int acpi_pci_disabled; +extern int acpi_noirq; + +#define acpi_os_ioremap acpi_os_ioremap +void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); + +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} + +static inline bool acpi_has_cpu_in_madt(void) +{ + return true; +} + +extern struct list_head acpi_wakeup_device_list; + +#endif /* !CONFIG_ACPI */ + +#define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT + +#endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h new file mode 100644 index 000000000000..8a32896d392b --- /dev/null +++ b/arch/loongarch/include/asm/addrspace.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ADDRSPACE_H +#define _ASM_ADDRSPACE_H + +#include + +#include + +/* + * This gives the physical RAM offset. + */ +#ifndef __ASSEMBLY__ +#ifndef PHYS_OFFSET +#define PHYS_OFFSET _AC(0, UL) +#endif +extern unsigned long vm_map_base; +#endif /* __ASSEMBLY__ */ + +#ifndef IO_BASE +#define IO_BASE CSR_DMW0_BASE +#endif + +#ifndef CACHE_BASE +#define CACHE_BASE CSR_DMW1_BASE +#endif + +#ifndef UNCACHE_BASE +#define UNCACHE_BASE CSR_DMW0_BASE +#endif + +#define DMW_PABITS 48 +#define TO_PHYS_MASK ((1ULL << DMW_PABITS) - 1) + +/* + * Memory above this physical address will be considered highmem. + */ +#ifndef HIGHMEM_START +#define HIGHMEM_START (_AC(1, UL) << _AC(DMW_PABITS, UL)) +#endif + +#define TO_PHYS(x) ( ((x) & TO_PHYS_MASK)) +#define TO_CACHE(x) (CACHE_BASE | ((x) & TO_PHYS_MASK)) +#define TO_UNCACHE(x) (UNCACHE_BASE | ((x) & TO_PHYS_MASK)) + +/* + * This handles the memory map. 
+ */ +#ifndef PAGE_OFFSET +#define PAGE_OFFSET (CACHE_BASE + PHYS_OFFSET) +#endif + +#ifndef FIXADDR_TOP +#define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) +#endif + +#ifdef __ASSEMBLY__ +#define _ATYPE_ +#define _ATYPE32_ +#define _ATYPE64_ +#define _CONST64_(x) x +#else +#define _ATYPE_ __PTRDIFF_TYPE__ +#define _ATYPE32_ int +#define _ATYPE64_ __s64 +#ifdef CONFIG_64BIT +#define _CONST64_(x) x ## L +#else +#define _CONST64_(x) x ## LL +#endif +#endif + +/* + * 32/64-bit LoongArch address spaces + */ +#ifdef __ASSEMBLY__ +#define _ACAST32_ +#define _ACAST64_ +#else +#define _ACAST32_ (_ATYPE_)(_ATYPE32_) /* widen if necessary */ +#define _ACAST64_ (_ATYPE64_) /* do _not_ narrow */ +#endif + +#ifdef CONFIG_32BIT + +#define UVRANGE 0x00000000 +#define KPRANGE0 0x80000000 +#define KPRANGE1 0xa0000000 +#define KVRANGE 0xc0000000 + +#else + +#define XUVRANGE _CONST64_(0x0000000000000000) +#define XSPRANGE _CONST64_(0x4000000000000000) +#define XKPRANGE _CONST64_(0x8000000000000000) +#define XKVRANGE _CONST64_(0xc000000000000000) + +#endif + +/* + * Returns the physical address of a KPRANGEx / XKPRANGE address + */ +#define PHYSADDR(a) ((_ACAST64_(a)) & TO_PHYS_MASK) + +#endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/asm-offsets.h b/arch/loongarch/include/asm/asm-offsets.h new file mode 100644 index 000000000000..b49215b280fe --- /dev/null +++ b/arch/loongarch/include/asm/asm-offsets.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..a63d09f8ee86 --- /dev/null +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h new file mode 100644 index 000000000000..290c1cc0c8c9 --- /dev/null +++ b/arch/loongarch/include/asm/asm.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Some useful macros for LoongArch assembler code + * + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1995, 1996, 1997, 1999, 2001 by Ralf Baechle + * Copyright (C) 1999 by Silicon Graphics, Inc. + * Copyright (C) 2001 MIPS Technologies, Inc. + * Copyright (C) 2002 Maciej W. Rozycki + */ +#ifndef __ASM_ASM_H +#define __ASM_ASM_H + +/* LoongArch pref instruction. */ +#ifdef CONFIG_CPU_HAS_PREFETCH + +#define PREF(hint, addr, offs) \ + preld hint, addr, offs; \ + +#define PREFX(hint, addr, index) \ + preldx hint, addr, index; \ + +#else /* !CONFIG_CPU_HAS_PREFETCH */ + +#define PREF(hint, addr, offs) +#define PREFX(hint, addr, index) + +#endif /* !CONFIG_CPU_HAS_PREFETCH */ + +/* + * Stack alignment + */ +#define STACK_ALIGN ~(0xf) + +/* + * Macros to handle different pointer/register sizes for 32/64-bit code + */ + +/* + * Size of a register + */ +#ifndef __loongarch64 +#define SZREG 4 +#else +#define SZREG 8 +#endif + +/* + * Use the following macros in assembler code to load/store registers, + * pointers etc. + */ +#if (SZREG == 4) +#define REG_L ld.w +#define REG_S st.w +#define REG_ADD add.w +#define REG_SUB sub.w +#else /* SZREG == 8 */ +#define REG_L ld.d +#define REG_S st.d +#define REG_ADD add.d +#define REG_SUB sub.d +#endif + +/* + * How to add/sub/load/store/shift C int variables.
+ */ +#if (__SIZEOF_INT__ == 4) +#define INT_ADD add.w +#define INT_ADDI addi.w +#define INT_SUB sub.w +#define INT_L ld.w +#define INT_S st.w +#define INT_SLL slli.w +#define INT_SLLV sll.w +#define INT_SRL srli.w +#define INT_SRLV srl.w +#define INT_SRA srai.w +#define INT_SRAV sra.w +#endif + +#if (__SIZEOF_INT__ == 8) +#define INT_ADD add.d +#define INT_ADDI addi.d +#define INT_SUB sub.d +#define INT_L ld.d +#define INT_S st.d +#define INT_SLL slli.d +#define INT_SLLV sll.d +#define INT_SRL srli.d +#define INT_SRLV srl.d +#define INT_SRA srai.d +#define INT_SRAV sra.d +#endif + +/* + * How to add/sub/load/store/shift C long variables. + */ +#if (__SIZEOF_LONG__ == 4) +#define LONG_ADD add.w +#define LONG_ADDI addi.w +#define LONG_SUB sub.w +#define LONG_L ld.w +#define LONG_S st.w +#define LONG_SLL slli.w +#define LONG_SLLV sll.w +#define LONG_SRL srli.w +#define LONG_SRLV srl.w +#define LONG_SRA srai.w +#define LONG_SRAV sra.w + +#ifdef __ASSEMBLY__ +#define LONG .word +#endif +#define LONGSIZE 4 +#define LONGMASK 3 +#define LONGLOG 2 +#endif + +#if (__SIZEOF_LONG__ == 8) +#define LONG_ADD add.d +#define LONG_ADDI addi.d +#define LONG_SUB sub.d +#define LONG_L ld.d +#define LONG_S st.d +#define LONG_SLL slli.d +#define LONG_SLLV sll.d +#define LONG_SRL srli.d +#define LONG_SRLV srl.d +#define LONG_SRA srai.d +#define LONG_SRAV sra.d + +#ifdef __ASSEMBLY__ +#define LONG .dword +#endif +#define LONGSIZE 8 +#define LONGMASK 7 +#define LONGLOG 3 +#endif + +/* + * How to add/sub/load/store/shift pointers. + */ +#if (__SIZEOF_POINTER__ == 4) +#define PTR_ADD add.w +#define PTR_ADDI addi.w +#define PTR_SUB sub.w +#define PTR_L ld.w +#define PTR_S st.w +#define PTR_LI li.w +#define PTR_SLL slli.w +#define PTR_SLLV sll.w +#define PTR_SRL srli.w +#define PTR_SRLV srl.w +#define PTR_SRA srai.w +#define PTR_SRAV sra.w + +#define PTR_SCALESHIFT 2 + +#ifdef __ASSEMBLY__ +#define PTR .word +#endif +#define PTRSIZE 4 +#define PTRLOG 2 +#endif + +#if (__SIZEOF_POINTER__ == 8) +#define PTR_ADD add.d +#define PTR_ADDI addi.d +#define PTR_SUB sub.d +#define PTR_L ld.d +#define PTR_S st.d +#define PTR_LI li.d +#define PTR_SLL slli.d +#define PTR_SLLV sll.d +#define PTR_SRL srli.d +#define PTR_SRLV srl.d +#define PTR_SRA srai.d +#define PTR_SRAV sra.d + +#define PTR_SCALESHIFT 3 + +#ifdef __ASSEMBLY__ +#define PTR .dword +#endif +#define PTRSIZE 8 +#define PTRLOG 3 +#endif + +#endif /* __ASM_ASM_H */ diff --git a/arch/loongarch/include/asm/asmmacro-64.h b/arch/loongarch/include/asm/asmmacro-64.h new file mode 100644 index 000000000000..90843d1002d9 --- /dev/null +++ b/arch/loongarch/include/asm/asmmacro-64.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * asmmacro.h: Assembler macros to make things easier to read. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */ +#ifndef _ASM_ASMMACRO_64_H +#define _ASM_ASMMACRO_64_H + +#include +#include +#include +#include + + .macro cpu_save_nonscratch thread + stptr.d s0, \thread, THREAD_REG23 + stptr.d s1, \thread, THREAD_REG24 + stptr.d s2, \thread, THREAD_REG25 + stptr.d s3, \thread, THREAD_REG26 + stptr.d s4, \thread, THREAD_REG27 + stptr.d s5, \thread, THREAD_REG28 + stptr.d s6, \thread, THREAD_REG29 + stptr.d s7, \thread, THREAD_REG30 + stptr.d s8, \thread, THREAD_REG31 + stptr.d sp, \thread, THREAD_REG03 + stptr.d fp, \thread, THREAD_REG22 + .endm + + .macro cpu_restore_nonscratch thread + ldptr.d s0, \thread, THREAD_REG23 + ldptr.d s1, \thread, THREAD_REG24 + ldptr.d s2, \thread, THREAD_REG25 + ldptr.d s3, \thread, THREAD_REG26 + ldptr.d s4, \thread, THREAD_REG27 + ldptr.d s5, \thread, THREAD_REG28 + ldptr.d s6, \thread, THREAD_REG29 + ldptr.d s7, \thread, THREAD_REG30 + ldptr.d s8, \thread, THREAD_REG31 + ldptr.d sp, \thread, THREAD_REG03 + ldptr.d fp, \thread, THREAD_REG22 + ldptr.d ra, \thread, THREAD_REG01 + .endm + +#endif /* _ASM_ASMMACRO_64_H */ diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h new file mode 100644 index 000000000000..40414d605ed6 --- /dev/null +++ b/arch/loongarch/include/asm/asmmacro.h @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_ASMMACRO_H +#define _ASM_ASMMACRO_H + +#include + +#ifdef CONFIG_32BIT +#include +#endif +#ifdef CONFIG_64BIT +#include +#endif + + .macro parse_v var val + \var = \val + .endm + + .macro parse_r var r + \var = -1 + .ifc \r, $r0 + \var = 0 + .endif + .ifc \r, $r1 + \var = 1 + .endif + .ifc \r, $r2 + \var = 2 + .endif + .ifc \r, $r3 + \var = 3 + .endif + .ifc \r, $r4 + \var = 4 + .endif + .ifc \r, $r5 + \var = 5 + .endif + .ifc \r, $r6 + \var = 6 + .endif + .ifc \r, $r7 + \var = 7 + .endif + .ifc \r, $r8 + \var = 8 + .endif + .ifc \r, $r9 + \var = 9 + .endif + .ifc \r, $r10 + \var = 10 + .endif + .ifc \r, $r11 + \var = 11 + .endif + .ifc \r, $r12 + \var = 12 + .endif + .ifc \r, $r13 + \var = 13 + .endif + .ifc \r, $r14 + \var = 14 + .endif + .ifc \r, $r15 + \var = 15 + .endif + .ifc \r, $r16 + \var = 16 + .endif + .ifc \r, $r17 + \var = 17 + .endif + .ifc \r, $r18 + \var = 18 + .endif + .ifc \r, $r19 + \var = 19 + .endif + .ifc \r, $r20 + \var = 20 + .endif + .ifc \r, $r21 + \var = 21 + .endif + .ifc \r, $r22 + \var = 22 + .endif + .ifc \r, $r23 + \var = 23 + .endif + .ifc \r, $r24 + \var = 24 + .endif + .ifc \r, $r25 + \var = 25 + .endif + .ifc \r, $r26 + \var = 26 + .endif + .ifc \r, $r27 + \var = 27 + .endif + .ifc \r, $r28 + \var = 28 + .endif + .ifc \r, $r29 + \var = 29 + .endif + .ifc \r, $r30 + \var = 30 + .endif + .ifc \r, $r31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \r" + .endif + .endm + + .macro fpu_save_csr thread tmp + movfcsr2gr \tmp, fcsr0 + stptr.w \tmp, \thread, THREAD_FCSR + .endm + + .macro fpu_restore_csr thread tmp + ldptr.w \tmp, \thread, THREAD_FCSR + movgr2fcsr fcsr0, \tmp + .endm + + .macro fpu_save_cc thread tmp0 tmp1 + movcf2gr \tmp0, $fcc0 + move \tmp1, \tmp0 + movcf2gr \tmp0, $fcc1 + bstrins.d \tmp1, \tmp0, 15, 8 + movcf2gr \tmp0, $fcc2 + bstrins.d \tmp1, \tmp0, 23, 16 + movcf2gr \tmp0, $fcc3 + bstrins.d \tmp1, \tmp0, 31, 24 + movcf2gr \tmp0, $fcc4 + bstrins.d \tmp1, \tmp0, 39, 32 + movcf2gr \tmp0, $fcc5 + bstrins.d \tmp1, \tmp0, 47, 40 + movcf2gr \tmp0, $fcc6 + bstrins.d \tmp1, \tmp0, 55, 48 + movcf2gr \tmp0, $fcc7 + bstrins.d \tmp1, \tmp0, 63, 56 
+ stptr.d \tmp1, \thread, THREAD_FCC + .endm + + .macro fpu_restore_cc thread tmp0 tmp1 + ldptr.d \tmp0, \thread, THREAD_FCC + bstrpick.d \tmp1, \tmp0, 7, 0 + movgr2cf $fcc0, \tmp1 + bstrpick.d \tmp1, \tmp0, 15, 8 + movgr2cf $fcc1, \tmp1 + bstrpick.d \tmp1, \tmp0, 23, 16 + movgr2cf $fcc2, \tmp1 + bstrpick.d \tmp1, \tmp0, 31, 24 + movgr2cf $fcc3, \tmp1 + bstrpick.d \tmp1, \tmp0, 39, 32 + movgr2cf $fcc4, \tmp1 + bstrpick.d \tmp1, \tmp0, 47, 40 + movgr2cf $fcc5, \tmp1 + bstrpick.d \tmp1, \tmp0, 55, 48 + movgr2cf $fcc6, \tmp1 + bstrpick.d \tmp1, \tmp0, 63, 56 + movgr2cf $fcc7, \tmp1 + .endm + + .macro fpu_save_double thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \tmp, \thread + fst.d $f0, \tmp, THREAD_FPR0 - THREAD_FPR0 + fst.d $f1, \tmp, THREAD_FPR1 - THREAD_FPR0 + fst.d $f2, \tmp, THREAD_FPR2 - THREAD_FPR0 + fst.d $f3, \tmp, THREAD_FPR3 - THREAD_FPR0 + fst.d $f4, \tmp, THREAD_FPR4 - THREAD_FPR0 + fst.d $f5, \tmp, THREAD_FPR5 - THREAD_FPR0 + fst.d $f6, \tmp, THREAD_FPR6 - THREAD_FPR0 + fst.d $f7, \tmp, THREAD_FPR7 - THREAD_FPR0 + fst.d $f8, \tmp, THREAD_FPR8 - THREAD_FPR0 + fst.d $f9, \tmp, THREAD_FPR9 - THREAD_FPR0 + fst.d $f10, \tmp, THREAD_FPR10 - THREAD_FPR0 + fst.d $f11, \tmp, THREAD_FPR11 - THREAD_FPR0 + fst.d $f12, \tmp, THREAD_FPR12 - THREAD_FPR0 + fst.d $f13, \tmp, THREAD_FPR13 - THREAD_FPR0 + fst.d $f14, \tmp, THREAD_FPR14 - THREAD_FPR0 + fst.d $f15, \tmp, THREAD_FPR15 - THREAD_FPR0 + fst.d $f16, \tmp, THREAD_FPR16 - THREAD_FPR0 + fst.d $f17, \tmp, THREAD_FPR17 - THREAD_FPR0 + fst.d $f18, \tmp, THREAD_FPR18 - THREAD_FPR0 + fst.d $f19, \tmp, THREAD_FPR19 - THREAD_FPR0 + fst.d $f20, \tmp, THREAD_FPR20 - THREAD_FPR0 + fst.d $f21, \tmp, THREAD_FPR21 - THREAD_FPR0 + fst.d $f22, \tmp, THREAD_FPR22 - THREAD_FPR0 + fst.d $f23, \tmp, THREAD_FPR23 - THREAD_FPR0 + fst.d $f24, \tmp, THREAD_FPR24 - THREAD_FPR0 + fst.d $f25, \tmp, THREAD_FPR25 - THREAD_FPR0 + fst.d $f26, \tmp, THREAD_FPR26 - THREAD_FPR0 + fst.d $f27, \tmp, THREAD_FPR27 - THREAD_FPR0 + fst.d $f28, \tmp, THREAD_FPR28 - THREAD_FPR0 + fst.d $f29, \tmp, THREAD_FPR29 - THREAD_FPR0 + fst.d $f30, \tmp, THREAD_FPR30 - THREAD_FPR0 + fst.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + + .macro fpu_restore_double thread tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \tmp, \thread + fld.d $f0, \tmp, THREAD_FPR0 - THREAD_FPR0 + fld.d $f1, \tmp, THREAD_FPR1 - THREAD_FPR0 + fld.d $f2, \tmp, THREAD_FPR2 - THREAD_FPR0 + fld.d $f3, \tmp, THREAD_FPR3 - THREAD_FPR0 + fld.d $f4, \tmp, THREAD_FPR4 - THREAD_FPR0 + fld.d $f5, \tmp, THREAD_FPR5 - THREAD_FPR0 + fld.d $f6, \tmp, THREAD_FPR6 - THREAD_FPR0 + fld.d $f7, \tmp, THREAD_FPR7 - THREAD_FPR0 + fld.d $f8, \tmp, THREAD_FPR8 - THREAD_FPR0 + fld.d $f9, \tmp, THREAD_FPR9 - THREAD_FPR0 + fld.d $f10, \tmp, THREAD_FPR10 - THREAD_FPR0 + fld.d $f11, \tmp, THREAD_FPR11 - THREAD_FPR0 + fld.d $f12, \tmp, THREAD_FPR12 - THREAD_FPR0 + fld.d $f13, \tmp, THREAD_FPR13 - THREAD_FPR0 + fld.d $f14, \tmp, THREAD_FPR14 - THREAD_FPR0 + fld.d $f15, \tmp, THREAD_FPR15 - THREAD_FPR0 + fld.d $f16, \tmp, THREAD_FPR16 - THREAD_FPR0 + fld.d $f17, \tmp, THREAD_FPR17 - THREAD_FPR0 + fld.d $f18, \tmp, THREAD_FPR18 - THREAD_FPR0 + fld.d $f19, \tmp, THREAD_FPR19 - THREAD_FPR0 + fld.d $f20, \tmp, THREAD_FPR20 - THREAD_FPR0 + fld.d $f21, \tmp, THREAD_FPR21 - THREAD_FPR0 + fld.d $f22, \tmp, THREAD_FPR22 - THREAD_FPR0 + fld.d $f23, \tmp, THREAD_FPR23 - THREAD_FPR0 + fld.d $f24, \tmp, THREAD_FPR24 - THREAD_FPR0 + fld.d $f25, \tmp, THREAD_FPR25 - THREAD_FPR0 + fld.d $f26, \tmp, THREAD_FPR26 - THREAD_FPR0 + fld.d $f27, \tmp, 
THREAD_FPR27 - THREAD_FPR0 + fld.d $f28, \tmp, THREAD_FPR28 - THREAD_FPR0 + fld.d $f29, \tmp, THREAD_FPR29 - THREAD_FPR0 + fld.d $f30, \tmp, THREAD_FPR30 - THREAD_FPR0 + fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 + .endm + +.macro jr dst + jirl zero, \dst, 0 +.endm + +.macro jalr dst + jirl ra, \dst, 0 +.endm + +.macro not dst src + nor \dst, \src, zero +.endm + +.macro bgt r0 r1 label + blt \r1, \r0, \label +.endm + +.macro bltz r0 label + blt \r0, zero, \label +.endm + +.macro bgez r0 label + bge \r0, zero, \label +.endm + +#endif /* _ASM_ASMMACRO_H */ diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h new file mode 100644 index 000000000000..81fa9f97eb51 --- /dev/null +++ b/arch/loongarch/include/asm/atomic.h @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc. + * + * But use these as seldom as possible since they are much slower + * than regular operations. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_ATOMIC_H +#define _ASM_ATOMIC_H + +#include +#include +#include +#include +#include + +#if __SIZEOF_LONG__ == 4 +#define __LL "ll.w " +#define __SC "sc.w " +#define __AMADD "amadd.w " +#define __AMAND_DB "amand_db.w " +#define __AMOR_DB "amor_db.w " +#define __AMXOR_DB "amxor_db.w " +#elif __SIZEOF_LONG__ == 8 +#define __LL "ll.d " +#define __SC "sc.d " +#define __AMADD "amadd.d " +#define __AMAND_DB "amand_db.d " +#define __AMOR_DB "amor_db.d " +#define __AMXOR_DB "amxor_db.d " +#endif + +#define ATOMIC_INIT(i) { (i) } + +/* + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +#define atomic_read(v) READ_ONCE((v)->counter) + +/* + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i.
 + */ +#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i)) + +#define ATOMIC_OP(op, I, asm_op) \ +static __inline__ void atomic_##op(int i, atomic_t * v) \ +{ \ + __asm__ __volatile__( \ + "am"#asm_op"_db.w" " $zero, %1, %0 \n" \ + : "+ZB" (v->counter) \ + : "r" (I) \ + : "memory"); \ +} + +#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ +static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + __asm__ __volatile__( \ + "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result c_op I; \ +} + +#define ATOMIC_FETCH_OP(op, I, asm_op) \ +static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \ +{ \ + int result; \ + \ + __asm__ __volatile__( \ + "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result; \ +} + +#define ATOMIC_OPS(op, I, asm_op, c_op) \ + ATOMIC_OP(op, I, asm_op) \ + ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ + ATOMIC_FETCH_OP(op, I, asm_op) + +ATOMIC_OPS(add, i, add, +) +ATOMIC_OPS(sub, -i, add, +) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + +#undef ATOMIC_OPS + +#define ATOMIC_OPS(op, I, asm_op) \ + ATOMIC_OP(op, I, asm_op) \ + ATOMIC_FETCH_OP(op, I, asm_op) + +ATOMIC_OPS(and, i, and) +ATOMIC_OPS(or, i, or) +ATOMIC_OPS(xor, i, xor) + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int prev, rc; + + __asm__ __volatile__ ( + "0: ll.w %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add.w %[rc], %[p], %[a]\n" + " sc.w %[rc], %[c]\n" + " beqz %[rc], 0b\n" + " b 2f\n" + "1:\n" + __WEAK_LLSC_MB + "2:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), + [c]"=ZB" (v->counter) + : [a]"r" (a), [u]"r" (u) + : "memory"); + + return prev; +} +#define atomic_fetch_add_unless atomic_fetch_add_unless + +/* + * atomic_sub_if_positive - conditionally subtract integer from atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically test @v and subtract @i if @v is greater than or equal to @i. + * The function returns the old value of @v minus @i.
+ */ +static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) +{ + int result; + int temp; + + if (__builtin_constant_p(i)) { + __asm__ __volatile__( + "1: ll.w %1, %2 # atomic_sub_if_positive\n" + " addi.w %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.w %1, %2 \n" + " beq $zero, %1, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "I" (-i)); + } else { + __asm__ __volatile__( + "1: ll.w %1, %2 # atomic_sub_if_positive\n" + " sub.w %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.w %1, %2 \n" + " beq $zero, %1, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "r" (i)); + } + + return result; +} + +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), (new))) + +/* + * atomic_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic_t + */ +#define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) + +#ifdef CONFIG_64BIT + +#define ATOMIC64_INIT(i) { (i) } + +/* + * atomic64_read - read atomic variable + * @v: pointer of type atomic64_t + * + */ +#define atomic64_read(v) READ_ONCE((v)->counter) + +/* + * atomic64_set - set atomic variable + * @v: pointer of type atomic64_t + * @i: required value + */ +#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) + +#define ATOMIC64_OP(op, I, asm_op) \ +static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +{ \ + __asm__ __volatile__( \ + "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + : "+ZB" (v->counter) \ + : "r" (I) \ + : "memory"); \ +} + +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ +static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ +{ \ + long result; \ + __asm__ __volatile__( \ + "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result c_op I; \ +} + +#define ATOMIC64_FETCH_OP(op, I, asm_op) \ +static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +{ \ + long result; \ + \ + __asm__ __volatile__( \ + "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + : "+ZB" (v->counter), "=&r" (result) \ + : "r" (I) \ + : "memory"); \ + \ + return result; \ +} + +#define ATOMIC64_OPS(op, I, asm_op, c_op) \ + ATOMIC64_OP(op, I, asm_op) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ + ATOMIC64_FETCH_OP(op, I, asm_op) + +ATOMIC64_OPS(add, i, add, +) +ATOMIC64_OPS(sub, -i, add, +) + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + +#undef ATOMIC64_OPS + +#define ATOMIC64_OPS(op, I, asm_op) \ + ATOMIC64_OP(op, I, asm_op) \ + ATOMIC64_FETCH_OP(op, I, asm_op) + +ATOMIC64_OPS(and, i, and) +ATOMIC64_OPS(or, i, or) +ATOMIC64_OPS(xor, i, xor) + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +{ + long prev, rc; + + __asm__ __volatile__ ( + "0: ll.d %[p], %[c]\n" + " beq %[p], %[u], 1f\n" + " add.d %[rc], %[p], %[a]\n" + " sc.d %[rc], %[c]\n" + " beqz %[rc], 0b\n" + " b 2f\n" + 
"1:\n" + __WEAK_LLSC_MB + "2:\n" + : [p]"=&r" (prev), [rc]"=&r" (rc), + [c] "=ZB" (v->counter) + : [a]"r" (a), [u]"r" (u) + : "memory"); + + return prev; +} +#define atomic64_fetch_add_unless atomic64_fetch_add_unless + +/* + * atomic64_sub_if_positive - conditionally subtract integer from atomic + * variable + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically test @v and subtract @i if @v is greater or equal than @i. + * The function returns the old value of @v minus @i. + */ +static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) +{ + long result; + long temp; + + if (__builtin_constant_p(i)) { + __asm__ __volatile__( + "1: ll.d %1, %2 # atomic64_sub_if_positive \n" + " addi.d %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.d %1, %2 \n" + " beq %1, $zero, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "I" (-i)); + } else { + __asm__ __volatile__( + "1: ll.d %1, %2 # atomic64_sub_if_positive \n" + " sub.d %0, %1, %3 \n" + " or %1, %0, $zero \n" + " blt %0, $zero, 2f \n" + " sc.d %1, %2 \n" + " beq %1, $zero, 1b \n" + "2: \n" + : "=&r" (result), "=&r" (temp), + "+" GCC_OFF_SMALL_ASM() (v->counter) + : "r" (i)); + } + + return result; +} + +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), (new))) + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + */ +#define atomic64_dec_if_positive(v) atomic64_sub_if_positive(1, v) + +#endif /* CONFIG_64BIT */ + +#endif /* _ASM_ATOMIC_H */ diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h new file mode 100644 index 000000000000..03b97918e750 --- /dev/null +++ b/arch/loongarch/include/asm/barrier.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#include + +#define __sync() __asm__ __volatile__("dbar 0" : : :"memory") + +#define fast_wmb() __sync() +#define fast_rmb() __sync() +#define fast_mb() __sync() +#define fast_iob() __sync() +#define wbflush() __sync() + +#define wmb() fast_wmb() +#define rmb() fast_rmb() +#define mb() fast_mb() +#define iob() fast_iob() + +/** + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (@index < @size) + */ +#define array_index_mask_nospec array_index_mask_nospec +static inline unsigned long array_index_mask_nospec(unsigned long index, + unsigned long size) +{ + unsigned long mask; + + __asm__ __volatile__( + "sltu %0, %1, %2\n\t" +#if (__SIZEOF_LONG__ == 4) + "sub.w %0, $r0, %0\n\t" +#elif (__SIZEOF_LONG__ == 8) + "sub.d %0, $r0, %0\n\t" +#endif + : "=r" (mask) + : "r" (index), "r" (size) + :); + + return mask; +} + +#include + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/loongarch/include/asm/bitops.h b/arch/loongarch/include/asm/bitops.h new file mode 100644 index 000000000000..c7384870267b --- /dev/null +++ b/arch/loongarch/include/asm/bitops.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_BITOPS_H +#define _ASM_BITOPS_H + +#include + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include + +#include +#include 
+#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#endif /* _ASM_BITOPS_H */ diff --git a/arch/loongarch/include/asm/bitrev.h b/arch/loongarch/include/asm/bitrev.h new file mode 100644 index 000000000000..38147173f2ad --- /dev/null +++ b/arch/loongarch/include/asm/bitrev.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __LOONGARCH_ASM_BITREV_H__ +#define __LOONGARCH_ASM_BITREV_H__ + +#include + +static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x) +{ + u32 ret; + + asm("bitrev.4b %0, %1" : "=r"(ret) : "r"(__swab32(x))); + return ret; +} + +static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x) +{ + u16 ret; + + asm("bitrev.4b %0, %1" : "=r"(ret) : "r"(__swab16(x))); + return ret; +} + +static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x) +{ + u8 ret; + + asm("bitrev.4b %0, %1" : "=r"(ret) : "r"(x)); + return ret; +} + +#endif /* __LOONGARCH_ASM_BITREV_H__ */ diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h new file mode 100644 index 000000000000..60a0c9f39a1e --- /dev/null +++ b/arch/loongarch/include/asm/bootinfo.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_BOOTINFO_H +#define _ASM_BOOTINFO_H + +#include +#include + +const char *get_system_type(void); + +extern void memblock_init(void); +extern void detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max); + +extern void early_init(void); +extern void init_environ(void); +extern void platform_init(void); +extern void plat_mem_setup(void); + +struct loongson_board_info { + int bios_size; + const char *bios_vendor; + const char *bios_version; + const char *bios_release_date; + const char *board_name; + const char *board_vendor; +}; + +struct loongson_system_configuration { + int nr_cpus; + int nr_nodes; + int nr_io_pics; + int boot_cpu_id; + int cores_per_node; + int cores_per_package; + const char *cpuname; + u64 suspend_addr; +}; + +extern u64 efi_system_table; +extern struct loongson_board_info b_info; +extern struct loongson_system_configuration loongson_sysconf; +extern unsigned long fw_arg0, fw_arg1, fw_arg2; + +#endif /* _ASM_BOOTINFO_H */ diff --git a/arch/loongarch/include/asm/branch.h b/arch/loongarch/include/asm/branch.h new file mode 100644 index 000000000000..830279859f4c --- /dev/null +++ b/arch/loongarch/include/asm/branch.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */ +#ifndef _ASM_BRANCH_H +#define _ASM_BRANCH_H + +#include + +static inline unsigned long exception_era(struct pt_regs *regs) +{ + return regs->csr_era; +} + +static inline int compute_return_era(struct pt_regs *regs) +{ + regs->csr_era += 4; + return 0; +} + +#endif /* _ASM_BRANCH_H */ diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h new file mode 100644 index 000000000000..bda49108a76d --- /dev/null +++ b/arch/loongarch/include/asm/bug.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BUG_H +#define __ASM_BUG_H + +#include + +#ifdef CONFIG_BUG + +#include + +static inline void __noreturn BUG(void) +{ + __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); + unreachable(); +} + +#define HAVE_ARCH_BUG + +#endif + +#include + +#endif /* __ASM_BUG_H */ diff --git a/arch/loongarch/include/asm/cache.h b/arch/loongarch/include/asm/cache.h new file mode 100644 index 000000000000..62eb6271af42 --- /dev/null +++ b/arch/loongarch/include/asm/cache.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_CACHE_H +#define _ASM_CACHE_H + +#define L1_CACHE_SHIFT CONFIG_L1_CACHE_SHIFT +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + +#endif /* _ASM_CACHE_H */ diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h new file mode 100644 index 000000000000..053cedd08d4f --- /dev/null +++ b/arch/loongarch/include/asm/cacheflush.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_CACHEFLUSH_H +#define _ASM_CACHEFLUSH_H + +#include +#include +#include + +extern void local_flush_icache_range(unsigned long start, unsigned long end); + +#define flush_icache_range local_flush_icache_range +#define flush_icache_user_range local_flush_icache_range + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 + +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) +#define flush_icache_page(vma, page) do { } while (0) +#define flush_icache_user_page(vma, page, addr, len) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) + +#define cache_op(op,addr) \ + __asm__ __volatile__( \ + " cacop %0, %1 \n" \ + : \ + : "i" (op), "ZC" (*(unsigned char *)(addr))) + +static inline void flush_icache_line_indexed(unsigned long addr) +{ + cache_op(Index_Invalidate_I, addr); +} + +static inline void flush_dcache_line_indexed(unsigned long addr) +{ + cache_op(Index_Writeback_Inv_D, addr); +} + +static inline void flush_vcache_line_indexed(unsigned long addr) +{ + cache_op(Index_Writeback_Inv_V, addr); +} + +static inline void flush_scache_line_indexed(unsigned long addr) +{ + cache_op(Index_Writeback_Inv_S, addr); +} + +static inline void flush_icache_line(unsigned long addr) +{ + cache_op(Hit_Invalidate_I, addr); +} + +static inline void flush_dcache_line(unsigned long addr) +{ + cache_op(Hit_Writeback_Inv_D, addr); +} + +static inline void 
flush_vcache_line(unsigned long addr) +{ + cache_op(Hit_Writeback_Inv_V, addr); +} + +static inline void flush_scache_line(unsigned long addr) +{ + cache_op(Hit_Writeback_Inv_S, addr); +} + +#include + +#endif /* _ASM_CACHEFLUSH_H */ diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h new file mode 100644 index 000000000000..8d7649a918fe --- /dev/null +++ b/arch/loongarch/include/asm/cacheops.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Cache operations for the cache instruction. + */ +#ifndef __ASM_CACHEOPS_H +#define __ASM_CACHEOPS_H + +/* + * Most cache ops are split into a 2 bit field identifying the cache, and a 3 + * bit field identifying the cache operation. + */ +#define CacheOp_Cache 0x03 +#define CacheOp_Op 0x1c + +#define Cache_I 0x00 +#define Cache_D 0x01 +#define Cache_V 0x02 +#define Cache_S 0x03 + +#define Index_Invalidate 0x08 +#define Index_Writeback_Inv 0x08 +#define Hit_Invalidate 0x10 +#define Hit_Writeback_Inv 0x10 +#define CacheOp_User_Defined 0x18 + +#define Index_Invalidate_I (Cache_I | Index_Invalidate) +#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv) +#define Index_Writeback_Inv_V (Cache_V | Index_Writeback_Inv) +#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv) +#define Hit_Invalidate_I (Cache_I | Hit_Invalidate) +#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_V (Cache_V | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv) + +#endif /* __ASM_CACHEOPS_H */ diff --git a/arch/loongarch/include/asm/clocksource.h b/arch/loongarch/include/asm/clocksource.h new file mode 100644 index 000000000000..803f0b42b28c --- /dev/null +++ b/arch/loongarch/include/asm/clocksource.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
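As a quick illustration of the split described in cacheops.h above, every composite opcode is just the 2-bit cache selector OR'ed into the operation bits, so the two halves can be recovered with the CacheOp_Cache and CacheOp_Op masks. A standalone sketch (not part of the patch):

#include <stdio.h>

#define CacheOp_Cache	0x03
#define CacheOp_Op	0x1c

int main(void)
{
	unsigned int op = 0x01 | 0x10;	/* Cache_D | Hit_Writeback_Inv */

	/* prints: op=0x11 cache=1 kind=0x10 */
	printf("op=0x%02x cache=%u kind=0x%02x\n",
	       op, op & CacheOp_Cache, op & CacheOp_Op);
	return 0;
}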
+ */ + +#ifndef __ASM_CLOCKSOURCE_H +#define __ASM_CLOCKSOURCE_H + +#include + +#endif /* __ASM_CLOCKSOURCE_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h new file mode 100644 index 000000000000..63e60e272771 --- /dev/null +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CMPXCHG_H +#define __ASM_CMPXCHG_H + +#include +#include + +#define __xchg_asm(amswap_db, m, val) \ +({ \ + __typeof(val) __ret; \ + \ + __asm__ __volatile__ ( \ + " "amswap_db" %1, %z2, %0 \n" \ + : "+ZB" (*m), "=&r" (__ret) \ + : "Jr" (val) \ + : "memory"); \ + \ + __ret; \ +}) + +extern unsigned long __xchg_small(volatile void *ptr, unsigned long x, + unsigned int size); + +static inline unsigned long __xchg(volatile void *ptr, unsigned long x, + int size) +{ + switch (size) { + case 1: + case 2: + return __xchg_small(ptr, x, size); + + case 4: + return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x); + + case 8: + return __xchg_asm("amswap_db.d", (volatile u64 *)ptr, (u64)x); + + default: + BUILD_BUG(); + } + + return 0; +} + +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __res; \ + \ + __res = (__typeof__(*(ptr))) \ + __xchg((ptr), (unsigned long)(x), sizeof(*(ptr))); \ + \ + __res; \ +}) + +#define __cmpxchg_asm(ld, st, m, old, new) \ +({ \ + __typeof(old) __ret; \ + \ + __asm__ __volatile__( \ + "1: " ld " %0, %2 # __cmpxchg_asm \n" \ + " bne %0, %z3, 2f \n" \ + " or $t0, %z4, $zero \n" \ + " " st " $t0, %1 \n" \ + " beq $zero, $t0, 1b \n" \ + "2: \n" \ + : "=&r" (__ret), "=ZB"(*m) \ + : "ZB"(*m), "Jr" (old), "Jr" (new) \ + : "t0", "memory"); \ + \ + __ret; \ +}) + +extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size); + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) +{ + switch (size) { + case 1: + case 2: + return __cmpxchg_small(ptr, old, new, size); + + case 4: + return __cmpxchg_asm("ll.w", "sc.w", (volatile u32 *)ptr, + (u32)old, new); + + case 8: + return __cmpxchg_asm("ll.d", "sc.d", (volatile u64 *)ptr, + (u64)old, new); + + default: + BUILD_BUG(); + } + + return 0; +} + +#define cmpxchg_local(ptr, old, new) \ + ((__typeof__(*(ptr))) \ + __cmpxchg((ptr), \ + (unsigned long)(__typeof__(*(ptr)))(old), \ + (unsigned long)(__typeof__(*(ptr)))(new), \ + sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ +({ \ + __typeof__(*(ptr)) __res; \ + \ + __res = cmpxchg_local((ptr), (old), (new)); \ + \ + __res; \ +}) + +#ifdef CONFIG_64BIT +#define cmpxchg64_local(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ + }) + +#define cmpxchg64(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg((ptr), (o), (n)); \ + }) +#else +#include +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n)) +#endif + +#endif /* __ASM_CMPXCHG_H */ diff --git a/arch/loongarch/include/asm/compiler.h b/arch/loongarch/include/asm/compiler.h new file mode 100644 index 000000000000..2dc4d84b2a32 --- /dev/null +++ b/arch/loongarch/include/asm/compiler.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
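To show how the cmpxchg() defined above is typically consumed, here is a hedged sketch of the classic read-modify-write retry idiom (generic kernel style, not code from this series):

/* Sketch: a saturating increment built on the cmpxchg() above. */
static inline unsigned int sat_inc(unsigned int *p, unsigned int max)
{
	unsigned int old, new;

	do {
		old = READ_ONCE(*p);
		if (old == max)
			return old;	/* already saturated, no store */
		new = old + 1;
	} while (cmpxchg(p, old, new) != old);

	return new;
}

Because sizeof(*p) is 4, the call expands to the ll.w/sc.w variant of __cmpxchg_asm().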
See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef _ASM_COMPILER_H +#define _ASM_COMPILER_H + +#define GCC_OFF_SMALL_ASM() "ZC" + +#define LOONGARCH_ISA_LEVEL "loongarch" +#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch" +#define LOONGARCH_ISA_LEVEL_RAW loongarch +#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW + +#endif /* _ASM_COMPILER_H */ diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h new file mode 100644 index 000000000000..dca5c43851f2 --- /dev/null +++ b/arch/loongarch/include/asm/cpu-features.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CPU_FEATURES_H +#define __ASM_CPU_FEATURES_H + +#include +#include + +#define cpu_opt(opt) (cpu_data[0].options & (opt)) +#define cpu_has(feat) (cpu_data[0].options & BIT_ULL(feat)) + +#define cpu_has_loongarch (cpu_has_loongarch32 | cpu_has_loongarch64) +#define cpu_has_loongarch32 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT) +#define cpu_has_loongarch64 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) + +#define cpu_icache_line_size() cpu_data[0].icache.linesz +#define cpu_dcache_line_size() cpu_data[0].dcache.linesz +#define cpu_vcache_line_size() cpu_data[0].vcache.linesz +#define cpu_scache_line_size() cpu_data[0].scache.linesz + +#ifdef CONFIG_32BIT +# define cpu_has_64bits (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) +# define cpu_vabits 31 +# define cpu_pabits 31 +#endif + +#ifdef CONFIG_64BIT +# define cpu_has_64bits 1 +# define cpu_vabits cpu_data[0].vabits +# define cpu_pabits cpu_data[0].pabits +# define __NEED_ADDRBITS_PROBE +#endif + +/* + * SMP assumption: Options of CPU 0 are a superset of all processors. + * This is true for all known LoongArch systems. 
+ */ +#define cpu_has_cpucfg cpu_opt(LOONGARCH_CPU_CPUCFG) +#define cpu_has_lam cpu_opt(LOONGARCH_CPU_LAM) +#define cpu_has_ual cpu_opt(LOONGARCH_CPU_UAL) +#define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) +#define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX) +#define cpu_has_lasx cpu_opt(LOONGARCH_CPU_LASX) +#define cpu_has_complex cpu_opt(LOONGARCH_CPU_COMPLEX) +#define cpu_has_crypto cpu_opt(LOONGARCH_CPU_CRYPTO) +#define cpu_has_lvz cpu_opt(LOONGARCH_CPU_LVZ) +#define cpu_has_lbt_x86 cpu_opt(LOONGARCH_CPU_LBT_X86) +#define cpu_has_lbt_arm cpu_opt(LOONGARCH_CPU_LBT_ARM) +#define cpu_has_lbt_mips cpu_opt(LOONGARCH_CPU_LBT_MIPS) +#define cpu_has_lbt (cpu_has_lbt_x86|cpu_has_lbt_arm|cpu_has_lbt_mips) +#define cpu_has_csr cpu_opt(LOONGARCH_CPU_CSR) +#define cpu_has_tlb cpu_opt(LOONGARCH_CPU_TLB) +#define cpu_has_watch cpu_opt(LOONGARCH_CPU_WATCH) +#define cpu_has_vint cpu_opt(LOONGARCH_CPU_VINT) +#define cpu_has_csripi cpu_opt(LOONGARCH_CPU_CSRIPI) +#define cpu_has_extioi cpu_opt(LOONGARCH_CPU_EXTIOI) +#define cpu_has_prefetch cpu_opt(LOONGARCH_CPU_PREFETCH) +#define cpu_has_pmp cpu_opt(LOONGARCH_CPU_PMP) +#define cpu_has_perf cpu_opt(LOONGARCH_CPU_PMP) +#define cpu_has_scalefreq cpu_opt(LOONGARCH_CPU_SCALEFREQ) +#define cpu_has_flatmode cpu_opt(LOONGARCH_CPU_FLATMODE) +#define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE) +#define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) +#define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) + + +#endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h new file mode 100644 index 000000000000..6a025fa1ad66 --- /dev/null +++ b/arch/loongarch/include/asm/cpu-info.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CPU_INFO_H +#define __ASM_CPU_INFO_H + +#include +#include + +#include + +/* + * Descriptor for a cache + */ +struct cache_desc { + unsigned int waysize; /* Bytes per way */ + unsigned short sets; /* Number of lines per set */ + unsigned char ways; /* Number of ways */ + unsigned char linesz; /* Size of line in bytes */ + unsigned char waybit; /* Bits to select in a cache set */ + unsigned char flags; /* Flags describing cache properties */ +}; + +struct cpuinfo_loongarch { + u64 asid_cache; + unsigned long asid_mask; + + /* + * Capability and feature descriptor structure for LoongArch CPU + */ + unsigned long ases; + unsigned long long options; + unsigned int processor_id; + unsigned int fpu_vers; + unsigned int fpu_csr0; + unsigned int fpu_mask; + unsigned int cputype; + int isa_level; + int tlbsize; + int tlbsizemtlb; + int tlbsizestlbsets; + int tlbsizestlbways; + struct cache_desc icache; /* Primary I-cache */ + struct cache_desc dcache; /* Primary D or combined I/D cache */ + struct cache_desc vcache; /* Victim cache, between pcache and scache */ + struct cache_desc scache; /* Secondary cache */ + struct cache_desc tcache; /* Tertiary/split secondary cache */ + int core; /* physical core number in package */ + int package;/* physical package number */ + int vabits; /* Virtual Address size in bits */ + int pabits; /* Physical Address size in bits */ + unsigned int ksave_mask; /* Usable KSave mask. 
*/ + unsigned int watch_dreg_count; /* Number of data breakpoints */ + unsigned int watch_ireg_count; /* Number of instruction breakpoints */ + unsigned int watch_reg_use_cnt; /* min(NUM_WATCH_REGS, watch_dreg_count + watch_ireg_count), Usable by ptrace */ +} __attribute__((aligned(SMP_CACHE_BYTES))); + +extern struct cpuinfo_loongarch cpu_data[]; +#define boot_cpu_data cpu_data[0] +#define current_cpu_data cpu_data[smp_processor_id()] +#define raw_current_cpu_data cpu_data[raw_smp_processor_id()] + +extern void cpu_probe(void); + +extern const char *__cpu_family[]; +extern const char *__cpu_full_name[]; +#define cpu_family_string() __cpu_family[raw_smp_processor_id()] +#define cpu_full_name_string() __cpu_full_name[raw_smp_processor_id()] + +struct seq_file; +struct notifier_block; + +extern int register_proc_cpuinfo_notifier(struct notifier_block *nb); +extern int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v); + +#define proc_cpuinfo_notifier(fn, pri) \ +({ \ + static struct notifier_block fn##_nb = { \ + .notifier_call = fn, \ + .priority = pri \ + }; \ + \ + register_proc_cpuinfo_notifier(&fn##_nb); \ +}) + +struct proc_cpuinfo_notifier_args { + struct seq_file *m; + unsigned long n; +}; + +static inline bool cpus_are_siblings(int cpua, int cpub) +{ + struct cpuinfo_loongarch *infoa = &cpu_data[cpua]; + struct cpuinfo_loongarch *infob = &cpu_data[cpub]; + + if (infoa->package != infob->package) + return false; + + if (infoa->core != infob->core) + return false; + + return true; +} + +static inline unsigned long cpu_asid_mask(struct cpuinfo_loongarch *cpuinfo) +{ + return cpuinfo->asid_mask; +} + +static inline void set_cpu_asid_mask(struct cpuinfo_loongarch *cpuinfo, + unsigned long asid_mask) +{ + cpuinfo->asid_mask = asid_mask; +} + +#endif /* __ASM_CPU_INFO_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h new file mode 100644 index 000000000000..63d7094610be --- /dev/null +++ b/arch/loongarch/include/asm/cpu.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * cpu.h: Values of the PRID register used to match up + * various LoongArch cpu types. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_CPU_H +#define _ASM_CPU_H + +/* + * As described in LoongArch specs from Loongson Technology, the PRID register + * (CPUCFG.00) has the following layout: + * + * +---------------+----------------+------------+--------------------+ + * | Reserved | Company ID | Series ID | Product ID | + * +---------------+----------------+------------+--------------------+ + * 31 24 23 16 15 12 11 0 + */ + +/* + * Assigned Company values for bits 23:16 of the PRID register. + */ + +#define PRID_COMP_MASK 0xff0000 + +#define PRID_COMP_LOONGSON 0x140000 + +/* + * Assigned Series ID values for bits 15:12 of the PRID register. In order + * to detect a certain CPU type exactly, additional registers may eventually + * need to be examined. + */ + +#define PRID_SERIES_MASK 0xf000 + +#define PRID_SERIES_LA132 0x8000 /* Loongson 32bit */ +#define PRID_SERIES_LA264 0xa000 /* Loongson 64bit, 2-issue */ +#define PRID_SERIES_LA364 0xb000 /* Loongson 64bit, 3-issue */ +#define PRID_SERIES_LA464 0xc000 /* Loongson 64bit, 4-issue */ +#define PRID_SERIES_LA664 0xd000 /* Loongson 64bit, 6-issue */ + +/* + * Particular Product ID values for bits 11:0 of the PRID register. 
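The proc_cpuinfo_notifier() wrapper above hides the notifier_block boilerplate; a minimal sketch of a client that appends one line to /proc/cpuinfo output could read (names are hypothetical):

/* Sketch: a hypothetical user of proc_cpuinfo_notifier() above. */
static int my_cpuinfo_show(struct notifier_block *nb,
			   unsigned long action, void *data)
{
	struct proc_cpuinfo_notifier_args *args = data;

	seq_printf(args->m, "example feature\t\t: yes\n");
	return NOTIFY_OK;
}

static int __init my_cpuinfo_init(void)
{
	return proc_cpuinfo_notifier(my_cpuinfo_show, 0);
}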
+ */ + +#define PRID_PRODUCT_MASK 0x0fff + +#if !defined(__ASSEMBLY__) + +enum cpu_type_enum { + CPU_UNKNOWN, + CPU_LOONGSON32, + CPU_LOONGSON64, + CPU_LAST +}; + +#endif /* !__ASSEMBLY__ */ + +/* + * ISA Level encodings + */ + +#define LOONGARCH_CPU_ISA_LA32R 0x00000001 +#define LOONGARCH_CPU_ISA_LA32S 0x00000002 +#define LOONGARCH_CPU_ISA_LA64 0x00000004 + +#define LOONGARCH_CPU_ISA_32BIT (LOONGARCH_CPU_ISA_LA32R | LOONGARCH_CPU_ISA_LA32S) +#define LOONGARCH_CPU_ISA_64BIT LOONGARCH_CPU_ISA_LA64 + +/* + * CPU Option encodings + */ +#define CPU_FEATURE_CPUCFG 0 /* CPU has CPUCFG */ +#define CPU_FEATURE_LAM 1 /* CPU has Atomic instructions */ +#define CPU_FEATURE_UAL 2 /* CPU supports unaligned access */ +#define CPU_FEATURE_FPU 3 /* CPU has FPU */ +#define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */ +#define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */ +#define CPU_FEATURE_COMPLEX 6 /* CPU has Complex instructions */ +#define CPU_FEATURE_CRYPTO 7 /* CPU has Crypto instructions */ +#define CPU_FEATURE_LVZ 8 /* CPU has Virtualization extension */ +#define CPU_FEATURE_LBT_X86 9 /* CPU has X86 Binary Translation */ +#define CPU_FEATURE_LBT_ARM 10 /* CPU has ARM Binary Translation */ +#define CPU_FEATURE_LBT_MIPS 11 /* CPU has MIPS Binary Translation */ +#define CPU_FEATURE_TLB 12 /* CPU has TLB */ +#define CPU_FEATURE_CSR 13 /* CPU has CSR */ +#define CPU_FEATURE_WATCH 14 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 15 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 16 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 17 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 18 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 19 /* CPU has performance counter */ +#define CPU_FEATURE_SCALEFREQ 20 /* CPU supports cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 21 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 22 /* CPU has extioi int pin decode mode */ +#define CPU_FEATURE_GUESTID 23 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 24 /* CPU has hypervisor (running in VM) */ + +#define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) +#define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) +#define LOONGARCH_CPU_UAL BIT_ULL(CPU_FEATURE_UAL) +#define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) +#define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX) +#define LOONGARCH_CPU_LASX BIT_ULL(CPU_FEATURE_LASX) +#define LOONGARCH_CPU_COMPLEX BIT_ULL(CPU_FEATURE_COMPLEX) +#define LOONGARCH_CPU_CRYPTO BIT_ULL(CPU_FEATURE_CRYPTO) +#define LOONGARCH_CPU_LVZ BIT_ULL(CPU_FEATURE_LVZ) +#define LOONGARCH_CPU_LBT_X86 BIT_ULL(CPU_FEATURE_LBT_X86) +#define LOONGARCH_CPU_LBT_ARM BIT_ULL(CPU_FEATURE_LBT_ARM) +#define LOONGARCH_CPU_LBT_MIPS BIT_ULL(CPU_FEATURE_LBT_MIPS) +#define LOONGARCH_CPU_TLB BIT_ULL(CPU_FEATURE_TLB) +#define LOONGARCH_CPU_CSR BIT_ULL(CPU_FEATURE_CSR) +#define LOONGARCH_CPU_WATCH BIT_ULL(CPU_FEATURE_WATCH) +#define LOONGARCH_CPU_VINT BIT_ULL(CPU_FEATURE_VINT) +#define LOONGARCH_CPU_CSRIPI BIT_ULL(CPU_FEATURE_CSRIPI) +#define LOONGARCH_CPU_EXTIOI BIT_ULL(CPU_FEATURE_EXTIOI) +#define LOONGARCH_CPU_PREFETCH BIT_ULL(CPU_FEATURE_PREFETCH) +#define LOONGARCH_CPU_PMP BIT_ULL(CPU_FEATURE_PMP) +#define LOONGARCH_CPU_SCALEFREQ BIT_ULL(CPU_FEATURE_SCALEFREQ) +#define LOONGARCH_CPU_FLATMODE BIT_ULL(CPU_FEATURE_FLATMODE) +#define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE) +#define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) +#define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) +#endif /* _ASM_CPU_H */ diff --git 
a/arch/loongarch/include/asm/cpufeature.h b/arch/loongarch/include/asm/cpufeature.h new file mode 100644 index 000000000000..8cf9cc9cd072 --- /dev/null +++ b/arch/loongarch/include/asm/cpufeature.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CPU feature definitions for module loading, used by + * module_cpu_feature_match(), see uapi/asm/hwcap.h for LoongArch CPU features. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include +#include + +#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) + +#define cpu_feature(x) ilog2(HWCAP_ ## x) + +static inline bool cpu_have_feature(unsigned int num) +{ + return elf_hwcap & (1UL << num); +} + +#endif /* __ASM_CPUFEATURE_H */ diff --git a/arch/loongarch/include/asm/delay.h b/arch/loongarch/include/asm/delay.h new file mode 100644 index 000000000000..9536bcc3cfae --- /dev/null +++ b/arch/loongarch/include/asm/delay.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 by Waldorf Electronics + * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2007 Maciej W. Rozycki + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_DELAY_H +#define _ASM_DELAY_H + +#include + +extern void __delay(unsigned long cycles); +extern void __ndelay(unsigned long ns); +extern void __udelay(unsigned long us); + +#define ndelay(ns) __ndelay(ns) +#define udelay(us) __udelay(us) + +/* make sure "usecs *= ..." in udelay do not overflow. */ +#if HZ >= 1000 +#define MAX_UDELAY_MS 1 +#elif HZ <= 200 +#define MAX_UDELAY_MS 5 +#else +#define MAX_UDELAY_MS (1000 / HZ) +#endif + +#endif /* _ASM_DELAY_H */ diff --git a/arch/loongarch/include/asm/dma-direct.h b/arch/loongarch/include/asm/dma-direct.h new file mode 100644 index 000000000000..7056788e9c1e --- /dev/null +++ b/arch/loongarch/include/asm/dma-direct.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#ifndef _LOONGARCH_DMA_DIRECT_H +#define _LOONGARCH_DMA_DIRECT_H + +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); + +#endif /* _LOONGARCH_DMA_DIRECT_H */ diff --git a/arch/loongarch/include/asm/dma.h b/arch/loongarch/include/asm/dma.h new file mode 100644 index 000000000000..7e5b3e7d462b --- /dev/null +++ b/arch/loongarch/include/asm/dma.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
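Putting the PRID masks from cpu.h above to work, probing code can decompose a raw CPUCFG.00 value field by field. An illustrative sketch:

/* Sketch: decomposing a PRID value with the masks defined above. */
static void decode_prid(unsigned int prid)
{
	unsigned int company = prid & PRID_COMP_MASK;		/* bits 23:16 */
	unsigned int series = prid & PRID_SERIES_MASK;		/* bits 15:12 */
	unsigned int product = prid & PRID_PRODUCT_MASK;	/* bits 11:0 */

	if (company == PRID_COMP_LOONGSON && series == PRID_SERIES_LA464)
		pr_info("LA464 core, product id 0x%03x\n", product);
}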
+ * + */ +#ifndef __ASM_DMA_H +#define __ASM_DMA_H + +#define MAX_DMA_ADDRESS PAGE_OFFSET +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) + +extern int isa_dma_bridge_buggy; + +#endif diff --git a/arch/loongarch/include/asm/dmi.h b/arch/loongarch/include/asm/dmi.h new file mode 100644 index 000000000000..cdc29017c3fd --- /dev/null +++ b/arch/loongarch/include/asm/dmi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_DMI_H +#define _ASM_DMI_H + +#include +#include + +#define dmi_early_remap(x, l) dmi_remap(x, l) +#define dmi_early_unmap(x, l) dmi_unmap(x) +#define dmi_alloc(l) memblock_alloc(l, PAGE_SIZE) + +static inline void *dmi_remap(u64 phys_addr, unsigned long size) +{ + return ((void *)TO_CACHE(phys_addr)); +} + +static inline void dmi_unmap(void *addr) +{ +} + +#endif /* _ASM_DMI_H */ diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h new file mode 100644 index 000000000000..8a1f6521781a --- /dev/null +++ b/arch/loongarch/include/asm/efi.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_LOONGARCH_EFI_H +#define _ASM_LOONGARCH_EFI_H + +#include + +void __init efi_init(void); +void __init efi_runtime_init(void); + +#define ARCH_EFI_IRQ_FLAGS_MASK 0x00000004 /* Bit 2: CSR.CRMD.IE */ + +static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) +{ +} + +#define arch_efi_call_virt_setup() \ +({ \ +}) + +#define arch_efi_call_virt(p, f, args...) \ +({ \ + efi_##f##_t * __f; \ + __f = p->f; \ + __f(args); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ +}) + +#define EFI_ALLOC_ALIGN SZ_64K + +struct screen_info *alloc_screen_info(void); +void free_screen_info(struct screen_info *si); + +static inline unsigned long efi_get_max_fdt_addr(unsigned long image_addr) +{ + return ULONG_MAX; +} + +static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) +{ + return ULONG_MAX; +} + +#endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h new file mode 100644 index 000000000000..f881bd16514d --- /dev/null +++ b/arch/loongarch/include/asm/elf.h @@ -0,0 +1,302 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Much of this is taken from binutils and GNU libc ... + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ELF_H +#define _ASM_ELF_H + +#include +#include +#include + +#include +#include + +/* The ABI of a file. 
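dmi_remap() above needs no page tables because LoongArch exposes all of physical memory through a cached direct window; the helper is pure address arithmetic. A sketch of the equivalent computation (CACHE_BASE is the window base used elsewhere in this series, e.g. by ioremap_prot()):

/* Sketch: what TO_CACHE() boils down to for DMI table access. */
static inline void *phys_to_cached(u64 pa)
{
	return (void *)(CACHE_BASE + pa);	/* cached linear window */
}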
*/ +#define EF_LOONGARCH_ABI_LP64_SOFT_FLOAT 0x1 +#define EF_LOONGARCH_ABI_LP64_SINGLE_FLOAT 0x2 +#define EF_LOONGARCH_ABI_LP64_DOUBLE_FLOAT 0x3 + +#define EF_LOONGARCH_ABI_ILP32_SOFT_FLOAT 0x5 +#define EF_LOONGARCH_ABI_ILP32_SINGLE_FLOAT 0x6 +#define EF_LOONGARCH_ABI_ILP32_DOUBLE_FLOAT 0x7 + +/* LoongArch relocation types used by the dynamic linker */ +#define R_LARCH_NONE 0 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_RELATIVE 3 +#define R_LARCH_COPY 4 +#define R_LARCH_JUMP_SLOT 5 +#define R_LARCH_TLS_DTPMOD32 6 +#define R_LARCH_TLS_DTPMOD64 7 +#define R_LARCH_TLS_DTPREL32 8 +#define R_LARCH_TLS_DTPREL64 9 +#define R_LARCH_TLS_TPREL32 10 +#define R_LARCH_TLS_TPREL64 11 +#define R_LARCH_IRELATIVE 12 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_MARK_PCREL 21 +#define R_LARCH_SOP_PUSH_PCREL 22 +#define R_LARCH_SOP_PUSH_ABSOLUTE 23 +#define R_LARCH_SOP_PUSH_DUP 24 +#define R_LARCH_SOP_PUSH_GPREL 25 +#define R_LARCH_SOP_PUSH_TLS_TPREL 26 +#define R_LARCH_SOP_PUSH_TLS_GOT 27 +#define R_LARCH_SOP_PUSH_TLS_GD 28 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#define R_LARCH_SOP_ASSERT 30 +#define R_LARCH_SOP_NOT 31 +#define R_LARCH_SOP_SUB 32 +#define R_LARCH_SOP_SL 33 +#define R_LARCH_SOP_SR 34 +#define R_LARCH_SOP_ADD 35 +#define R_LARCH_SOP_AND 36 +#define R_LARCH_SOP_IF_ELSE 37 +#define R_LARCH_SOP_POP_32_S_10_5 38 +#define R_LARCH_SOP_POP_32_U_10_12 39 +#define R_LARCH_SOP_POP_32_S_10_12 40 +#define R_LARCH_SOP_POP_32_S_10_16 41 +#define R_LARCH_SOP_POP_32_S_10_16_S2 42 +#define R_LARCH_SOP_POP_32_S_5_20 43 +#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 +#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 +#define R_LARCH_SOP_POP_32_U 46 +#define R_LARCH_ADD8 47 +#define R_LARCH_ADD16 48 +#define R_LARCH_ADD24 49 +#define R_LARCH_ADD32 50 +#define R_LARCH_ADD64 51 +#define R_LARCH_SUB8 52 +#define R_LARCH_SUB16 53 +#define R_LARCH_SUB24 54 +#define R_LARCH_SUB32 55 +#define R_LARCH_SUB64 56 +#define R_LARCH_GNU_VTINHERIT 57 +#define R_LARCH_GNU_VTENTRY 58 + +#ifndef ELF_ARCH + +/* ELF register definitions */ + +/* + * General purpose have the following registers: + * Register Number + * GPRs 32 + * ORIG_A0 1 + * ERA 1 + * BADVADDR 1 + * CRMD 1 + * PRMD 1 + * EUEN 1 + * ECFG 1 + * ESTAT 1 + * Reserved 5 + */ +#define ELF_NGREG 45 + +/* + * Floating point have the following registers: + * Register Number + * FPR 32 + * FCC 1 + * FCSR 1 + */ +#define ELF_NFPREG 34 + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs); + +#ifdef CONFIG_32BIT +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch elf32_check_arch + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS32 + +#define ELF_CORE_COPY_REGS(dest, regs) \ + loongarch_dump_regs32((u32 *)&(dest), (regs)); + +#endif /* CONFIG_32BIT */ + +#ifdef CONFIG_64BIT +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch elf64_check_arch + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 + +#define ELF_CORE_COPY_REGS(dest, regs) \ + loongarch_dump_regs64((u64 *)&(dest), (regs)); + +#endif /* CONFIG_64BIT */ + +/* + * These are used to set parameters in the core dumps. 
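The ELF_NGREG arithmetic above is easy to misread, so here it is spelled out as a compile-time check; the grouping names are illustrative, only the total is mandated by the layout comment:

/* Sketch: 32 GPRs + 8 extras (orig_a0, era, badvaddr, crmd, prmd,
 * euen, ecfg, estat) + 5 reserved slots = 45 == ELF_NGREG. */
#define SKETCH_NGPRS	32
#define SKETCH_NEXTRA	8
#define SKETCH_NRSVD	5
_Static_assert(SKETCH_NGPRS + SKETCH_NEXTRA + SKETCH_NRSVD == 45,
	       "must match ELF_NGREG");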
+ */ +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_LOONGARCH + +#endif /* !defined(ELF_ARCH) */ + +#define loongarch_elf_check_machine(x) ((x)->e_machine == EM_LOONGARCH) + +#define vmcore_elf32_check_arch loongarch_elf_check_machine +#define vmcore_elf64_check_arch loongarch_elf_check_machine + +/* + * Return non-zero if HDR identifies an 32bit ELF binary. + */ +#define elf32_check_arch(hdr) \ +({ \ + int __res = 1; \ + struct elfhdr *__h = (hdr); \ + \ + if (!loongarch_elf_check_machine(__h)) \ + __res = 0; \ + if (__h->e_ident[EI_CLASS] != ELFCLASS32) \ + __res = 0; \ + \ + __res; \ +}) + +/* + * Return non-zero if HDR identifies an 64bit ELF binary. + */ +#define elf64_check_arch(hdr) \ +({ \ + int __res = 1; \ + struct elfhdr *__h = (hdr); \ + \ + if (!loongarch_elf_check_machine(__h)) \ + __res = 0; \ + if (__h->e_ident[EI_CLASS] != ELFCLASS64) \ + __res = 0; \ + \ + __res; \ +}) + +#ifdef CONFIG_32BIT + +#define SET_PERSONALITY2(ex, state) \ +do { \ + current->thread.vdso = &vdso_info; \ + \ + loongarch_set_personality_fcsr(state); \ + \ + if (personality(current->personality) != PER_LINUX) \ + set_personality(PER_LINUX); \ +} while (0) + +#endif /* CONFIG_32BIT */ + +#ifdef CONFIG_64BIT + +#define SET_PERSONALITY2(ex, state) \ +do { \ + unsigned int p; \ + \ + clear_thread_flag(TIF_32BIT_REGS); \ + clear_thread_flag(TIF_32BIT_ADDR); \ + \ + current->thread.vdso = &vdso_info; \ + loongarch_set_personality_fcsr(state); \ + \ + p = personality(current->personality); \ + if (p != PER_LINUX32 && p != PER_LINUX) \ + set_personality(PER_LINUX); \ +} while (0) + +#endif /* CONFIG_64BIT */ + +#define CORE_DUMP_USE_REGSET +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* This yields a mask that user programs can use to figure out what + instruction set this cpu supports. This could be done in userspace, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP (elf_hwcap) +extern unsigned int elf_hwcap; +#include + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ + +#define ELF_PLATFORM __elf_platform +extern const char *__elf_platform; + +#define ELF_PLAT_INIT(_r, load_addr) do { \ + _r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \ + _r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \ + _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \ + _r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \ + _r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \ + _r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \ + _r->regs[25] = _r->regs[26] = _r->regs[27] = _r->regs[28] = 0; \ + _r->regs[29] = _r->regs[30] = _r->regs[31] = 0; \ +} while (0) + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. 
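For a feel of how the check macros above get used, a loader-side predicate might look like this (sketch; the endianness test is redundant with ELF_DATA but makes the intent explicit):

/* Sketch: accept only little-endian 64-bit LoongArch objects. */
static int can_load(struct elfhdr *h)
{
	return elf64_check_arch(h) && h->e_ident[EI_DATA] == ELFDATA2LSB;
}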
*/ + +#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ +do { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); + +struct arch_elf_state { + int fp_abi; + int interp_fp_abi; +}; + +#define LOONGARCH_ABI_FP_ANY (0) + +#define INIT_ARCH_ELF_STATE { \ + .fp_abi = LOONGARCH_ABI_FP_ANY, \ + .interp_fp_abi = LOONGARCH_ABI_FP_ANY, \ +} + +#define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT) + +extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf, + bool is_interp, struct arch_elf_state *state); + +extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr, + struct arch_elf_state *state); + +extern void loongarch_set_personality_fcsr(struct arch_elf_state *state); + +#endif /* _ASM_ELF_H */ diff --git a/arch/loongarch/include/asm/entry-common.h b/arch/loongarch/include/asm/entry-common.h new file mode 100644 index 000000000000..0fe2a098ded9 --- /dev/null +++ b/arch/loongarch/include/asm/entry-common.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_LOONGARCH_ENTRY_COMMON_H +#define ARCH_LOONGARCH_ENTRY_COMMON_H + +#include +#include + +static inline bool on_thread_stack(void) +{ + return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); +} + +#endif diff --git a/arch/loongarch/include/asm/exec.h b/arch/loongarch/include/asm/exec.h new file mode 100644 index 000000000000..371d5fb327a8 --- /dev/null +++ b/arch/loongarch/include/asm/exec.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994, 95, 96, 97, 98, 99, 2003, 06 by Ralf Baechle + * Copyright (C) 1996 by Paul M. Antoine + * Copyright (C) 1999 Silicon Graphics + * Kevin D. Kissell, kevink@mips.org and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
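The on_thread_stack() test above relies on a masking trick: two addresses fall in the same THREAD_SIZE-aligned block exactly when their XOR has no bits above the block mask, so no explicit lower/upper bound comparison is needed. A userspace demonstration, assuming hypothetical 16 KiB stacks:

#include <assert.h>
#include <stdint.h>

#define THREAD_SIZE 16384UL	/* assumption: 16 KiB, power of two */

static int same_block(uintptr_t base, uintptr_t sp)
{
	return !((base ^ sp) & ~(THREAD_SIZE - 1));
}

int main(void)
{
	assert(same_block(0x40000, 0x43ff8));	/* inside the block */
	assert(!same_block(0x40000, 0x44000));	/* first byte outside */
	return 0;
}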
+ */ +#ifndef _ASM_EXEC_H +#define _ASM_EXEC_H + +extern unsigned long arch_align_stack(unsigned long sp); + +#endif /* _ASM_EXEC_H */ diff --git a/arch/loongarch/include/asm/fb.h b/arch/loongarch/include/asm/fb.h new file mode 100644 index 000000000000..5cd96907d913 --- /dev/null +++ b/arch/loongarch/include/asm/fb.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is released under the GPLv2 + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include +#include +#include + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h new file mode 100644 index 000000000000..cd810ee0d251 --- /dev/null +++ b/arch/loongarch/include/asm/fixmap.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include + +#define NR_FIX_BTMAPS 64 + +#endif diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h new file mode 100644 index 000000000000..caf528ae6428 --- /dev/null +++ b/arch/loongarch/include/asm/fpregdef.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definitions for the FPU register names + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FPREGDEF_H +#define _ASM_FPREGDEF_H + +#define fa0 $f0 /* argument registers, fa0/fa1 reused as fv0/fv1 for return value */ +#define fa1 $f1 +#define fa2 $f2 +#define fa3 $f3 +#define fa4 $f4 +#define fa5 $f5 +#define fa6 $f6 +#define fa7 $f7 +#define ft0 $f8 /* caller saved */ +#define ft1 $f9 +#define ft2 $f10 +#define ft3 $f11 +#define ft4 $f12 +#define ft5 $f13 +#define ft6 $f14 +#define ft7 $f15 +#define ft8 $f16 +#define ft9 $f17 +#define ft10 $f18 +#define ft11 $f19 +#define ft12 $f20 +#define ft13 $f21 +#define ft14 $f22 +#define ft15 $f23 +#define fs0 $f24 /* callee saved */ +#define fs1 $f25 +#define fs2 $f26 +#define fs3 $f27 +#define fs4 $f28 +#define fs5 $f29 +#define fs6 $f30 +#define fs7 $f31 + +/* + * Current binutils expects *GPRs* at FCSR position for the FCSR + * operation instructions, so define aliases for those used. 
+ */ +#define fcsr0 $r0 +#define fcsr1 $r1 +#define fcsr2 $r2 +#define fcsr3 $r3 +#define vcsr16 $r16 + +#endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h new file mode 100644 index 000000000000..d53ab744e151 --- /dev/null +++ b/arch/loongarch/include/asm/fpu.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FPU_H +#define _ASM_FPU_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +struct sigcontext; + +extern void _init_fpu(unsigned int); +extern void _save_fp(struct loongarch_fpu *); +extern void _restore_fp(struct loongarch_fpu *); + +/* + * Mask the FCSR Cause bits according to the Enable bits, observing + * that Unimplemented is always enabled. + */ +static inline unsigned long mask_fcsr_x(unsigned long fcsr) +{ + return fcsr & ((fcsr & FPU_CSR_ALL_E) << + (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E))); +} + +static inline int is_fp_enabled(void) +{ + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_FPEN) ? + 1 : 0; +} + +#define enable_fpu() \ +do { \ + set_csr_euen(CSR_EUEN_FPEN); \ +} while (0) + +#define disable_fpu() \ +do { \ + clear_csr_euen(CSR_EUEN_FPEN); \ +} while (0) + +#define clear_fpu_owner() clear_thread_flag(TIF_USEDFPU) + +static inline int is_fpu_owner(void) +{ + return test_thread_flag(TIF_USEDFPU); +} + +static inline void __own_fpu(void) +{ + enable_fpu(); + set_thread_flag(TIF_USEDFPU); + KSTK_EUEN(current) |= CSR_EUEN_FPEN; +} + +static inline void own_fpu_inatomic(int restore) +{ + if (cpu_has_fpu && !is_fpu_owner()) { + __own_fpu(); + if (restore) + _restore_fp(¤t->thread.fpu); + } +} + +static inline void own_fpu(int restore) +{ + preempt_disable(); + own_fpu_inatomic(restore); + preempt_enable(); +} + +static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) +{ + if (is_fpu_owner()) { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + clear_tsk_thread_flag(tsk, TIF_USEDFPU); + } + KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); +} + +static inline void lose_fpu(int save) +{ + preempt_disable(); + lose_fpu_inatomic(save, current); + preempt_enable(); +} + +static inline void init_fpu(void) +{ + unsigned int fcsr = current->thread.fpu.fcsr; + + __own_fpu(); + _init_fpu(fcsr); + set_used_math(); +} + +static inline void save_fp(struct task_struct *tsk) +{ + if (cpu_has_fpu) + _save_fp(&tsk->thread.fpu); +} + +static inline void restore_fp(struct task_struct *tsk) +{ + if (cpu_has_fpu) + _restore_fp(&tsk->thread.fpu); +} + +static inline union fpureg *get_fpu_regs(struct task_struct *tsk) +{ + if (tsk == current) { + preempt_disable(); + if (is_fpu_owner()) + _save_fp(¤t->thread.fpu); + preempt_enable(); + } + + return tsk->thread.fpu.fpr; +} + +#endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h new file mode 100644 index 000000000000..fa0ccf9ea024 --- /dev/null +++ b/arch/loongarch/include/asm/futex.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#include +#include +#include +#include +#include + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +{ \ + __asm__ __volatile__( \ + "1: ll.w %1, %4 # __futex_atomic_op\n" \ + " " insn " \n" \ + "2: sc.w $t0, %2 \n" \ + " 
beq $t0, $zero, 1b \n" \ + "3: \n" \ + " .section .fixup,\"ax\" \n" \ + "4: li.w %0, %6 \n" \ + " b 3b \n" \ + " .previous \n" \ + " .section __ex_table,\"a\" \n" \ + " "__UA_ADDR "\t1b, 4b \n" \ + " "__UA_ADDR "\t2b, 4b \n" \ + " .previous \n" \ + : "=r" (ret), "=&r" (oldval), \ + "=ZC" (*uaddr) \ + : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ + "i" (-EFAULT) \ + : "memory", "t0"); \ +} + +static inline int +arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) +{ + int oldval = 0, ret = 0; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("move $t0, %z5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("add.w $t0, %1, %z5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or $t0, %1, %z5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("and $t0, %1, %z5", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor $t0, %1, %z5", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) + *oval = oldval; + + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) +{ + int ret = 0; + u32 val = 0; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__( + "# futex_atomic_cmpxchg_inatomic \n" + "1: ll.w %1, %3 \n" + " bne %1, %z4, 3f \n" + " or $t0, %z5, $zero \n" + "2: sc.w $t0, %2 \n" + " beq $zero, $t0, 1b \n" + "3: \n" + " .section .fixup,\"ax\" \n" + "4: li.d %0, %6 \n" + " b 3b \n" + " .previous \n" + " .section __ex_table,\"a\" \n" + " "__UA_ADDR "\t1b, 4b \n" + " "__UA_ADDR "\t2b, 4b \n" + " .previous \n" + : "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr) + : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), + "i" (-EFAULT) + : "memory", "t0"); + + *uval = val; + + return ret; +} + +#endif /* _ASM_FUTEX_H */ diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h new file mode 100644 index 000000000000..5effe88fce3e --- /dev/null +++ b/arch/loongarch/include/asm/hardirq.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_HARDIRQ_H +#define _ASM_HARDIRQ_H + +#include +#include +#include + +extern void ack_bad_irq(unsigned int irq); +#define ack_bad_irq ack_bad_irq + +#define NR_IPI 2 + +typedef struct { + unsigned int ipi_irqs[NR_IPI]; + unsigned int __softirq_pending; +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +#endif /* _ASM_HARDIRQ_H */ diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h new file mode 100644 index 000000000000..0729eb9441ff --- /dev/null +++ b/arch/loongarch/include/asm/hugetlb.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include + +uint64_t pmd_to_entrylo(unsigned long pmd_val); + +#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, + unsigned long len) +{ + unsigned long task_size = STACK_TOP; + struct hstate *h = hstate_file(file); + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + if (len > task_size) + return -ENOMEM; + if (task_size - len < addr) + 
return -EINVAL; + return 0; +} + +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t clear; + pte_t pte = *ptep; + + pte_val(clear) = (unsigned long)invalid_pte_table; + set_pte_at(mm, addr, ptep, clear); + return pte; +} + +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + flush_tlb_page(vma, addr); +} + +#define __HAVE_ARCH_HUGE_PTE_NONE +static inline int huge_pte_none(pte_t pte) +{ + unsigned long val = pte_val(pte) & ~_PAGE_GLOBAL; + return !val || (val == (unsigned long)invalid_pte_table); +} + +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep, pte_t pte, + int dirty) +{ + int changed = !pte_same(*ptep, pte); + + if (changed) { + set_pte_at(vma->vm_mm, addr, ptep, pte); + /* + * There could be some standard sized pages in there, + * get them all. + */ + flush_tlb_range(vma, addr, addr + HPAGE_SIZE); + } + return changed; +} + +#include + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/loongarch/include/asm/hw_irq.h b/arch/loongarch/include/asm/hw_irq.h new file mode 100644 index 000000000000..1633972591a0 --- /dev/null +++ b/arch/loongarch/include/asm/hw_irq.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000, 2001, 2002 by Ralf Baechle + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_HW_IRQ_H +#define __ASM_HW_IRQ_H + +#include + +extern atomic_t irq_err_count; + +/* + * interrupt-retrigger: NOP for now. This may not be appropriate for all + * machines, we'll see ... + */ + +#endif /* __ASM_HW_IRQ_H */ diff --git a/arch/loongarch/include/asm/idle.h b/arch/loongarch/include/asm/idle.h new file mode 100644 index 000000000000..f7f2b7dbf958 --- /dev/null +++ b/arch/loongarch/include/asm/idle.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_IDLE_H +#define __ASM_IDLE_H + +#include + +extern asmlinkage void __arch_cpu_idle(void); + +#endif /* __ASM_IDLE_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h new file mode 100644 index 000000000000..fc805abfbf84 --- /dev/null +++ b/arch/loongarch/include/asm/inst.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Format of an instruction in memory. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_INST_H +#define _ASM_INST_H + +#include +#include + +/* HACHACHAHCAHC ... 
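The range validation in prepare_hugepage_range() above reduces to mask arithmetic against the huge page size. A userspace sketch with an assumed 32 MiB huge page (real sizes come from the hstate) showing which ranges pass:

#include <assert.h>

#define HPAGE_SIZE_EX	(32UL << 20)		/* hypothetical 32 MiB */
#define HPAGE_MASK_EX	(~(HPAGE_SIZE_EX - 1))

int main(void)
{
	unsigned long addr = 2 * HPAGE_SIZE_EX, len = HPAGE_SIZE_EX;

	assert(!(len & ~HPAGE_MASK_EX));	/* whole huge pages: ok */
	assert(!(addr & ~HPAGE_MASK_EX));	/* aligned start: ok */
	assert((addr + 4096) & ~HPAGE_MASK_EX);	/* ragged: -EINVAL */
	return 0;
}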
*/ + +/* In case some other massaging is needed, keep LOONGARCHInst as wrapper */ + +#define LOONGARCHInst(x) x + +#define I_OPCODE_SFT 26 +#define LOONGARCHInst_OPCODE(x) (LOONGARCHInst(x) >> I_OPCODE_SFT) + +#define I_JTARGET_SFT 0 +#define LOONGARCHInst_JTARGET(x) (LOONGARCHInst(x) & 0x03ffffff) + +#define I_RS_SFT 21 +#define LOONGARCHInst_RS(x) ((LOONGARCHInst(x) & 0x03e00000) >> I_RS_SFT) + +#define I_RT_SFT 16 +#define LOONGARCHInst_RT(x) ((LOONGARCHInst(x) & 0x001f0000) >> I_RT_SFT) + +#define I_IMM_SFT 0 +#define LOONGARCHInst_SIMM(x) ((int)((short)(LOONGARCHInst(x) & 0xffff))) +#define LOONGARCHInst_UIMM(x) (LOONGARCHInst(x) & 0xffff) + +#define I_CACHEOP_SFT 18 +#define LOONGARCHInst_CACHEOP(x) ((LOONGARCHInst(x) & 0x001c0000) >> I_CACHEOP_SFT) + +#define I_CACHESEL_SFT 16 +#define LOONGARCHInst_CACHESEL(x) ((LOONGARCHInst(x) & 0x00030000) >> I_CACHESEL_SFT) + +#define I_RD_SFT 11 +#define LOONGARCHInst_RD(x) ((LOONGARCHInst(x) & 0x0000f800) >> I_RD_SFT) + +#define I_RE_SFT 6 +#define LOONGARCHInst_RE(x) ((LOONGARCHInst(x) & 0x000007c0) >> I_RE_SFT) + +#define I_FUNC_SFT 0 +#define LOONGARCHInst_FUNC(x) (LOONGARCHInst(x) & 0x0000003f) + +#define I_FFMT_SFT 21 +#define LOONGARCHInst_FFMT(x) ((LOONGARCHInst(x) & 0x01e00000) >> I_FFMT_SFT) + +#define I_FT_SFT 16 +#define LOONGARCHInst_FT(x) ((LOONGARCHInst(x) & 0x001f0000) >> I_FT_SFT) + +#define I_FS_SFT 11 +#define LOONGARCHInst_FS(x) ((LOONGARCHInst(x) & 0x0000f800) >> I_FS_SFT) + +#define I_FD_SFT 6 +#define LOONGARCHInst_FD(x) ((LOONGARCHInst(x) & 0x000007c0) >> I_FD_SFT) + +#define I_FR_SFT 21 +#define LOONGARCHInst_FR(x) ((LOONGARCHInst(x) & 0x03e00000) >> I_FR_SFT) + +#define I_FMA_FUNC_SFT 2 +#define LOONGARCHInst_FMA_FUNC(x) ((LOONGARCHInst(x) & 0x0000003c) >> I_FMA_FUNC_SFT) + +#define I_FMA_FFMT_SFT 0 +#define LOONGARCHInst_FMA_FFMT(x) (LOONGARCHInst(x) & 0x00000003) + +struct pt_regs; +typedef unsigned int loongarch_instruction; + +/* Recode table from 16-bit register notation to 32-bit GPR. Do NOT export!!! */ +extern const int reg16to32[]; + +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc); +unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign); +unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n); + +#endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h new file mode 100644 index 000000000000..79c6022f5747 --- /dev/null +++ b/arch/loongarch/include/asm/io.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#define ARCH_HAS_IOREMAP_WC + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * On LoongArch, the I/O port mapping is as follows: + * + * | .... | + * |-----------------------| + * | pci io ports(64K~32M) | + * |-----------------------| + * | isa io ports(0~16K) | + * PCI_IOBASE ->|-----------------------| + * | .... | + */ +#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) +#define PCI_IOSIZE SZ_32M +#define ISA_IOSIZE SZ_16K +#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) + +/* + * Change "struct page" to physical address. 
+ */ +#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); +extern void __init early_iounmap(void __iomem *addr, unsigned long size); + +#define early_memremap early_ioremap +#define early_memunmap early_iounmap + +static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val) +{ + if (prot_val == _CACHE_CC) + return (void __iomem *)(unsigned long)(CACHE_BASE + offset); + else + return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset); +} + +/* + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + */ +#define ioremap(offset, size) \ + ioremap_prot((offset), (size), _CACHE_SUC) + +/* + * ioremap_wc - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_wc performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncachable + * but accelerated by means of the write-combining feature. It is + * specifically useful for PCIe prefetchable windows, which may vastly + * improve communication performance. If it is determined at boot time + * that the CPU's CCA doesn't support WUC, this method falls back to the + * _CACHE_SUC option (see cpu_probe()). + */ +#define ioremap_wc(offset, size) \ + ioremap_prot((offset), (size), _CACHE_WUC) + +/* + * ioremap_cache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_cache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked cachable by + * the CPU. Also enables full write-combining. Useful for some + * memory-like regions on I/O busses. + */ +#define ioremap_cache(offset, size) \ + ioremap_prot((offset), (size), _CACHE_CC) + +static inline void iounmap(const volatile void __iomem *addr) +{ +} + +#define mmiowb() asm volatile ("dbar 0" ::: "memory") + +/* + * String version of I/O memory access operations. 
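To make the three mapping flavours above concrete, a driver choosing an attribute for a PCI BAR might do the following (sketch; the function name and the missing error handling are hypothetical):

/* Sketch: WUC suits prefetchable windows such as framebuffers,
 * while plain ioremap() keeps register accesses strongly ordered. */
void __iomem *map_bar(phys_addr_t bar, resource_size_t len, bool prefetchable)
{
	return prefetchable ? ioremap_wc(bar, len) : ioremap(bar, len);
}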
+ */ +extern void __memset_io(volatile void __iomem *dst, int c, size_t count); +extern void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count); +extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count); +#define memset_io(c, v, l) __memset_io((c), (v), (l)) +#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l)) +#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l)) + +#include + +#endif /* _ASM_IO_H */ diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h new file mode 100644 index 000000000000..e8c81770bb34 --- /dev/null +++ b/arch/loongarch/include/asm/irq.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +#include +#include +#include + +#define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START (IRQ_STACK_SIZE - 16) + +DECLARE_PER_CPU(unsigned long, irq_stack); + +/* + * The highest address on the IRQ stack contains a dummy frame put down in + * genex.S (except_vec_vi_handler) which is structured as follows: + * + * top ------------ + * | task sp | <- irq_stack[cpu] + IRQ_STACK_START + * ------------ + * | | <- First frame of IRQ context + * ------------ + * + * task sp holds a copy of the task stack pointer where the struct pt_regs + * from exception entry can be found. + */ + +static inline bool on_irq_stack(int cpu, unsigned long sp) +{ + unsigned long low = per_cpu(irq_stack, cpu); + unsigned long high = low + IRQ_STACK_SIZE; + + return (low <= sp && sp <= high); +} + +struct irq_data; +struct device_node; + +int get_ipi_irq(void); +int get_pmc_irq(void); +int get_timer_irq(void); +void arch_init_irq(void); +void spurious_interrupt(void); +struct irq_domain *loongarch_cpu_irq_init(void); + +#define NR_IRQS_LEGACY 16 + +void arch_trigger_cpumask_backtrace(const struct cpumask *mask, + bool exclude_self); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace + +#endif /* _ASM_IRQ_H */ diff --git a/arch/loongarch/include/asm/irq_regs.h b/arch/loongarch/include/asm/irq_regs.h new file mode 100644 index 000000000000..92e72213944e --- /dev/null +++ b/arch/loongarch/include/asm/irq_regs.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
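Given the dummy frame documented above, an unwinder that finds itself on the IRQ stack can hop back to the interrupted task stack by reading the saved pointer at the fixed offset. A sketch (a real unwinder validates far more):

/* Sketch: recovering the task sp from the IRQ stack top frame. */
static unsigned long task_sp_from_irq_stack(int cpu)
{
	unsigned long top = per_cpu(irq_stack, cpu) + IRQ_STACK_START;

	return *(unsigned long *)top;	/* the "task sp" slot */
}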
+ * + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_IRQ_REGS_H +#define __ASM_IRQ_REGS_H + +#define ARCH_HAS_OWN_IRQ_REGS + +#include + +static inline struct pt_regs *get_irq_regs(void) +{ + return current_thread_info()->regs; +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + current_thread_info()->regs = new_regs; + + return old_regs; +} + +#endif /* __ASM_IRQ_REGS_H */ diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h new file mode 100644 index 000000000000..8811bdf884e7 --- /dev/null +++ b/arch/loongarch/include/asm/irqflags.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +static inline void arch_local_irq_enable(void) +{ + u32 flags = CSR_CRMD_IE; + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void arch_local_irq_disable(void) +{ + u32 flags = 0; + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline unsigned long arch_local_irq_save(void) +{ + u32 flags = 0; + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + return flags; +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline unsigned long arch_local_save_flags(void) +{ + u32 flags; + __asm__ __volatile__( + "csrrd %[val], %[reg]\n\t" + : [val] "=r" (flags) + : [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + return flags; +} + +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & CSR_CRMD_IE); +} + +static inline int arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#endif /* #ifndef __ASSEMBLY__ */ + +#endif /* _ASM_IRQFLAGS_H */ diff --git a/arch/loongarch/include/asm/kdebug.h b/arch/loongarch/include/asm/kdebug.h new file mode 100644 index 000000000000..64cdaead21a8 --- /dev/null +++ b/arch/loongarch/include/asm/kdebug.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_LOONGARCH_KDEBUG_H +#define _ASM_LOONGARCH_KDEBUG_H + +#include + +enum die_val { + DIE_OOPS = 1, + DIE_RI, + DIE_FP, + DIE_SIMD, + DIE_TRAP, + DIE_PAGE_FAULT, + DIE_BREAK, + DIE_SSTEPBP, + DIE_UPROBE, + DIE_UPROBE_XOL, +}; + +#endif /* _ASM_LOONGARCH_KDEBUG_H */ diff --git a/arch/loongarch/include/asm/kmap_types.h b/arch/loongarch/include/asm/kmap_types.h new file mode 100644 index 000000000000..5c2173103727 --- /dev/null +++ b/arch/loongarch/include/asm/kmap_types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +#include + +#endif diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h new file mode 100644 index 
000000000000..81b0c4cfbf4f
--- /dev/null
+++ b/arch/loongarch/include/asm/linkage.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN		.align 2
+#define __ALIGN_STR	__stringify(__ALIGN)
+
+#define SYM_FUNC_START(name)				\
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)	\
+	.cfi_startproc;
+
+#define SYM_FUNC_START_NOALIGN(name)			\
+	SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE)	\
+	.cfi_startproc;
+
+#define SYM_FUNC_START_LOCAL(name)			\
+	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)	\
+	.cfi_startproc;
+
+#define SYM_FUNC_START_LOCAL_NOALIGN(name)		\
+	SYM_START(name, SYM_L_LOCAL, SYM_A_NONE)	\
+	.cfi_startproc;
+
+#define SYM_FUNC_START_WEAK(name)			\
+	SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)	\
+	.cfi_startproc;
+
+#define SYM_FUNC_START_WEAK_NOALIGN(name)		\
+	SYM_START(name, SYM_L_WEAK, SYM_A_NONE)		\
+	.cfi_startproc;
+
+#define SYM_FUNC_END(name)				\
+	.cfi_endproc;					\
+	SYM_END(name, SYM_T_FUNC)
+
+#endif
diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
new file mode 100644
index 000000000000..7eeff36450d1
--- /dev/null
+++ b/arch/loongarch/include/asm/local.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ARCH_LOONGARCH_LOCAL_H
+#define _ARCH_LOONGARCH_LOCAL_H
+
+#include
+#include
+#include
+#include
+#include
+
+typedef struct {
+	atomic_long_t a;
+} local_t;
+
+#define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
+
+#define local_read(l)	atomic_long_read(&(l)->a)
+#define local_set(l, i)	atomic_long_set(&(l)->a, (i))
+
+#define local_add(i, l)	atomic_long_add((i), (&(l)->a))
+#define local_sub(i, l)	atomic_long_sub((i), (&(l)->a))
+#define local_inc(l)	atomic_long_inc(&(l)->a)
+#define local_dec(l)	atomic_long_dec(&(l)->a)
+
+/*
+ * Same as above, but return the result value
+ */
+static __inline__ long local_add_return(long i, local_t *l)
+{
+	unsigned long result;
+
+	__asm__ __volatile__(
+	"	" __AMADD " %1, %2, %0	\n"
+	: "+ZB" (l->a.counter), "=&r" (result)
+	: "r" (i)
+	: "memory");
+	result = result + i;
+
+	return result;
+}
+
+static __inline__ long local_sub_return(long i, local_t *l)
+{
+	unsigned long result;
+
+	__asm__ __volatile__(
+	"	" __AMADD " %1, %2, %0	\n"
+	: "+ZB" (l->a.counter), "=&r" (result)
+	: "r" (-i)
+	: "memory");
+
+	result = result - i;
+
+	return result;
+}
+
+#define local_cmpxchg(l, o, n)					\
+	((long)cmpxchg_local(&((l)->a.counter), (o), (n)))
+#define local_xchg(l, n)	(atomic_long_xchg((&(l)->a), (n)))
+
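A short usage sketch of the local_t operations above (the per-CPU counter pattern they are designed for; the variable and function names are illustrative):

static DEFINE_PER_CPU(local_t, example_events) = LOCAL_INIT(0);

static void count_event(void)
{
	preempt_disable();	/* local_t ops are CPU-local, not SMP-atomic */
	local_inc(this_cpu_ptr(&example_events));
	preempt_enable();
}

static long read_events(int cpu)
{
	return local_read(&per_cpu(example_events, cpu));
}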
+/**
+ * local_add_unless - add unless the number is a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to l...
+ * @u: ...unless l is equal to u.
+ *
+ * Atomically adds @a to @l, so long as it was not @u.
+ * Returns non-zero if @l was not @u, and zero otherwise.
+ */
+#define local_add_unless(l, a, u)				\
+({								\
+	long c, old;						\
+	c = local_read(l);					\
+	while (c != (u) && (old = local_cmpxchg((l), c, c + (a))) != c) \
+		c = old;					\
+	c != (u);						\
+})
+#define local_inc_not_zero(l)	local_add_unless((l), 1, 0)
+
+#define local_dec_return(l)	local_sub_return(1, (l))
+#define local_inc_return(l)	local_add_return(1, (l))
+
+/*
+ * local_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @l: pointer of type local_t
+ *
+ * Atomically subtracts @i from @l and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_sub_and_test(i, l)	(local_sub_return((i), (l)) == 0)
+
+/*
+ * local_inc_and_test - increment and test
+ * @l: pointer of type local_t
+ *
+ * Atomically increments @l by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define local_inc_and_test(l)	(local_inc_return(l) == 0)
+
+/*
+ * local_dec_and_test - decrement by 1 and test
+ * @l: pointer of type local_t
+ *
+ * Atomically decrements @l by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#define local_dec_and_test(l)	(local_sub_return(1, (l)) == 0)
+
+/*
+ * local_add_negative - add and test if negative
+ * @l: pointer of type local_t
+ * @i: integer value to add
+ *
+ * Atomically adds @i to @l and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#define local_add_negative(i, l)	(local_add_return(i, (l)) < 0)
+
+/* Use these for per-cpu local_t variables: on some archs they are
+ * much more efficient than these naive implementations. Note they take
+ * a variable, not an address.
+ */
+
+#define __local_inc(l)		((l)->a.counter++)
+#define __local_dec(l)		((l)->a.counter--)
+#define __local_add(i, l)	((l)->a.counter += (i))
+#define __local_sub(i, l)	((l)->a.counter -= (i))
+
+#endif /* _ARCH_LOONGARCH_LOCAL_H */
diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h
new file mode 100644
index 000000000000..1d93f34be853
--- /dev/null
+++ b/arch/loongarch/include/asm/loongarchregs.h
@@ -0,0 +1,1516 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2020 Loongson Technology Co., Ltd.
+ */
+#ifndef _ASM_LOONGARCHREGS_H
+#define _ASM_LOONGARCHREGS_H
+
+#include
+#include
+#include
+
+#ifndef __ASSEMBLY__
+#include
+
+/*
+ * parse_r var, r - Helper assembler macro for parsing register names.
+ *
+ * This converts the register name in $n form provided in \r to the
+ * corresponding register number, which is assigned to the variable \var. It is
+ * needed to allow explicit encoding of instructions in inline assembly where
+ * registers are chosen by the compiler in $n form, allowing us to avoid using
+ * fixed register numbers.
+ *
+ * It also allows newer instructions (not implemented by the assembler) to be
+ * transparently implemented using assembler macros, instead of needing separate
+ * cases depending on toolchain support.
+ *
+ * Simple usage example:
+ * __asm__ __volatile__("parse_r __rt, %0\n\t"
+ *			"# di	%0\n\t"
+ *			".word	(0x41606000 | (__rt << 16))"
+ *			: "=r" (status));
+ */
+
+/* Match an individual register number and assign to \var */
+#define _IFC_REG(n)				\
+	".ifc	\\r, $r" #n "\n\t"		\
+	"\\var	= " #n "\n\t"			\
+	".endif\n\t"
+
+__asm__(".macro	parse_r var r\n\t"
+	"\\var	= -1\n\t"
+	_IFC_REG(0)  _IFC_REG(1)  _IFC_REG(2)  _IFC_REG(3)
+	_IFC_REG(4)  _IFC_REG(5)  _IFC_REG(6)  _IFC_REG(7)
+	_IFC_REG(8)  _IFC_REG(9)  _IFC_REG(10) _IFC_REG(11)
+	_IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
+	_IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
+	_IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
+	_IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
+	_IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
+	".iflt	\\var\n\t"
+	".error	\"Unable to parse register name \\r\"\n\t"
+	".endif\n\t"
+	".endm");
+
+#undef _IFC_REG
+
+/* CPUCFG */
+static inline u32 read_cpucfg(u32 reg)
+{
+	return __cpucfg(reg);
+}
+
+#endif /* !__ASSEMBLY__ */
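As a quick illustration of the CPUCFG interface just defined, a hypothetical feature probe (read_cpucfg() is defined above; LOONGARCH_CPUCFG1 and CPUCFG1_UAL are the bit-field macros defined just below in this file):

/* Hypothetical probe: does this CPU support unaligned memory access? */
static inline bool cpu_has_unaligned_access(void)
{
	return read_cpucfg(LOONGARCH_CPUCFG1) & CPUCFG1_UAL;
}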
+
+/* LoongArch Registers */
+#define REG_ZERO	0x0
+#define REG_RA		0x1
+#define REG_TP		0x2
+#define REG_SP		0x3
+#define REG_A0		0x4 /* Reused as V0 for return value */
+#define REG_A1		0x5 /* Reused as V1 for return value */
+#define REG_A2		0x6
+#define REG_A3		0x7
+#define REG_A4		0x8
+#define REG_A5		0x9
+#define REG_A6		0xa
+#define REG_A7		0xb
+#define REG_T0		0xc
+#define REG_T1		0xd
+#define REG_T2		0xe
+#define REG_T3		0xf
+#define REG_T4		0x10
+#define REG_T5		0x11
+#define REG_T6		0x12
+#define REG_T7		0x13
+#define REG_T8		0x14
+#define REG_U0		0x15 /* Kernel uses it as percpu base */
+#define REG_FP		0x16
+#define REG_S0		0x17
+#define REG_S1		0x18
+#define REG_S2		0x19
+#define REG_S3		0x1a
+#define REG_S4		0x1b
+#define REG_S5		0x1c
+#define REG_S6		0x1d
+#define REG_S7		0x1e
+#define REG_S8		0x1f
+
+/* Bit fields for CPUCFG registers */
+#define LOONGARCH_CPUCFG0	0x0
+#define CPUCFG0_PRID		GENMASK(31, 0)
+
+#define LOONGARCH_CPUCFG1	0x1
+#define CPUCFG1_ISGR32		BIT(0)
+#define CPUCFG1_ISGR64		BIT(1)
+#define CPUCFG1_PAGING		BIT(2)
+#define CPUCFG1_IOCSR		BIT(3)
+#define CPUCFG1_PABITS		GENMASK(11, 4)
+#define CPUCFG1_VABITS		GENMASK(19, 12)
+#define CPUCFG1_UAL		BIT(20)
+#define CPUCFG1_RI		BIT(21)
+#define CPUCFG1_EP		BIT(22)
+#define CPUCFG1_RPLV		BIT(23)
+#define CPUCFG1_HUGEPG		BIT(24)
+#define CPUCFG1_IOCSRBRD	BIT(25)
+#define CPUCFG1_MSGINT		BIT(26)
+
+#define LOONGARCH_CPUCFG2	0x2
+#define CPUCFG2_FP		BIT(0)
+#define CPUCFG2_FPSP		BIT(1)
+#define CPUCFG2_FPDP		BIT(2)
+#define CPUCFG2_FPVERS		GENMASK(5, 3)
+#define CPUCFG2_LSX		BIT(6)
+#define CPUCFG2_LASX		BIT(7)
+#define CPUCFG2_COMPLEX		BIT(8)
+#define CPUCFG2_CRYPTO		BIT(9)
+#define CPUCFG2_LVZP		BIT(10)
+#define CPUCFG2_LVZVER		GENMASK(13, 11)
+#define CPUCFG2_LLFTP		BIT(14)
+#define CPUCFG2_LLFTPREV	GENMASK(17, 15)
+#define CPUCFG2_X86BT		BIT(18)
+#define CPUCFG2_ARMBT		BIT(19)
+#define CPUCFG2_MIPSBT		BIT(20)
+#define CPUCFG2_LSPW		BIT(21)
+#define CPUCFG2_LAM		BIT(22)
+
+#define LOONGARCH_CPUCFG3	0x3
+#define CPUCFG3_CCDMA		BIT(0)
+#define CPUCFG3_SFB		BIT(1)
+#define CPUCFG3_UCACC		BIT(2)
+#define CPUCFG3_LLEXC		BIT(3)
+#define CPUCFG3_SCDLY		BIT(4)
+#define CPUCFG3_LLDBAR		BIT(5)
+#define CPUCFG3_ITLBT		BIT(6)
+#define CPUCFG3_ICACHET		BIT(7)
+#define CPUCFG3_SPW_LVL		GENMASK(10, 8)
+#define CPUCFG3_SPW_HG_HF	BIT(11)
+#define CPUCFG3_RVA		BIT(12)
+#define CPUCFG3_RVAMAX		GENMASK(16, 13)
+
+#define LOONGARCH_CPUCFG4	0x4
+#define CPUCFG4_CCFREQ		GENMASK(31, 0)
+
+#define LOONGARCH_CPUCFG5	0x5
+#define CPUCFG5_CCMUL		GENMASK(15, 0)
+#define CPUCFG5_CCDIV		GENMASK(31, 
16) + +#define LOONGARCH_CPUCFG6 0x6 +#define CPUCFG6_PMP BIT(0) +#define CPUCFG6_PAMVER GENMASK(3, 1) +#define CPUCFG6_PMNUM GENMASK(7, 4) +#define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_UPM BIT(14) + +#define LOONGARCH_CPUCFG16 0x10 +#define CPUCFG16_L1_IUPRE BIT(0) +#define CPUCFG16_L1_IUUNIFY BIT(1) +#define CPUCFG16_L1_DPRE BIT(2) +#define CPUCFG16_L2_IUPRE BIT(3) +#define CPUCFG16_L2_IUUNIFY BIT(4) +#define CPUCFG16_L2_IUPRIV BIT(5) +#define CPUCFG16_L2_IUINCL BIT(6) +#define CPUCFG16_L2_DPRE BIT(7) +#define CPUCFG16_L2_DPRIV BIT(8) +#define CPUCFG16_L2_DINCL BIT(9) +#define CPUCFG16_L3_IUPRE BIT(10) +#define CPUCFG16_L3_IUUNIFY BIT(11) +#define CPUCFG16_L3_IUPRIV BIT(12) +#define CPUCFG16_L3_IUINCL BIT(13) +#define CPUCFG16_L3_DPRE BIT(14) +#define CPUCFG16_L3_DPRIV BIT(15) +#define CPUCFG16_L3_DINCL BIT(16) + +#define LOONGARCH_CPUCFG17 0x11 +#define CPUCFG17_L1I_WAYS_M GENMASK(15, 0) +#define CPUCFG17_L1I_SETS_M GENMASK(23, 16) +#define CPUCFG17_L1I_SIZE_M GENMASK(30, 24) +#define CPUCFG17_L1I_WAYS 0 +#define CPUCFG17_L1I_SETS 16 +#define CPUCFG17_L1I_SIZE 24 + +#define LOONGARCH_CPUCFG18 0x12 +#define CPUCFG18_L1D_WAYS_M GENMASK(15, 0) +#define CPUCFG18_L1D_SETS_M GENMASK(23, 16) +#define CPUCFG18_L1D_SIZE_M GENMASK(30, 24) +#define CPUCFG18_L1D_WAYS 0 +#define CPUCFG18_L1D_SETS 16 +#define CPUCFG18_L1D_SIZE 24 + +#define LOONGARCH_CPUCFG19 0x13 +#define CPUCFG19_L2_WAYS_M GENMASK(15, 0) +#define CPUCFG19_L2_SETS_M GENMASK(23, 16) +#define CPUCFG19_L2_SIZE_M GENMASK(30, 24) +#define CPUCFG19_L2_WAYS 0 +#define CPUCFG19_L2_SETS 16 +#define CPUCFG19_L2_SIZE 24 + +#define LOONGARCH_CPUCFG20 0x14 +#define CPUCFG20_L3_WAYS_M GENMASK(15, 0) +#define CPUCFG20_L3_SETS_M GENMASK(23, 16) +#define CPUCFG20_L3_SIZE_M GENMASK(30, 24) +#define CPUCFG20_L3_WAYS 0 +#define CPUCFG20_L3_SETS 16 +#define CPUCFG20_L3_SIZE 24 + +#define LOONGARCH_CPUCFG48 0x30 +#define CPUCFG48_MCSR_LCK BIT(0) +#define CPUCFG48_NAP_EN BIT(1) +#define CPUCFG48_VFPU_CG BIT(2) +#define CPUCFG48_RAM_CG BIT(3) + +#ifndef __ASSEMBLY__ + +/* CSR */ +static __always_inline u32 csr_read32(u32 reg) +{ + return __csrrd_w(reg); +} + +static __always_inline u64 csr_read64(u32 reg) +{ + return __csrrd_d(reg); +} + +static __always_inline void csr_write32(u32 val, u32 reg) +{ + __csrwr_w(val, reg); +} + +static __always_inline void csr_write64(u64 val, u32 reg) +{ + __csrwr_d(val, reg); +} + +static __always_inline u32 csr_xchg32(u32 val, u32 mask, u32 reg) +{ + return __csrxchg_w(val, mask, reg); +} + +static __always_inline u64 csr_xchg64(u64 val, u64 mask, u32 reg) +{ + return __csrxchg_d(val, mask, reg); +} + +/* IOCSR */ +static __always_inline u32 iocsr_read32(u32 reg) +{ + return __iocsrrd_w(reg); +} + +static __always_inline u64 iocsr_read64(u32 reg) +{ + return __iocsrrd_d(reg); +} + +static __always_inline void iocsr_write32(u32 val, u32 reg) +{ + __iocsrwr_w(val, reg); +} + +static __always_inline void iocsr_write64(u64 val, u32 reg) +{ + __iocsrwr_d(val, reg); +} + +#endif /* !__ASSEMBLY__ */ + +/* CSR register number */ + +/* Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_WE_SHIFT 9 +#define CSR_CRMD_WE (_ULCAST_(0x1) << CSR_CRMD_WE_SHIFT) +#define CSR_CRMD_DACM_SHIFT 7 +#define CSR_CRMD_DACM_WIDTH 2 +#define CSR_CRMD_DACM (_ULCAST_(0x3) << CSR_CRMD_DACM_SHIFT) +#define CSR_CRMD_DACF_SHIFT 5 +#define CSR_CRMD_DACF_WIDTH 2 +#define CSR_CRMD_DACF (_ULCAST_(0x3) << CSR_CRMD_DACF_SHIFT) +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG (_ULCAST_(0x1) << 
CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_DA_SHIFT	3
+#define CSR_CRMD_DA		(_ULCAST_(0x1) << CSR_CRMD_DA_SHIFT)
+#define CSR_CRMD_IE_SHIFT	2
+#define CSR_CRMD_IE		(_ULCAST_(0x1) << CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT	0
+#define CSR_CRMD_PLV_WIDTH	2
+#define CSR_CRMD_PLV		(_ULCAST_(0x3) << CSR_CRMD_PLV_SHIFT)
+
+#define PLV_KERN		0
+#define PLV_USER		3
+#define PLV_MASK		0x3
+
+#define LOONGARCH_CSR_PRMD	0x1	/* Prev-exception mode info */
+#define CSR_PRMD_PWE_SHIFT	3
+#define CSR_PRMD_PWE		(_ULCAST_(0x1) << CSR_PRMD_PWE_SHIFT)
+#define CSR_PRMD_PIE_SHIFT	2
+#define CSR_PRMD_PIE		(_ULCAST_(0x1) << CSR_PRMD_PIE_SHIFT)
+#define CSR_PRMD_PPLV_SHIFT	0
+#define CSR_PRMD_PPLV_WIDTH	2
+#define CSR_PRMD_PPLV		(_ULCAST_(0x3) << CSR_PRMD_PPLV_SHIFT)
+
+#define LOONGARCH_CSR_EUEN	0x2	/* Extended unit enable */
+#define CSR_EUEN_LBTEN_SHIFT	3
+#define CSR_EUEN_LBTEN		(_ULCAST_(0x1) << CSR_EUEN_LBTEN_SHIFT)
+#define CSR_EUEN_LASXEN_SHIFT	2
+#define CSR_EUEN_LASXEN		(_ULCAST_(0x1) << CSR_EUEN_LASXEN_SHIFT)
+#define CSR_EUEN_LSXEN_SHIFT	1
+#define CSR_EUEN_LSXEN		(_ULCAST_(0x1) << CSR_EUEN_LSXEN_SHIFT)
+#define CSR_EUEN_FPEN_SHIFT	0
+#define CSR_EUEN_FPEN		(_ULCAST_(0x1) << CSR_EUEN_FPEN_SHIFT)
+
+#define LOONGARCH_CSR_MISC	0x3	/* Misc config */
+
+#define LOONGARCH_CSR_ECFG	0x4	/* Exception config */
+#define CSR_ECFG_VS_SHIFT	16
+#define CSR_ECFG_VS_WIDTH	3
+#define CSR_ECFG_VS		(_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT)
+#define CSR_ECFG_IM_SHIFT	0
+#define CSR_ECFG_IM_WIDTH	13
+#define CSR_ECFG_IM		(_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT)
+
+#define LOONGARCH_CSR_ESTAT	0x5	/* Exception status */
+#define CSR_ESTAT_ESUBCODE_SHIFT	22
+#define CSR_ESTAT_ESUBCODE_WIDTH	9
+#define CSR_ESTAT_ESUBCODE	(_ULCAST_(0x1ff) << CSR_ESTAT_ESUBCODE_SHIFT)
+#define CSR_ESTAT_EXC_SHIFT	16
+#define CSR_ESTAT_EXC_WIDTH	6
+#define CSR_ESTAT_EXC		(_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT)
+#define CSR_ESTAT_IS_SHIFT	0
+#define CSR_ESTAT_IS_WIDTH	15
+#define CSR_ESTAT_IS		(_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT)
+
+#define LOONGARCH_CSR_ERA	0x6	/* ERA */
+
+#define LOONGARCH_CSR_BADV	0x7	/* Bad virtual address */
+
+#define LOONGARCH_CSR_BADI	0x8	/* Bad instruction */
+
+#define LOONGARCH_CSR_EENTRY	0xc	/* Exception entry */
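A small sketch of how these mode-register fields compose (csr_read32() is defined earlier in this file; the helper name is hypothetical):

/* Hypothetical: extract the current privilege level from CRMD. */
static inline unsigned int current_plv(void)
{
	return (csr_read32(LOONGARCH_CSR_CRMD) & CSR_CRMD_PLV) >> CSR_CRMD_PLV_SHIFT;
}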
+
+/* TLB related CSR registers */
+#define LOONGARCH_CSR_TLBIDX	0x10	/* TLB Index, EHINV, PageSize, NP */
+#define CSR_TLBIDX_EHINV_SHIFT	31
+#define CSR_TLBIDX_EHINV	(_ULCAST_(1) << CSR_TLBIDX_EHINV_SHIFT)
+#define CSR_TLBIDX_PS_SHIFT	24
+#define CSR_TLBIDX_PS_WIDTH	6
+#define CSR_TLBIDX_PS		(_ULCAST_(0x3f) << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_IDX_SHIFT	0
+#define CSR_TLBIDX_IDX_WIDTH	12
+#define CSR_TLBIDX_IDX		(_ULCAST_(0xfff) << CSR_TLBIDX_IDX_SHIFT)
+#define CSR_TLBIDX_SIZEM	0x3f000000
+#define CSR_TLBIDX_SIZE		CSR_TLBIDX_PS_SHIFT
+#define CSR_TLBIDX_IDXM		0xfff
+#define CSR_INVALID_ENTRY(e)	(CSR_TLBIDX_EHINV | e)
+
+#define LOONGARCH_CSR_TLBEHI	0x11	/* TLB EntryHi */
+
+#define LOONGARCH_CSR_TLBELO0	0x12	/* TLB EntryLo0 */
+#define CSR_TLBLO0_RPLV_SHIFT	63
+#define CSR_TLBLO0_RPLV		(_ULCAST_(0x1) << CSR_TLBLO0_RPLV_SHIFT)
+#define CSR_TLBLO0_NX_SHIFT	62
+#define CSR_TLBLO0_NX		(_ULCAST_(0x1) << CSR_TLBLO0_NX_SHIFT)
+#define CSR_TLBLO0_NR_SHIFT	61
+#define CSR_TLBLO0_NR		(_ULCAST_(0x1) << CSR_TLBLO0_NR_SHIFT)
+#define CSR_TLBLO0_PFN_SHIFT	12
+#define CSR_TLBLO0_PFN_WIDTH	36
+#define CSR_TLBLO0_PFN		(_ULCAST_(0xfffffffff) << CSR_TLBLO0_PFN_SHIFT)
+#define CSR_TLBLO0_GLOBAL_SHIFT	6
+#define CSR_TLBLO0_GLOBAL	(_ULCAST_(0x1) << CSR_TLBLO0_GLOBAL_SHIFT)
+#define CSR_TLBLO0_CCA_SHIFT	4
+#define CSR_TLBLO0_CCA_WIDTH	2
+#define CSR_TLBLO0_CCA		(_ULCAST_(0x3) << CSR_TLBLO0_CCA_SHIFT)
+#define CSR_TLBLO0_PLV_SHIFT	2
+#define CSR_TLBLO0_PLV_WIDTH	2
+#define CSR_TLBLO0_PLV		(_ULCAST_(0x3) << CSR_TLBLO0_PLV_SHIFT)
+#define CSR_TLBLO0_WE_SHIFT	1
+#define CSR_TLBLO0_WE		(_ULCAST_(0x1) << CSR_TLBLO0_WE_SHIFT)
+#define CSR_TLBLO0_V_SHIFT	0
+#define CSR_TLBLO0_V		(_ULCAST_(0x1) << CSR_TLBLO0_V_SHIFT)
+
+#define LOONGARCH_CSR_TLBELO1	0x13	/* TLB EntryLo1 */
+#define CSR_TLBLO1_RPLV_SHIFT	63
+#define CSR_TLBLO1_RPLV		(_ULCAST_(0x1) << CSR_TLBLO1_RPLV_SHIFT)
+#define CSR_TLBLO1_NX_SHIFT	62
+#define CSR_TLBLO1_NX		(_ULCAST_(0x1) << CSR_TLBLO1_NX_SHIFT)
+#define CSR_TLBLO1_NR_SHIFT	61
+#define CSR_TLBLO1_NR		(_ULCAST_(0x1) << CSR_TLBLO1_NR_SHIFT)
+#define CSR_TLBLO1_PFN_SHIFT	12
+#define CSR_TLBLO1_PFN_WIDTH	36
+#define CSR_TLBLO1_PFN		(_ULCAST_(0xfffffffff) << CSR_TLBLO1_PFN_SHIFT)
+#define CSR_TLBLO1_GLOBAL_SHIFT	6
+#define CSR_TLBLO1_GLOBAL	(_ULCAST_(0x1) << CSR_TLBLO1_GLOBAL_SHIFT)
+#define CSR_TLBLO1_CCA_SHIFT	4
+#define CSR_TLBLO1_CCA_WIDTH	2
+#define CSR_TLBLO1_CCA		(_ULCAST_(0x3) << CSR_TLBLO1_CCA_SHIFT)
+#define CSR_TLBLO1_PLV_SHIFT	2
+#define CSR_TLBLO1_PLV_WIDTH	2
+#define CSR_TLBLO1_PLV		(_ULCAST_(0x3) << CSR_TLBLO1_PLV_SHIFT)
+#define CSR_TLBLO1_WE_SHIFT	1
+#define CSR_TLBLO1_WE		(_ULCAST_(0x1) << CSR_TLBLO1_WE_SHIFT)
+#define CSR_TLBLO1_V_SHIFT	0
+#define CSR_TLBLO1_V		(_ULCAST_(0x1) << CSR_TLBLO1_V_SHIFT)
+
+#define LOONGARCH_CSR_GTLBC	0x15	/* Guest TLB control */
+#define CSR_GTLBC_RID_SHIFT	16
+#define CSR_GTLBC_RID_WIDTH	8
+#define CSR_GTLBC_RID		(_ULCAST_(0xff) << CSR_GTLBC_RID_SHIFT)
+#define CSR_GTLBC_TOTI_SHIFT	13
+#define CSR_GTLBC_TOTI		(_ULCAST_(0x1) << CSR_GTLBC_TOTI_SHIFT)
+#define CSR_GTLBC_USERID_SHIFT	12
+#define CSR_GTLBC_USERID	(_ULCAST_(0x1) << CSR_GTLBC_USERID_SHIFT)
+#define CSR_GTLBC_GMTLBSZ_SHIFT	0
+#define CSR_GTLBC_GMTLBSZ_WIDTH	6
+#define CSR_GTLBC_GMTLBSZ	(_ULCAST_(0x3f) << CSR_GTLBC_GMTLBSZ_SHIFT)
+
+#define LOONGARCH_CSR_TRGP	0x16	/* TLBR read guest info */
+#define CSR_TRGP_RID_SHIFT	16
+#define CSR_TRGP_RID_WIDTH	8
+#define CSR_TRGP_RID		(_ULCAST_(0xff) << CSR_TRGP_RID_SHIFT)
+#define CSR_TRGP_GTLB_SHIFT	0
+#define CSR_TRGP_GTLB		(1 << CSR_TRGP_GTLB_SHIFT)
+
+#define LOONGARCH_CSR_ASID	0x18	/* ASID */
+#define CSR_ASID_BIT_SHIFT	16	/* ASIDBits */
+#define CSR_ASID_BIT_WIDTH	8
+#define CSR_ASID_BIT		(_ULCAST_(0xff) << CSR_ASID_BIT_SHIFT)
+#define CSR_ASID_ASID_SHIFT	0
+#define CSR_ASID_ASID_WIDTH	10
+#define CSR_ASID_ASID		(_ULCAST_(0x3ff) << CSR_ASID_ASID_SHIFT)
+
+#define LOONGARCH_CSR_PGDL	0x19	/* Page table base address when VA[47] = 0 */
+
+#define LOONGARCH_CSR_PGDH	0x1a	/* Page table base address when VA[47] = 1 */
+
+#define LOONGARCH_CSR_PGD	0x1b	/* Page table base */
+
+#define LOONGARCH_CSR_PWCTL0	0x1c	/* PWCtl0 */
+#define CSR_PWCTL0_PTEW_SHIFT	30
+#define CSR_PWCTL0_PTEW_WIDTH	2
+#define CSR_PWCTL0_PTEW		(_ULCAST_(0x3) << CSR_PWCTL0_PTEW_SHIFT)
+#define CSR_PWCTL0_DIR1WIDTH_SHIFT	25
+#define CSR_PWCTL0_DIR1WIDTH_WIDTH	5
+#define CSR_PWCTL0_DIR1WIDTH	(_ULCAST_(0x1f) << CSR_PWCTL0_DIR1WIDTH_SHIFT)
+#define CSR_PWCTL0_DIR1BASE_SHIFT	20
+#define CSR_PWCTL0_DIR1BASE_WIDTH	5
+#define CSR_PWCTL0_DIR1BASE	(_ULCAST_(0x1f) << CSR_PWCTL0_DIR1BASE_SHIFT)
+#define CSR_PWCTL0_DIR0WIDTH_SHIFT	15
+#define CSR_PWCTL0_DIR0WIDTH_WIDTH	5
+#define CSR_PWCTL0_DIR0WIDTH	(_ULCAST_(0x1f) << CSR_PWCTL0_DIR0WIDTH_SHIFT)
+#define CSR_PWCTL0_DIR0BASE_SHIFT	10
+#define CSR_PWCTL0_DIR0BASE_WIDTH	5
+#define CSR_PWCTL0_DIR0BASE	(_ULCAST_(0x1f) << CSR_PWCTL0_DIR0BASE_SHIFT)
+#define CSR_PWCTL0_PTWIDTH_SHIFT	5
+#define CSR_PWCTL0_PTWIDTH_WIDTH	5
+#define CSR_PWCTL0_PTWIDTH	(_ULCAST_(0x1f) << CSR_PWCTL0_PTWIDTH_SHIFT)
+#define CSR_PWCTL0_PTBASE_SHIFT	0
+#define CSR_PWCTL0_PTBASE_WIDTH	5
+#define CSR_PWCTL0_PTBASE	(_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT)
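To show how the PWCTL0 fields above fit together, a hedged sketch of programming the hardware page walker for one plausible layout (4 KiB pages with 8-byte PTEs, so each table level resolves 9 VA bits; the helper name is made up, and csr_write64() is defined earlier in this file):

/* Illustrative only: last-level page table covers VA[20:12],
 * the directory above it covers VA[29:21]. */
static inline void example_setup_page_walker(void)
{
	unsigned long pwctl0;

	pwctl0 = (12UL << CSR_PWCTL0_PTBASE_SHIFT) |
		 (9UL  << CSR_PWCTL0_PTWIDTH_SHIFT) |
		 (21UL << CSR_PWCTL0_DIR0BASE_SHIFT) |
		 (9UL  << CSR_PWCTL0_DIR0WIDTH_SHIFT);

	csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0);
}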
+
+#define LOONGARCH_CSR_PWCTL1	0x1d	/* PWCtl1 */
+#define CSR_PWCTL1_DIR3WIDTH_SHIFT	18
+#define CSR_PWCTL1_DIR3WIDTH_WIDTH	5
+#define CSR_PWCTL1_DIR3WIDTH	(_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT)
+#define CSR_PWCTL1_DIR3BASE_SHIFT	12
+#define CSR_PWCTL1_DIR3BASE_WIDTH	5
+#define CSR_PWCTL1_DIR3BASE	(_ULCAST_(0x1f) << CSR_PWCTL1_DIR3BASE_SHIFT)
+#define CSR_PWCTL1_DIR2WIDTH_SHIFT	6
+#define CSR_PWCTL1_DIR2WIDTH_WIDTH	5
+#define CSR_PWCTL1_DIR2WIDTH	(_ULCAST_(0x1f) << CSR_PWCTL1_DIR2WIDTH_SHIFT)
+#define CSR_PWCTL1_DIR2BASE_SHIFT	0
+#define CSR_PWCTL1_DIR2BASE_WIDTH	5
+#define CSR_PWCTL1_DIR2BASE	(_ULCAST_(0x1f) << CSR_PWCTL1_DIR2BASE_SHIFT)
+
+#define LOONGARCH_CSR_STLBPGSIZE	0x1e
+#define CSR_STLBPGSIZE_PS_WIDTH	6
+#define CSR_STLBPGSIZE_PS	(_ULCAST_(0x3f))
+
+#define LOONGARCH_CSR_RVACFG	0x1f
+#define CSR_RVACFG_RDVA_WIDTH	4
+#define CSR_RVACFG_RDVA		(_ULCAST_(0xf))
+
+/* Config CSR registers */
+#define LOONGARCH_CSR_CPUID	0x20	/* CPU core id */
+#define CSR_CPUID_COREID_WIDTH	9
+#define CSR_CPUID_COREID	_ULCAST_(0x1ff)
+
+#define LOONGARCH_CSR_PRCFG1	0x21	/* Config1 */
+#define CSR_CONF1_VSMAX_SHIFT	12
+#define CSR_CONF1_VSMAX_WIDTH	3
+#define CSR_CONF1_VSMAX		(_ULCAST_(7) << CSR_CONF1_VSMAX_SHIFT)
+#define CSR_CONF1_TMRBITS_SHIFT	4
+#define CSR_CONF1_TMRBITS_WIDTH	8
+#define CSR_CONF1_TMRBITS	(_ULCAST_(0xff) << CSR_CONF1_TMRBITS_SHIFT)
+#define CSR_CONF1_KSNUM_WIDTH	4
+#define CSR_CONF1_KSNUM		_ULCAST_(0xf)
+
+#define LOONGARCH_CSR_PRCFG2	0x22	/* Config2 */
+#define CSR_CONF2_PGMASK_SUPP	0x3ffff000
+
+#define LOONGARCH_CSR_PRCFG3	0x23	/* Config3 */
+#define CSR_CONF3_STLBIDX_SHIFT	20
+#define CSR_CONF3_STLBIDX_WIDTH	6
+#define CSR_CONF3_STLBIDX	(_ULCAST_(0x3f) << CSR_CONF3_STLBIDX_SHIFT)
+#define CSR_CONF3_STLBWAYS_SHIFT	12
+#define CSR_CONF3_STLBWAYS_WIDTH	8
+#define CSR_CONF3_STLBWAYS	(_ULCAST_(0xff) << CSR_CONF3_STLBWAYS_SHIFT)
+#define CSR_CONF3_MTLBSIZE_SHIFT	4
+#define CSR_CONF3_MTLBSIZE_WIDTH	8
+#define CSR_CONF3_MTLBSIZE	(_ULCAST_(0xff) << CSR_CONF3_MTLBSIZE_SHIFT)
+#define CSR_CONF3_TLBTYPE_SHIFT	0
+#define CSR_CONF3_TLBTYPE_WIDTH	4
+#define CSR_CONF3_TLBTYPE	(_ULCAST_(0xf) << CSR_CONF3_TLBTYPE_SHIFT)
+
+/* KSave registers */
+#define LOONGARCH_CSR_KS0	0x30
+#define LOONGARCH_CSR_KS1	0x31
+#define LOONGARCH_CSR_KS2	0x32
+#define LOONGARCH_CSR_KS3	0x33
+#define LOONGARCH_CSR_KS4	0x34
+#define LOONGARCH_CSR_KS5	0x35
+#define LOONGARCH_CSR_KS6	0x36
+#define LOONGARCH_CSR_KS7	0x37
+#define LOONGARCH_CSR_KS8	0x38
+
+/* Exception allocated KS0, KS1 and KS2 statically */
+#define EXCEPTION_KS0		LOONGARCH_CSR_KS0
+#define EXCEPTION_KS1		LOONGARCH_CSR_KS1
+#define EXCEPTION_KS2		LOONGARCH_CSR_KS2
+#define EXC_KSAVE_MASK		(1 << 0 | 1 << 1 | 1 << 2)
+
+/* Percpu-data base allocated KS3 statically */
+#define PERCPU_BASE_KS		LOONGARCH_CSR_KS3
+#define PERCPU_KSAVE_MASK	(1 << 3)
+
+/* KVM allocated KS4 and KS5 statically */
+#define KVM_VCPU_KS		LOONGARCH_CSR_KS4
+#define KVM_TEMP_KS		LOONGARCH_CSR_KS5
+#define KVM_KSAVE_MASK		(1 << 4 | 1 << 5)
+
+/* Timer registers */
+#define LOONGARCH_CSR_TMID	0x40	/* Timer ID */
+
+#define LOONGARCH_CSR_TCFG	0x41	/* Timer config */
+#define CSR_TCFG_VAL_SHIFT	2
+#define CSR_TCFG_VAL_WIDTH	48
+#define CSR_TCFG_VAL		(_ULCAST_(0x3fffffffffff) << CSR_TCFG_VAL_SHIFT)
+#define CSR_TCFG_PERIOD_SHIFT	1
+#define 
CSR_TCFG_PERIOD (_ULCAST_(0x1) << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (_ULCAST_(0x1)) + +#define LOONGARCH_CSR_TVAL 0x42 /* Timer value */ + +#define LOONGARCH_CSR_CNTC 0x43 /* Timer offset */ + +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) + +/* Guest registers */ +#define LOONGARCH_CSR_GSTAT 0x50 /* Guest status */ +#define CSR_GSTAT_GID_SHIFT 16 +#define CSR_GSTAT_GID_WIDTH 8 +#define CSR_GSTAT_GID (_ULCAST_(0xff) << CSR_GSTAT_GID_SHIFT) +#define CSR_GSTAT_GIDBIT_SHIFT 4 +#define CSR_GSTAT_GIDBIT_WIDTH 6 +#define CSR_GSTAT_GIDBIT (_ULCAST_(0x3f) << CSR_GSTAT_GIDBIT_SHIFT) +#define CSR_GSTAT_PVM_SHIFT 1 +#define CSR_GSTAT_PVM (_ULCAST_(0x1) << CSR_GSTAT_PVM_SHIFT) +#define CSR_GSTAT_VM_SHIFT 0 +#define CSR_GSTAT_VM (_ULCAST_(0x1) << CSR_GSTAT_VM_SHIFT) + +#define LOONGARCH_CSR_GCFG 0x51 /* Guest config */ +#define CSR_GCFG_GPERF_SHIFT 24 +#define CSR_GCFG_GPERF_WIDTH 3 +#define CSR_GCFG_GPERF (_ULCAST_(0x7) << CSR_GCFG_GPERF_SHIFT) +#define CSR_GCFG_GCI_SHIFT 20 +#define CSR_GCFG_GCI_WIDTH 2 +#define CSR_GCFG_GCI (_ULCAST_(0x3) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCI_ALL (_ULCAST_(0x0) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCI_HIT (_ULCAST_(0x1) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCI_SECURE (_ULCAST_(0x2) << CSR_GCFG_GCI_SHIFT) +#define CSR_GCFG_GCIP_SHIFT 16 +#define CSR_GCFG_GCIP (_ULCAST_(0xf) << CSR_GCFG_GCIP_SHIFT) +#define CSR_GCFG_GCIP_ALL (_ULCAST_(0x1) << CSR_GCFG_GCIP_SHIFT) +#define CSR_GCFG_GCIP_HIT (_ULCAST_(0x1) << (CSR_GCFG_GCIP_SHIFT + 1)) +#define CSR_GCFG_GCIP_SECURE (_ULCAST_(0x1) << (CSR_GCFG_GCIP_SHIFT + 2)) +#define CSR_GCFG_TORU_SHIFT 15 +#define CSR_GCFG_TORU (_ULCAST_(0x1) << CSR_GCFG_TORU_SHIFT) +#define CSR_GCFG_TORUP_SHIFT 14 +#define CSR_GCFG_TORUP (_ULCAST_(0x1) << CSR_GCFG_TORUP_SHIFT) +#define CSR_GCFG_TOP_SHIFT 13 +#define CSR_GCFG_TOP (_ULCAST_(0x1) << CSR_GCFG_TOP_SHIFT) +#define CSR_GCFG_TOPP_SHIFT 12 +#define CSR_GCFG_TOPP (_ULCAST_(0x1) << CSR_GCFG_TOPP_SHIFT) +#define CSR_GCFG_TOE_SHIFT 11 +#define CSR_GCFG_TOE (_ULCAST_(0x1) << CSR_GCFG_TOE_SHIFT) +#define CSR_GCFG_TOEP_SHIFT 10 +#define CSR_GCFG_TOEP (_ULCAST_(0x1) << CSR_GCFG_TOEP_SHIFT) +#define CSR_GCFG_TIT_SHIFT 9 +#define CSR_GCFG_TIT (_ULCAST_(0x1) << CSR_GCFG_TIT_SHIFT) +#define CSR_GCFG_TITP_SHIFT 8 +#define CSR_GCFG_TITP (_ULCAST_(0x1) << CSR_GCFG_TITP_SHIFT) +#define CSR_GCFG_SIT_SHIFT 7 +#define CSR_GCFG_SIT (_ULCAST_(0x1) << CSR_GCFG_SIT_SHIFT) +#define CSR_GCFG_SITP_SHIFT 6 +#define CSR_GCFG_SITP (_ULCAST_(0x1) << CSR_GCFG_SITP_SHIFT) +#define CSR_GCFG_MATC_SHITF 4 +#define CSR_GCFG_MATC_WIDTH 2 +#define CSR_GCFG_MATC_MASK (_ULCAST_(0x3) << CSR_GCFG_MATC_SHITF) +#define CSR_GCFG_MATC_GUEST (_ULCAST_(0x0) << CSR_GCFG_MATC_SHITF) +#define CSR_GCFG_MATC_ROOT (_ULCAST_(0x1) << CSR_GCFG_MATC_SHITF) +#define CSR_GCFG_MATC_NEST (_ULCAST_(0x2) << CSR_GCFG_MATC_SHITF) + +#define LOONGARCH_CSR_GINTC 0x52 /* Guest interrupt control */ +#define CSR_GINTC_HC_SHIFT 16 +#define CSR_GINTC_HC_WIDTH 8 +#define CSR_GINTC_HC (_ULCAST_(0xff) << CSR_GINTC_HC_SHIFT) +#define CSR_GINTC_PIP_SHIFT 8 +#define CSR_GINTC_PIP_WIDTH 8 +#define CSR_GINTC_PIP (_ULCAST_(0xff) << CSR_GINTC_PIP_SHIFT) +#define CSR_GINTC_VIP_SHIFT 0 +#define CSR_GINTC_VIP_WIDTH 8 +#define CSR_GINTC_VIP (_ULCAST_(0xff)) + +#define LOONGARCH_CSR_GCNTC 0x53 /* Guest timer offset */ + +/* LLBCTL register */ +#define LOONGARCH_CSR_LLBCTL 0x60 /* LLBit control */ +#define CSR_LLBCTL_ROLLB_SHIFT 0 +#define CSR_LLBCTL_ROLLB 
(_ULCAST_(1) << CSR_LLBCTL_ROLLB_SHIFT) +#define CSR_LLBCTL_WCLLB_SHIFT 1 +#define CSR_LLBCTL_WCLLB (_ULCAST_(1) << CSR_LLBCTL_WCLLB_SHIFT) +#define CSR_LLBCTL_KLO_SHIFT 2 +#define CSR_LLBCTL_KLO (_ULCAST_(1) << CSR_LLBCTL_KLO_SHIFT) + +/* Implement dependent */ +#define LOONGARCH_CSR_IMPCTL1 0x80 /* Loongson config1 */ +#define CSR_MISPEC_SHIFT 20 +#define CSR_MISPEC_WIDTH 8 +#define CSR_MISPEC (_ULCAST_(0xff) << CSR_MISPEC_SHIFT) +#define CSR_SSEN_SHIFT 18 +#define CSR_SSEN (_ULCAST_(1) << CSR_SSEN_SHIFT) +#define CSR_SCRAND_SHIFT 17 +#define CSR_SCRAND (_ULCAST_(1) << CSR_SCRAND_SHIFT) +#define CSR_LLEXCL_SHIFT 16 +#define CSR_LLEXCL (_ULCAST_(1) << CSR_LLEXCL_SHIFT) +#define CSR_DISVC_SHIFT 15 +#define CSR_DISVC (_ULCAST_(1) << CSR_DISVC_SHIFT) +#define CSR_VCLRU_SHIFT 14 +#define CSR_VCLRU (_ULCAST_(1) << CSR_VCLRU_SHIFT) +#define CSR_DCLRU_SHIFT 13 +#define CSR_DCLRU (_ULCAST_(1) << CSR_DCLRU_SHIFT) +#define CSR_FASTLDQ_SHIFT 12 +#define CSR_FASTLDQ (_ULCAST_(1) << CSR_FASTLDQ_SHIFT) +#define CSR_USERCAC_SHIFT 11 +#define CSR_USERCAC (_ULCAST_(1) << CSR_USERCAC_SHIFT) +#define CSR_ANTI_MISPEC_SHIFT 10 +#define CSR_ANTI_MISPEC (_ULCAST_(1) << CSR_ANTI_MISPEC_SHIFT) +#define CSR_AUTO_FLUSHSFB_SHIFT 9 +#define CSR_AUTO_FLUSHSFB (_ULCAST_(1) << CSR_AUTO_FLUSHSFB_SHIFT) +#define CSR_STFILL_SHIFT 8 +#define CSR_STFILL (_ULCAST_(1) << CSR_STFILL_SHIFT) +#define CSR_LIFEP_SHIFT 7 +#define CSR_LIFEP (_ULCAST_(1) << CSR_LIFEP_SHIFT) +#define CSR_LLSYNC_SHIFT 6 +#define CSR_LLSYNC (_ULCAST_(1) << CSR_LLSYNC_SHIFT) +#define CSR_BRBTDIS_SHIFT 5 +#define CSR_BRBTDIS (_ULCAST_(1) << CSR_BRBTDIS_SHIFT) +#define CSR_RASDIS_SHIFT 4 +#define CSR_RASDIS (_ULCAST_(1) << CSR_RASDIS_SHIFT) +#define CSR_STPRE_SHIFT 2 +#define CSR_STPRE_WIDTH 2 +#define CSR_STPRE (_ULCAST_(3) << CSR_STPRE_SHIFT) +#define CSR_INSTPRE_SHIFT 1 +#define CSR_INSTPRE (_ULCAST_(1) << CSR_INSTPRE_SHIFT) +#define CSR_DATAPRE_SHIFT 0 +#define CSR_DATAPRE (_ULCAST_(1) << CSR_DATAPRE_SHIFT) + +#define LOONGARCH_CSR_IMPCTL2 0x81 /* Loongson config2 */ +#define CSR_FLUSH_MTLB_SHIFT 0 +#define CSR_FLUSH_MTLB (_ULCAST_(1) << CSR_FLUSH_MTLB_SHIFT) +#define CSR_FLUSH_STLB_SHIFT 1 +#define CSR_FLUSH_STLB (_ULCAST_(1) << CSR_FLUSH_STLB_SHIFT) +#define CSR_FLUSH_DTLB_SHIFT 2 +#define CSR_FLUSH_DTLB (_ULCAST_(1) << CSR_FLUSH_DTLB_SHIFT) +#define CSR_FLUSH_ITLB_SHIFT 3 +#define CSR_FLUSH_ITLB (_ULCAST_(1) << CSR_FLUSH_ITLB_SHIFT) +#define CSR_FLUSH_BTAC_SHIFT 4 +#define CSR_FLUSH_BTAC (_ULCAST_(1) << CSR_FLUSH_BTAC_SHIFT) + +#define LOONGARCH_CSR_GNMI 0x82 + +/* TLB Refill registers */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 /* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRBADV 0x89 /* TLB refill badvaddr */ +#define LOONGARCH_CSR_TLBRERA 0x8a /* TLB refill ERA */ +#define LOONGARCH_CSR_TLBRSAVE 0x8b /* KSave for TLB refill exception */ +#define LOONGARCH_CSR_TLBRELO0 0x8c /* TLB refill entrylo0 */ +#define LOONGARCH_CSR_TLBRELO1 0x8d /* TLB refill entrylo1 */ +#define LOONGARCH_CSR_TLBREHI 0x8e /* TLB refill entryhi */ +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (_ULCAST_(0x3f) << CSR_TLBREHI_PS_SHIFT) +#define LOONGARCH_CSR_TLBRPRMD 0x8f /* TLB refill mode info */ + +/* Machine Error registers */ +#define LOONGARCH_CSR_MERRCTL 0x90 /* MERRCTL */ +#define LOONGARCH_CSR_MERRINFO1 0x91 /* MError info1 */ +#define LOONGARCH_CSR_MERRINFO2 0x92 /* MError info2 */ +#define LOONGARCH_CSR_MERRENTRY 0x93 /* MError exception entry */ +#define LOONGARCH_CSR_MERRERA 0x94 /* MError exception ERA */ +#define LOONGARCH_CSR_MERRSAVE 0x95 /* 
KSave for machine error exception */ + +#define LOONGARCH_CSR_CTAG 0x98 /* TagLo + TagHi */ + +#define LOONGARCH_CSR_PRID 0xc0 + +/* Shadow MCSR : 0xc0 ~ 0xff */ +#define LOONGARCH_CSR_MCSR0 0xc0 /* CPUCFG0 and CPUCFG1 */ +#define MCSR0_INT_IMPL_SHIFT 58 +#define MCSR0_INT_IMPL 0 +#define MCSR0_IOCSR_BRD_SHIFT 57 +#define MCSR0_IOCSR_BRD (_ULCAST_(1) << MCSR0_IOCSR_BRD_SHIFT) +#define MCSR0_HUGEPG_SHIFT 56 +#define MCSR0_HUGEPG (_ULCAST_(1) << MCSR0_HUGEPG_SHIFT) +#define MCSR0_RPLMTLB_SHIFT 55 +#define MCSR0_RPLMTLB (_ULCAST_(1) << MCSR0_RPLMTLB_SHIFT) +#define MCSR0_EP_SHIFT 54 +#define MCSR0_EP (_ULCAST_(1) << MCSR0_EP_SHIFT) +#define MCSR0_RI_SHIFT 53 +#define MCSR0_RI (_ULCAST_(1) << MCSR0_RI_SHIFT) +#define MCSR0_UAL_SHIFT 52 +#define MCSR0_UAL (_ULCAST_(1) << MCSR0_UAL_SHIFT) +#define MCSR0_VABIT_SHIFT 44 +#define MCSR0_VABIT_WIDTH 8 +#define MCSR0_VABIT (_ULCAST_(0xff) << MCSR0_VABIT_SHIFT) +#define VABIT_DEFAULT 0x2f +#define MCSR0_PABIT_SHIFT 36 +#define MCSR0_PABIT_WIDTH 8 +#define MCSR0_PABIT (_ULCAST_(0xff) << MCSR0_PABIT_SHIFT) +#define PABIT_DEFAULT 0x2f +#define MCSR0_IOCSR_SHIFT 35 +#define MCSR0_IOCSR (_ULCAST_(1) << MCSR0_IOCSR_SHIFT) +#define MCSR0_PAGING_SHIFT 34 +#define MCSR0_PAGING (_ULCAST_(1) << MCSR0_PAGING_SHIFT) +#define MCSR0_GR64_SHIFT 33 +#define MCSR0_GR64 (_ULCAST_(1) << MCSR0_GR64_SHIFT) +#define GR64_DEFAULT 1 +#define MCSR0_GR32_SHIFT 32 +#define MCSR0_GR32 (_ULCAST_(1) << MCSR0_GR32_SHIFT) +#define GR32_DEFAULT 0 +#define MCSR0_PRID_WIDTH 32 +#define MCSR0_PRID 0x14C010 + +#define LOONGARCH_CSR_MCSR1 0xc1 /* CPUCFG2 and CPUCFG3 */ +#define MCSR1_HPFOLD_SHIFT 43 +#define MCSR1_HPFOLD (_ULCAST_(1) << MCSR1_HPFOLD_SHIFT) +#define MCSR1_SPW_LVL_SHIFT 40 +#define MCSR1_SPW_LVL_WIDTH 3 +#define MCSR1_SPW_LVL (_ULCAST_(7) << MCSR1_SPW_LVL_SHIFT) +#define MCSR1_ICACHET_SHIFT 39 +#define MCSR1_ICACHET (_ULCAST_(1) << MCSR1_ICACHET_SHIFT) +#define MCSR1_ITLBT_SHIFT 38 +#define MCSR1_ITLBT (_ULCAST_(1) << MCSR1_ITLBT_SHIFT) +#define MCSR1_LLDBAR_SHIFT 37 +#define MCSR1_LLDBAR (_ULCAST_(1) << MCSR1_LLDBAR_SHIFT) +#define MCSR1_SCDLY_SHIFT 36 +#define MCSR1_SCDLY (_ULCAST_(1) << MCSR1_SCDLY_SHIFT) +#define MCSR1_LLEXC_SHIFT 35 +#define MCSR1_LLEXC (_ULCAST_(1) << MCSR1_LLEXC_SHIFT) +#define MCSR1_UCACC_SHIFT 34 +#define MCSR1_UCACC (_ULCAST_(1) << MCSR1_UCACC_SHIFT) +#define MCSR1_SFB_SHIFT 33 +#define MCSR1_SFB (_ULCAST_(1) << MCSR1_SFB_SHIFT) +#define MCSR1_CCDMA_SHIFT 32 +#define MCSR1_CCDMA (_ULCAST_(1) << MCSR1_CCDMA_SHIFT) +#define MCSR1_LAMO_SHIFT 22 +#define MCSR1_LAMO (_ULCAST_(1) << MCSR1_LAMO_SHIFT) +#define MCSR1_LSPW_SHIFT 21 +#define MCSR1_LSPW (_ULCAST_(1) << MCSR1_LSPW_SHIFT) +#define MCSR1_MIPSBT_SHIFT 20 +#define MCSR1_MIPSBT (_ULCAST_(1) << MCSR1_MIPSBT_SHIFT) +#define MCSR1_ARMBT_SHIFT 19 +#define MCSR1_ARMBT (_ULCAST_(1) << MCSR1_ARMBT_SHIFT) +#define MCSR1_X86BT_SHIFT 18 +#define MCSR1_X86BT (_ULCAST_(1) << MCSR1_X86BT_SHIFT) +#define MCSR1_LLFTPVERS_SHIFT 15 +#define MCSR1_LLFTPVERS_WIDTH 3 +#define MCSR1_LLFTPVERS (_ULCAST_(7) << MCSR1_LLFTPVERS_SHIFT) +#define MCSR1_LLFTP_SHIFT 14 +#define MCSR1_LLFTP (_ULCAST_(1) << MCSR1_LLFTP_SHIFT) +#define MCSR1_VZVERS_SHIFT 11 +#define MCSR1_VZVERS_WIDTH 3 +#define MCSR1_VZVERS (_ULCAST_(7) << MCSR1_VZVERS_SHIFT) +#define MCSR1_VZ_SHIFT 10 +#define MCSR1_VZ (_ULCAST_(1) << MCSR1_VZ_SHIFT) +#define MCSR1_CRYPTO_SHIFT 9 +#define MCSR1_CRYPTO (_ULCAST_(1) << MCSR1_CRYPTO_SHIFT) +#define MCSR1_COMPLEX_SHIFT 8 +#define MCSR1_COMPLEX (_ULCAST_(1) << MCSR1_COMPLEX_SHIFT) +#define MCSR1_LASX_SHIFT 7 +#define 
MCSR1_LASX (_ULCAST_(1) << MCSR1_LASX_SHIFT) +#define MCSR1_LSX_SHIFT 6 +#define MCSR1_LSX (_ULCAST_(1) << MCSR1_LSX_SHIFT) +#define MCSR1_FPVERS_SHIFT 3 +#define MCSR1_FPVERS_WIDTH 3 +#define MCSR1_FPVERS (_ULCAST_(7) << MCSR1_FPVERS_SHIFT) +#define MCSR1_FPDP_SHIFT 2 +#define MCSR1_FPDP (_ULCAST_(1) << MCSR1_FPDP_SHIFT) +#define MCSR1_FPSP_SHIFT 1 +#define MCSR1_FPSP (_ULCAST_(1) << MCSR1_FPSP_SHIFT) +#define MCSR1_FP_SHIFT 0 +#define MCSR1_FP (_ULCAST_(1) << MCSR1_FP_SHIFT) + +#define LOONGARCH_CSR_MCSR2 0xc2 /* CPUCFG4 and CPUCFG5 */ +#define MCSR2_CCDIV_SHIFT 48 +#define MCSR2_CCDIV_WIDTH 16 +#define MCSR2_CCDIV (_ULCAST_(0xffff) << MCSR2_CCDIV_SHIFT) +#define MCSR2_CCMUL_SHIFT 32 +#define MCSR2_CCMUL_WIDTH 16 +#define MCSR2_CCMUL (_ULCAST_(0xffff) << MCSR2_CCMUL_SHIFT) +#define MCSR2_CCFREQ_WIDTH 32 +#define MCSR2_CCFREQ (_ULCAST_(0xffffffff)) +#define CCFREQ_DEFAULT 0x5f5e100 /* 100MHz */ + +#define LOONGARCH_CSR_MCSR3 0xc3 /* CPUCFG6 */ +#define MCSR3_UPM_SHIFT 14 +#define MCSR3_UPM (_ULCAST_(1) << MCSR3_UPM_SHIFT) +#define MCSR3_PMBITS_SHIFT 8 +#define MCSR3_PMBITS_WIDTH 6 +#define MCSR3_PMBITS (_ULCAST_(0x3f) << MCSR3_PMBITS_SHIFT) +#define PMBITS_DEFAULT 0x40 +#define MCSR3_PMNUM_SHIFT 4 +#define MCSR3_PMNUM_WIDTH 4 +#define MCSR3_PMNUM (_ULCAST_(0xf) << MCSR3_PMNUM_SHIFT) +#define MCSR3_PAMVER_SHIFT 1 +#define MCSR3_PAMVER_WIDTH 3 +#define MCSR3_PAMVER (_ULCAST_(0x7) << MCSR3_PAMVER_SHIFT) +#define MCSR3_PMP_SHIFT 0 +#define MCSR3_PMP (_ULCAST_(1) << MCSR3_PMP_SHIFT) + +#define LOONGARCH_CSR_MCSR8 0xc8 /* CPUCFG16 and CPUCFG17 */ +#define MCSR8_L1I_SIZE_SHIFT 56 +#define MCSR8_L1I_SIZE_WIDTH 7 +#define MCSR8_L1I_SIZE (_ULCAST_(0x7f) << MCSR8_L1I_SIZE_SHIFT) +#define MCSR8_L1I_IDX_SHIFT 48 +#define MCSR8_L1I_IDX_WIDTH 8 +#define MCSR8_L1I_IDX (_ULCAST_(0xff) << MCSR8_L1I_IDX_SHIFT) +#define MCSR8_L1I_WAY_SHIFT 32 +#define MCSR8_L1I_WAY_WIDTH 16 +#define MCSR8_L1I_WAY (_ULCAST_(0xffff) << MCSR8_L1I_WAY_SHIFT) +#define MCSR8_L3DINCL_SHIFT 16 +#define MCSR8_L3DINCL (_ULCAST_(1) << MCSR8_L3DINCL_SHIFT) +#define MCSR8_L3DPRIV_SHIFT 15 +#define MCSR8_L3DPRIV (_ULCAST_(1) << MCSR8_L3DPRIV_SHIFT) +#define MCSR8_L3DPRE_SHIFT 14 +#define MCSR8_L3DPRE (_ULCAST_(1) << MCSR8_L3DPRE_SHIFT) +#define MCSR8_L3IUINCL_SHIFT 13 +#define MCSR8_L3IUINCL (_ULCAST_(1) << MCSR8_L3IUINCL_SHIFT) +#define MCSR8_L3IUPRIV_SHIFT 12 +#define MCSR8_L3IUPRIV (_ULCAST_(1) << MCSR8_L3IUPRIV_SHIFT) +#define MCSR8_L3IUUNIFY_SHIFT 11 +#define MCSR8_L3IUUNIFY (_ULCAST_(1) << MCSR8_L3IUUNIFY_SHIFT) +#define MCSR8_L3IUPRE_SHIFT 10 +#define MCSR8_L3IUPRE (_ULCAST_(1) << MCSR8_L3IUPRE_SHIFT) +#define MCSR8_L2DINCL_SHIFT 9 +#define MCSR8_L2DINCL (_ULCAST_(1) << MCSR8_L2DINCL_SHIFT) +#define MCSR8_L2DPRIV_SHIFT 8 +#define MCSR8_L2DPRIV (_ULCAST_(1) << MCSR8_L2DPRIV_SHIFT) +#define MCSR8_L2DPRE_SHIFT 7 +#define MCSR8_L2DPRE (_ULCAST_(1) << MCSR8_L2DPRE_SHIFT) +#define MCSR8_L2IUINCL_SHIFT 6 +#define MCSR8_L2IUINCL (_ULCAST_(1) << MCSR8_L2IUINCL_SHIFT) +#define MCSR8_L2IUPRIV_SHIFT 5 +#define MCSR8_L2IUPRIV (_ULCAST_(1) << MCSR8_L2IUPRIV_SHIFT) +#define MCSR8_L2IUUNIFY_SHIFT 4 +#define MCSR8_L2IUUNIFY (_ULCAST_(1) << MCSR8_L2IUUNIFY_SHIFT) +#define MCSR8_L2IUPRE_SHIFT 3 +#define MCSR8_L2IUPRE (_ULCAST_(1) << MCSR8_L2IUPRE_SHIFT) +#define MCSR8_L1DPRE_SHIFT 2 +#define MCSR8_L1DPRE (_ULCAST_(1) << MCSR8_L1DPRE_SHIFT) +#define MCSR8_L1IUUNIFY_SHIFT 1 +#define MCSR8_L1IUUNIFY (_ULCAST_(1) << MCSR8_L1IUUNIFY_SHIFT) +#define MCSR8_L1IUPRE_SHIFT 0 +#define MCSR8_L1IUPRE (_ULCAST_(1) << MCSR8_L1IUPRE_SHIFT) + +#define 
LOONGARCH_CSR_MCSR9	0xc9	/* CPUCFG18 and CPUCFG19 */
+#define MCSR9_L2U_SIZE_SHIFT	56
+#define MCSR9_L2U_SIZE_WIDTH	7
+#define MCSR9_L2U_SIZE		(_ULCAST_(0x7f) << MCSR9_L2U_SIZE_SHIFT)
+#define MCSR9_L2U_IDX_SHIFT	48
+#define MCSR9_L2U_IDX_WIDTH	8
+#define MCSR9_L2U_IDX		(_ULCAST_(0xff) << MCSR9_L2U_IDX_SHIFT)
+#define MCSR9_L2U_WAY_SHIFT	32
+#define MCSR9_L2U_WAY_WIDTH	16
+#define MCSR9_L2U_WAY		(_ULCAST_(0xffff) << MCSR9_L2U_WAY_SHIFT)
+#define MCSR9_L1D_SIZE_SHIFT	24
+#define MCSR9_L1D_SIZE_WIDTH	7
+#define MCSR9_L1D_SIZE		(_ULCAST_(0x7f) << MCSR9_L1D_SIZE_SHIFT)
+#define MCSR9_L1D_IDX_SHIFT	16
+#define MCSR9_L1D_IDX_WIDTH	8
+#define MCSR9_L1D_IDX		(_ULCAST_(0xff) << MCSR9_L1D_IDX_SHIFT)
+#define MCSR9_L1D_WAY_SHIFT	0
+#define MCSR9_L1D_WAY_WIDTH	16
+#define MCSR9_L1D_WAY		(_ULCAST_(0xffff) << MCSR9_L1D_WAY_SHIFT)
+
+#define LOONGARCH_CSR_MCSR10	0xca	/* CPUCFG20 */
+#define MCSR10_L3U_SIZE_SHIFT	24
+#define MCSR10_L3U_SIZE_WIDTH	7
+#define MCSR10_L3U_SIZE		(_ULCAST_(0x7f) << MCSR10_L3U_SIZE_SHIFT)
+#define MCSR10_L3U_IDX_SHIFT	16
+#define MCSR10_L3U_IDX_WIDTH	8
+#define MCSR10_L3U_IDX		(_ULCAST_(0xff) << MCSR10_L3U_IDX_SHIFT)
+#define MCSR10_L3U_WAY_SHIFT	0
+#define MCSR10_L3U_WAY_WIDTH	16
+#define MCSR10_L3U_WAY		(_ULCAST_(0xffff) << MCSR10_L3U_WAY_SHIFT)
+
+#define LOONGARCH_CSR_MCSR24	0xf0	/* cpucfg48 */
+#define MCSR24_RAMCG_SHIFT	3
+#define MCSR24_RAMCG		(_ULCAST_(1) << MCSR24_RAMCG_SHIFT)
+#define MCSR24_VFPUCG_SHIFT	2
+#define MCSR24_VFPUCG		(_ULCAST_(1) << MCSR24_VFPUCG_SHIFT)
+#define MCSR24_NAPEN_SHIFT	1
+#define MCSR24_NAPEN		(_ULCAST_(1) << MCSR24_NAPEN_SHIFT)
+#define MCSR24_MCSRLOCK_SHIFT	0
+#define MCSR24_MCSRLOCK		(_ULCAST_(1) << MCSR24_MCSRLOCK_SHIFT)
+
+/* Uncached accelerate windows registers */
+#define LOONGARCH_CSR_UCAWIN	0x100
+#define LOONGARCH_CSR_UCAWIN0_LO	0x102
+#define LOONGARCH_CSR_UCAWIN0_HI	0x103
+#define LOONGARCH_CSR_UCAWIN1_LO	0x104
+#define LOONGARCH_CSR_UCAWIN1_HI	0x105
+#define LOONGARCH_CSR_UCAWIN2_LO	0x106
+#define LOONGARCH_CSR_UCAWIN2_HI	0x107
+#define LOONGARCH_CSR_UCAWIN3_LO	0x108
+#define LOONGARCH_CSR_UCAWIN3_HI	0x109
+
+/* Direct Map windows registers */
+#define LOONGARCH_CSR_DMWIN0	0x180	/* 64 direct map win0: MEM & IF */
+#define LOONGARCH_CSR_DMWIN1	0x181	/* 64 direct map win1: MEM & IF */
+#define LOONGARCH_CSR_DMWIN2	0x182	/* 64 direct map win2: MEM */
+#define LOONGARCH_CSR_DMWIN3	0x183	/* 64 direct map win3: MEM */
+
+/* Direct Map window 0/1 */
+#define CSR_DMW0_PLV0		_CONST64_(1 << 0)
+#define CSR_DMW0_VSEG		_CONST64_(0x8000)
+#define CSR_DMW0_BASE		(CSR_DMW0_VSEG << DMW_PABITS)
+#define CSR_DMW0_INIT		(CSR_DMW0_BASE | CSR_DMW0_PLV0)
+
+#define CSR_DMW1_PLV0		_CONST64_(1 << 0)
+#define CSR_DMW1_MAT		_CONST64_(1 << 4)
+#define CSR_DMW1_VSEG		_CONST64_(0x9000)
+#define CSR_DMW1_BASE		(CSR_DMW1_VSEG << DMW_PABITS)
+#define CSR_DMW1_INIT		(CSR_DMW1_BASE | CSR_DMW1_MAT | CSR_DMW1_PLV0)
+
+/* Performance Counter registers */
+#define LOONGARCH_CSR_PERFCTRL0	0x200	/* 32 perf event 0 config */
+#define LOONGARCH_CSR_PERFCNTR0	0x201	/* 64 perf event 0 count value */
+#define LOONGARCH_CSR_PERFCTRL1	0x202	/* 32 perf event 1 config */
+#define LOONGARCH_CSR_PERFCNTR1	0x203	/* 64 perf event 1 count value */
+#define LOONGARCH_CSR_PERFCTRL2	0x204	/* 32 perf event 2 config */
+#define LOONGARCH_CSR_PERFCNTR2	0x205	/* 64 perf event 2 count value */
+#define LOONGARCH_CSR_PERFCTRL3	0x206	/* 32 perf event 3 config */
+#define LOONGARCH_CSR_PERFCNTR3	0x207	/* 64 perf event 3 count value */
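As a usage sketch for the performance-counter registers above: the CSR_PERFCTRL_* bits defined just below select which privilege levels are counted and whether overflow raises an interrupt, while the low bits pick the event (write_csr_perfctrl0()/write_csr_perfcntr0() are defined later in this file; the event number is a caller-supplied placeholder):

/* Hypothetical: count one event in kernel (PLV0) and user (PLV3) mode
 * on counter 0, with the overflow interrupt enabled. */
static inline void example_start_counter0(unsigned int event)
{
	write_csr_perfcntr0(0);
	write_csr_perfctrl0(CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV3 |
			    CSR_PERFCTRL_IE | (event & CSR_PERFCTRL_EVENT));
}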
+#define CSR_PERFCTRL_PLV0	(_ULCAST_(1) << 16)
+#define CSR_PERFCTRL_PLV1	(_ULCAST_(1) << 17)
+#define CSR_PERFCTRL_PLV2	(_ULCAST_(1) << 18)
+#define CSR_PERFCTRL_PLV3	(_ULCAST_(1) << 19)
+#define CSR_PERFCTRL_IE		(_ULCAST_(1) << 20)
+#define CSR_PERFCTRL_EVENT	0x3ff
+
+/* Debug registers */
+#define LOONGARCH_CSR_MWPC	0x300	/* data breakpoint config */
+#define LOONGARCH_CSR_MWPS	0x301	/* data breakpoint status */
+
+#define LOONGARCH_CSR_DB0ADDR	0x310	/* data breakpoint 0 address */
+#define LOONGARCH_CSR_DB0MASK	0x311	/* data breakpoint 0 mask */
+#define LOONGARCH_CSR_DB0CTL	0x312	/* data breakpoint 0 control */
+#define LOONGARCH_CSR_DB0ASID	0x313	/* data breakpoint 0 asid */
+
+#define LOONGARCH_CSR_DB1ADDR	0x318	/* data breakpoint 1 address */
+#define LOONGARCH_CSR_DB1MASK	0x319	/* data breakpoint 1 mask */
+#define LOONGARCH_CSR_DB1CTL	0x31a	/* data breakpoint 1 control */
+#define LOONGARCH_CSR_DB1ASID	0x31b	/* data breakpoint 1 asid */
+
+#define LOONGARCH_CSR_DB2ADDR	0x320	/* data breakpoint 2 address */
+#define LOONGARCH_CSR_DB2MASK	0x321	/* data breakpoint 2 mask */
+#define LOONGARCH_CSR_DB2CTL	0x322	/* data breakpoint 2 control */
+#define LOONGARCH_CSR_DB2ASID	0x323	/* data breakpoint 2 asid */
+
+#define LOONGARCH_CSR_DB3ADDR	0x328	/* data breakpoint 3 address */
+#define LOONGARCH_CSR_DB3MASK	0x329	/* data breakpoint 3 mask */
+#define LOONGARCH_CSR_DB3CTL	0x32a	/* data breakpoint 3 control */
+#define LOONGARCH_CSR_DB3ASID	0x32b	/* data breakpoint 3 asid */
+
+#define LOONGARCH_CSR_DB4ADDR	0x330	/* data breakpoint 4 address */
+#define LOONGARCH_CSR_DB4MASK	0x331	/* data breakpoint 4 mask */
+#define LOONGARCH_CSR_DB4CTL	0x332	/* data breakpoint 4 control */
+#define LOONGARCH_CSR_DB4ASID	0x333	/* data breakpoint 4 asid */
+
+#define LOONGARCH_CSR_DB5ADDR	0x338	/* data breakpoint 5 address */
+#define LOONGARCH_CSR_DB5MASK	0x339	/* data breakpoint 5 mask */
+#define LOONGARCH_CSR_DB5CTL	0x33a	/* data breakpoint 5 control */
+#define LOONGARCH_CSR_DB5ASID	0x33b	/* data breakpoint 5 asid */
+
+#define LOONGARCH_CSR_DB6ADDR	0x340	/* data breakpoint 6 address */
+#define LOONGARCH_CSR_DB6MASK	0x341	/* data breakpoint 6 mask */
+#define LOONGARCH_CSR_DB6CTL	0x342	/* data breakpoint 6 control */
+#define LOONGARCH_CSR_DB6ASID	0x343	/* data breakpoint 6 asid */
+
+#define LOONGARCH_CSR_DB7ADDR	0x348	/* data breakpoint 7 address */
+#define LOONGARCH_CSR_DB7MASK	0x349	/* data breakpoint 7 mask */
+#define LOONGARCH_CSR_DB7CTL	0x34a	/* data breakpoint 7 control */
+#define LOONGARCH_CSR_DB7ASID	0x34b	/* data breakpoint 7 asid */
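To make the data-breakpoint register grouping above concrete, a hypothetical helper that arms breakpoint 0 (csr_write64() is defined earlier in this file; this patch does not spell out the DB0CTL bit encoding, so the control value is taken as an opaque argument):

/* Hypothetical: watch one exact address with data breakpoint 0. */
static inline void example_arm_data_bp0(unsigned long addr, unsigned long ctl)
{
	csr_write64(addr, LOONGARCH_CSR_DB0ADDR);
	csr_write64(0, LOONGARCH_CSR_DB0MASK);	/* match the address exactly */
	csr_write64(ctl, LOONGARCH_CSR_DB0CTL);	/* enable bits are implementation-defined */
}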
+
+#define LOONGARCH_CSR_FWPC	0x380	/* instruction breakpoint config */
+#define LOONGARCH_CSR_FWPS	0x381	/* instruction breakpoint status */
+
+#define LOONGARCH_CSR_IB0ADDR	0x390	/* inst breakpoint 0 address */
+#define LOONGARCH_CSR_IB0MASK	0x391	/* inst breakpoint 0 mask */
+#define LOONGARCH_CSR_IB0CTL	0x392	/* inst breakpoint 0 control */
+#define LOONGARCH_CSR_IB0ASID	0x393	/* inst breakpoint 0 asid */
+
+#define LOONGARCH_CSR_IB1ADDR	0x398	/* inst breakpoint 1 address */
+#define LOONGARCH_CSR_IB1MASK	0x399	/* inst breakpoint 1 mask */
+#define LOONGARCH_CSR_IB1CTL	0x39a	/* inst breakpoint 1 control */
+#define LOONGARCH_CSR_IB1ASID	0x39b	/* inst breakpoint 1 asid */
+
+#define LOONGARCH_CSR_IB2ADDR	0x3a0	/* inst breakpoint 2 address */
+#define LOONGARCH_CSR_IB2MASK	0x3a1	/* inst breakpoint 2 mask */
+#define LOONGARCH_CSR_IB2CTL	0x3a2	/* inst breakpoint 2 control */
+#define LOONGARCH_CSR_IB2ASID	0x3a3	/* inst breakpoint 2 asid */
+
+#define LOONGARCH_CSR_IB3ADDR	0x3a8	/* inst breakpoint 3 address */
+#define LOONGARCH_CSR_IB3MASK	0x3a9	/* inst breakpoint 3 mask */
+#define LOONGARCH_CSR_IB3CTL	0x3aa	/* inst breakpoint 3 control */
+#define LOONGARCH_CSR_IB3ASID	0x3ab	/* inst breakpoint 3 asid */
+
+#define LOONGARCH_CSR_IB4ADDR	0x3b0	/* inst breakpoint 4 address */
+#define LOONGARCH_CSR_IB4MASK	0x3b1	/* inst breakpoint 4 mask */
+#define LOONGARCH_CSR_IB4CTL	0x3b2	/* inst breakpoint 4 control */
+#define LOONGARCH_CSR_IB4ASID	0x3b3	/* inst breakpoint 4 asid */
+
+#define LOONGARCH_CSR_IB5ADDR	0x3b8	/* inst breakpoint 5 address */
+#define LOONGARCH_CSR_IB5MASK	0x3b9	/* inst breakpoint 5 mask */
+#define LOONGARCH_CSR_IB5CTL	0x3ba	/* inst breakpoint 5 control */
+#define LOONGARCH_CSR_IB5ASID	0x3bb	/* inst breakpoint 5 asid */
+
+#define LOONGARCH_CSR_IB6ADDR	0x3c0	/* inst breakpoint 6 address */
+#define LOONGARCH_CSR_IB6MASK	0x3c1	/* inst breakpoint 6 mask */
+#define LOONGARCH_CSR_IB6CTL	0x3c2	/* inst breakpoint 6 control */
+#define LOONGARCH_CSR_IB6ASID	0x3c3	/* inst breakpoint 6 asid */
+
+#define LOONGARCH_CSR_IB7ADDR	0x3c8	/* inst breakpoint 7 address */
+#define LOONGARCH_CSR_IB7MASK	0x3c9	/* inst breakpoint 7 mask */
+#define LOONGARCH_CSR_IB7CTL	0x3ca	/* inst breakpoint 7 control */
+#define LOONGARCH_CSR_IB7ASID	0x3cb	/* inst breakpoint 7 asid */
+
+#define LOONGARCH_CSR_DEBUG	0x500	/* debug config */
+#define LOONGARCH_CSR_DERA	0x501	/* debug era */
+#define LOONGARCH_CSR_DESAVE	0x502	/* debug save */
+
+/*
+ * CSR_ECFG IM
+ */
+#define ECFG0_IM		0x00001fff
+#define ECFGB_SIP0		0
+#define ECFGF_SIP0		(_ULCAST_(1) << ECFGB_SIP0)
+#define ECFGB_SIP1		1
+#define ECFGF_SIP1		(_ULCAST_(1) << ECFGB_SIP1)
+#define ECFGB_IP0		2
+#define ECFGF_IP0		(_ULCAST_(1) << ECFGB_IP0)
+#define ECFGB_IP1		3
+#define ECFGF_IP1		(_ULCAST_(1) << ECFGB_IP1)
+#define ECFGB_IP2		4
+#define ECFGF_IP2		(_ULCAST_(1) << ECFGB_IP2)
+#define ECFGB_IP3		5
+#define ECFGF_IP3		(_ULCAST_(1) << ECFGB_IP3)
+#define ECFGB_IP4		6
+#define ECFGF_IP4		(_ULCAST_(1) << ECFGB_IP4)
+#define ECFGB_IP5		7
+#define ECFGF_IP5		(_ULCAST_(1) << ECFGB_IP5)
+#define ECFGB_IP6		8
+#define ECFGF_IP6		(_ULCAST_(1) << ECFGB_IP6)
+#define ECFGB_IP7		9
+#define ECFGF_IP7		(_ULCAST_(1) << ECFGB_IP7)
+#define ECFGB_PMC		10
+#define ECFGF_PMC		(_ULCAST_(1) << ECFGB_PMC)
+#define ECFGB_TIMER		11
+#define ECFGF_TIMER		(_ULCAST_(1) << ECFGB_TIMER)
+#define ECFGB_IPI		12
+#define ECFGF_IPI		(_ULCAST_(1) << ECFGB_IPI)
+#define ECFGF(hwirq)		(_ULCAST_(1) << hwirq)
+
+#define ESTATF_IP		0x00001fff
+
+#define LOONGARCH_IOCSR_FEATURES	0x8
+#define IOCSRF_TEMP		BIT_ULL(0)
+#define IOCSRF_NODECNT		BIT_ULL(1)
+#define IOCSRF_MSI		BIT_ULL(2)
+#define IOCSRF_EXTIOI		BIT_ULL(3)
+#define IOCSRF_CSRIPI		BIT_ULL(4)
+#define IOCSRF_FREQCSR		BIT_ULL(5)
+#define IOCSRF_FREQSCALE	BIT_ULL(6)
+#define IOCSRF_DVFSV1		BIT_ULL(7)
+#define IOCSRF_EIODECODE	BIT_ULL(9)
+#define IOCSRF_FLATMODE		BIT_ULL(10)
+#define IOCSRF_VM		BIT_ULL(11)
+
+#define LOONGARCH_IOCSR_VENDOR	0x10
+
+#define LOONGARCH_IOCSR_CPUNAME	0x20
+
+#define LOONGARCH_IOCSR_NODECNT	0x408
+
+#define LOONGARCH_IOCSR_MISC_FUNC	0x420
+#define IOCSR_MISC_FUNC_TIMER_RESET	BIT_ULL(21)
+#define IOCSR_MISC_FUNC_EXT_IOI_EN	BIT_ULL(48)
+
+#define LOONGARCH_IOCSR_CPUTEMP	0x428
+
+/* PerCore CSR, only accessible by local cores */
+#define LOONGARCH_IOCSR_IPI_STATUS	0x1000
+#define LOONGARCH_IOCSR_IPI_EN		0x1004
+#define LOONGARCH_IOCSR_IPI_SET		0x1008
+#define LOONGARCH_IOCSR_IPI_CLEAR	0x100c
+#define LOONGARCH_IOCSR_MBUF0		0x1020
+#define LOONGARCH_IOCSR_MBUF1		0x1028
+#define LOONGARCH_IOCSR_MBUF2		0x1030
+#define LOONGARCH_IOCSR_MBUF3		0x1038
+
+#define 
LOONGARCH_IOCSR_IPI_SEND 0x1040 +#define IOCSR_IPI_SEND_IP_SHIFT 0 +#define IOCSR_IPI_SEND_CPU_SHIFT 16 +#define IOCSR_IPI_SEND_BLOCKING BIT(31) + +#define LOONGARCH_IOCSR_MBUF_SEND 0x1048 +#define IOCSR_MBUF_SEND_BLOCKING BIT_ULL(31) +#define IOCSR_MBUF_SEND_BOX_SHIFT 2 +#define IOCSR_MBUF_SEND_BOX_LO(box) (box << 1) +#define IOCSR_MBUF_SEND_BOX_HI(box) ((box << 1) + 1) +#define IOCSR_MBUF_SEND_CPU_SHIFT 16 +#define IOCSR_MBUF_SEND_BUF_SHIFT 32 +#define IOCSR_MBUF_SEND_H32_MASK 0xFFFFFFFF00000000ULL + +#define LOONGARCH_IOCSR_ANY_SEND 0x1158 +#define IOCSR_ANY_SEND_BLOCKING BIT_ULL(31) +#define IOCSR_ANY_SEND_CPU_SHIFT 16 +#define IOCSR_ANY_SEND_MASK_SHIFT 27 +#define IOCSR_ANY_SEND_BUF_SHIFT 32 +#define IOCSR_ANY_SEND_H32_MASK 0xFFFFFFFF00000000ULL + +/* Register offset and bit definition for CSR access */ +#define LOONGARCH_IOCSR_TIMER_CFG 0x1060 +#define LOONGARCH_IOCSR_TIMER_TICK 0x1070 +#define IOCSR_TIMER_CFG_RESERVED (_ULCAST_(1) << 63) +#define IOCSR_TIMER_CFG_PERIODIC (_ULCAST_(1) << 62) +#define IOCSR_TIMER_CFG_EN (_ULCAST_(1) << 61) +#define IOCSR_TIMER_MASK 0x0ffffffffffffULL +#define IOCSR_TIMER_INITVAL_RST (_ULCAST_(0xffff) << 48) + +#define LOONGARCH_IOCSR_EXTIOI_NODEMAP_BASE 0x14a0 +#define LOONGARCH_IOCSR_EXTIOI_IPMAP_BASE 0x14c0 +#define LOONGARCH_IOCSR_EXTIOI_EN_BASE 0x1600 +#define LOONGARCH_IOCSR_EXTIOI_BOUNCE_BASE 0x1680 +#define LOONGARCH_IOCSR_EXTIOI_ISR_BASE 0x1800 +#define LOONGARCH_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 +#define IOCSR_EXTIOI_VECTOR_NUM 256 + +#ifndef __ASSEMBLY__ + +static inline u64 drdtime(void) +{ + int rID = 0; + u64 val = 0; + + __asm__ __volatile__( + "rdtime.d %0, %1 \n\t" + : "=r"(val), "=r"(rID) + : + ); + return val; +} + +static inline unsigned int get_csr_cpuid(void) +{ + return csr_read32(LOONGARCH_CSR_CPUID); +} + +static inline void csr_any_send(unsigned int addr, unsigned int data, + unsigned int data_mask, unsigned int cpu) +{ + uint64_t val = 0; + + val = IOCSR_ANY_SEND_BLOCKING | addr; + val |= (cpu << IOCSR_ANY_SEND_CPU_SHIFT); + val |= (data_mask << IOCSR_ANY_SEND_MASK_SHIFT); + val |= ((uint64_t)data << IOCSR_ANY_SEND_BUF_SHIFT); + iocsr_write64(val, LOONGARCH_IOCSR_ANY_SEND); +} + +static inline unsigned int read_csr_excode(void) +{ + return (csr_read32(LOONGARCH_CSR_ESTAT) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; +} + +static inline void write_csr_index(unsigned int idx) +{ + csr_xchg32(idx, CSR_TLBIDX_IDXM, LOONGARCH_CSR_TLBIDX); +} + +static inline unsigned int read_csr_pagesize(void) +{ + return (csr_read32(LOONGARCH_CSR_TLBIDX) & CSR_TLBIDX_SIZEM) >> CSR_TLBIDX_SIZE; +} + +static inline void write_csr_pagesize(unsigned int size) +{ + csr_xchg32(size << CSR_TLBIDX_SIZE, CSR_TLBIDX_SIZEM, LOONGARCH_CSR_TLBIDX); +} + +static inline unsigned int read_csr_tlbrefill_pagesize(void) +{ + return (csr_read64(LOONGARCH_CSR_TLBREHI) & CSR_TLBREHI_PS) >> CSR_TLBREHI_PS_SHIFT; +} + +static inline void write_csr_tlbrefill_pagesize(unsigned int size) +{ + csr_xchg64(size << CSR_TLBREHI_PS_SHIFT, CSR_TLBREHI_PS, LOONGARCH_CSR_TLBREHI); +} + +#define read_csr_asid() csr_read32(LOONGARCH_CSR_ASID) +#define write_csr_asid(val) csr_write32(val, LOONGARCH_CSR_ASID) +#define read_csr_entryhi() csr_read64(LOONGARCH_CSR_TLBEHI) +#define write_csr_entryhi(val) csr_write64(val, LOONGARCH_CSR_TLBEHI) +#define read_csr_entrylo0() csr_read64(LOONGARCH_CSR_TLBELO0) +#define write_csr_entrylo0(val) csr_write64(val, LOONGARCH_CSR_TLBELO0) +#define read_csr_entrylo1() csr_read64(LOONGARCH_CSR_TLBELO1) +#define write_csr_entrylo1(val) csr_write64(val, 
LOONGARCH_CSR_TLBELO1) +#define read_csr_ecfg() csr_read32(LOONGARCH_CSR_ECFG) +#define write_csr_ecfg(val) csr_write32(val, LOONGARCH_CSR_ECFG) +#define read_csr_estat() csr_read32(LOONGARCH_CSR_ESTAT) +#define write_csr_estat(val) csr_write32(val, LOONGARCH_CSR_ESTAT) +#define read_csr_tlbidx() csr_read32(LOONGARCH_CSR_TLBIDX) +#define write_csr_tlbidx(val) csr_write32(val, LOONGARCH_CSR_TLBIDX) +#define read_csr_euen() csr_read32(LOONGARCH_CSR_EUEN) +#define write_csr_euen(val) csr_write32(val, LOONGARCH_CSR_EUEN) +#define read_csr_cpuid() csr_read32(LOONGARCH_CSR_CPUID) +#define read_csr_prcfg1() csr_read64(LOONGARCH_CSR_PRCFG1) +#define write_csr_prcfg1(val) csr_write64(val, LOONGARCH_CSR_PRCFG1) +#define read_csr_prcfg2() csr_read64(LOONGARCH_CSR_PRCFG2) +#define write_csr_prcfg2(val) csr_write64(val, LOONGARCH_CSR_PRCFG2) +#define read_csr_prcfg3() csr_read64(LOONGARCH_CSR_PRCFG3) +#define write_csr_prcfg3(val) csr_write64(val, LOONGARCH_CSR_PRCFG3) +#define read_csr_stlbpgsize() csr_read32(LOONGARCH_CSR_STLBPGSIZE) +#define write_csr_stlbpgsize(val) csr_write32(val, LOONGARCH_CSR_STLBPGSIZE) +#define read_csr_rvacfg() csr_read32(LOONGARCH_CSR_RVACFG) +#define write_csr_rvacfg(val) csr_write32(val, LOONGARCH_CSR_RVACFG) +#define write_csr_tintclear(val) csr_write32(val, LOONGARCH_CSR_TINTCLR) +#define read_csr_impctl1() csr_read64(LOONGARCH_CSR_IMPCTL1) +#define write_csr_impctl1(val) csr_write64(val, LOONGARCH_CSR_IMPCTL1) +#define write_csr_impctl2(val) csr_write64(val, LOONGARCH_CSR_IMPCTL2) + +#define read_csr_perfctrl0() csr_read64(LOONGARCH_CSR_PERFCTRL0) +#define read_csr_perfcntr0() csr_read64(LOONGARCH_CSR_PERFCNTR0) +#define read_csr_perfctrl1() csr_read64(LOONGARCH_CSR_PERFCTRL1) +#define read_csr_perfcntr1() csr_read64(LOONGARCH_CSR_PERFCNTR1) +#define read_csr_perfctrl2() csr_read64(LOONGARCH_CSR_PERFCTRL2) +#define read_csr_perfcntr2() csr_read64(LOONGARCH_CSR_PERFCNTR2) +#define read_csr_perfctrl3() csr_read64(LOONGARCH_CSR_PERFCTRL3) +#define read_csr_perfcntr3() csr_read64(LOONGARCH_CSR_PERFCNTR3) +#define write_csr_perfctrl0(val) csr_write64(val, LOONGARCH_CSR_PERFCTRL0) +#define write_csr_perfcntr0(val) csr_write64(val, LOONGARCH_CSR_PERFCNTR0) +#define write_csr_perfctrl1(val) csr_write64(val, LOONGARCH_CSR_PERFCTRL1) +#define write_csr_perfcntr1(val) csr_write64(val, LOONGARCH_CSR_PERFCNTR1) +#define write_csr_perfctrl2(val) csr_write64(val, LOONGARCH_CSR_PERFCTRL2) +#define write_csr_perfcntr2(val) csr_write64(val, LOONGARCH_CSR_PERFCNTR2) +#define write_csr_perfctrl3(val) csr_write64(val, LOONGARCH_CSR_PERFCTRL3) +#define write_csr_perfcntr3(val) csr_write64(val, LOONGARCH_CSR_PERFCNTR3) + +/* + * Manipulate bits in a register. 
+ */
+#define __BUILD_CSR_COMMON(name)				\
+static inline unsigned long					\
+set_##name(unsigned long set)					\
+{								\
+	unsigned long res, new;					\
+								\
+	res = read_##name();					\
+	new = res | set;					\
+	write_##name(new);					\
+								\
+	return res;						\
+}								\
+								\
+static inline unsigned long					\
+clear_##name(unsigned long clear)				\
+{								\
+	unsigned long res, new;					\
+								\
+	res = read_##name();					\
+	new = res & ~clear;					\
+	write_##name(new);					\
+								\
+	return res;						\
+}								\
+								\
+static inline unsigned long					\
+change_##name(unsigned long change, unsigned long val)		\
+{								\
+	unsigned long res, new;					\
+								\
+	res = read_##name();					\
+	new = res & ~change;					\
+	new |= (val & change);					\
+	write_##name(new);					\
+								\
+	return res;						\
+}
+
+#define __BUILD_CSR_OP(name)	__BUILD_CSR_COMMON(csr_##name)
+
+__BUILD_CSR_OP(euen)
+__BUILD_CSR_OP(ecfg)
+__BUILD_CSR_OP(tlbidx)
+
+#define set_csr_estat(val)	\
+	csr_xchg32(val, val, LOONGARCH_CSR_ESTAT)
+#define clear_csr_estat(val)	\
+	csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT)
+
+#endif /* __ASSEMBLY__ */
+
+/* Generic EntryLo bit definitions */
+#define ENTRYLO_V		(_ULCAST_(1) << 0)
+#define ENTRYLO_D		(_ULCAST_(1) << 1)
+#define ENTRYLO_PLV_SHIFT	2
+#define ENTRYLO_PLV		(_ULCAST_(3) << ENTRYLO_PLV_SHIFT)
+#define ENTRYLO_C_SHIFT		4
+#define ENTRYLO_C		(_ULCAST_(3) << ENTRYLO_C_SHIFT)
+#define ENTRYLO_G		(_ULCAST_(1) << 6)
+#define ENTRYLO_NR		(_ULCAST_(1) << 61)
+#define ENTRYLO_NX		(_ULCAST_(1) << 62)
+
+/* Values for PageMask register */
+#define PS_4K		0x0000000c
+#define PS_8K		0x0000000d
+#define PS_16K		0x0000000e
+#define PS_32K		0x0000000f
+#define PS_64K		0x00000010
+#define PS_128K		0x00000011
+#define PS_256K		0x00000012
+#define PS_512K		0x00000013
+#define PS_1M		0x00000014
+#define PS_2M		0x00000015
+#define PS_4M		0x00000016
+#define PS_8M		0x00000017
+#define PS_16M		0x00000018
+#define PS_32M		0x00000019
+#define PS_64M		0x0000001a
+#define PS_128M		0x0000001b
+#define PS_256M		0x0000001c
+#define PS_512M		0x0000001d
+#define PS_1G		0x0000001e
+
+/* Default page size for a given kernel configuration */
+#ifdef CONFIG_PAGE_SIZE_4KB
+#define PS_DEFAULT_SIZE	PS_4K
+#elif defined(CONFIG_PAGE_SIZE_16KB)
+#define PS_DEFAULT_SIZE	PS_16K
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+#define PS_DEFAULT_SIZE	PS_64K
+#else
+#error Bad page size configuration!
+#endif
+
+/* Default huge tlb size for a given kernel configuration */
+#ifdef CONFIG_PAGE_SIZE_4KB
+#define PS_HUGE_SIZE	PS_1M
+#elif defined(CONFIG_PAGE_SIZE_16KB)
+#define PS_HUGE_SIZE	PS_16M
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+#define PS_HUGE_SIZE	PS_256M
+#else
+#error Bad page size configuration for hugetlbfs!
+#endif
+
+/* ExStatus.ExcCode */
+#define EXCCODE_RSV		0	/* Reserved */
+#define EXCCODE_TLBL		1	/* TLB miss on a load */
+#define EXCCODE_TLBS		2	/* TLB miss on a store */
+#define EXCCODE_TLBI		3	/* TLB miss on an ifetch */
+#define EXCCODE_TLBM		4	/* TLB modified fault */
+#define EXCCODE_TLBNR		5	/* TLB Read-Inhibit exception */
+#define EXCCODE_TLBNX		6	/* TLB Execution-Inhibit exception */
+#define EXCCODE_TLBPE		7	/* TLB Privilege Error */
+#define EXCCODE_ADE		8	/* Address Error */
+	#define EXSUBCODE_ADEF	0	/* Fetch Instruction */
+	#define EXSUBCODE_ADEM	1	/* Access Memory */
+#define EXCCODE_ALE		9	/* Unaligned Access */
+#define EXCCODE_OOB		10	/* Out of bounds */
+#define EXCCODE_SYS		11	/* System call */
+#define EXCCODE_BP		12	/* Breakpoint */
+#define EXCCODE_INE		13	/* Inst. Not Exist */
+#define EXCCODE_IPE		14	/* Inst.
Privileged Error */ +#define EXCCODE_FPDIS 15 /* FPU Disabled */ +#define EXCCODE_LSXDIS 16 /* LSX Disabled */ +#define EXCCODE_LASXDIS 17 /* LASX Disabled */ +#define EXCCODE_FPE 18 /* Floating Point Exception */ + #define EXCSUBCODE_FPE 0 /* Floating Point Exception */ + #define EXCSUBCODE_VFPE 1 /* Vector Exception */ +#define EXCCODE_WATCH 19 /* Watch address reference */ +#define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */ +#define EXCCODE_BTE 21 /* Binary Trans. Exception */ +#define EXCCODE_PSI 22 /* Guest Privileged Error */ +#define EXCCODE_HYP 23 /* Hypercall */ +#define EXCCODE_GCM 24 /* Guest CSR modified */ + #define EXCSUBCODE_GCSC 0 /* Software caused */ + #define EXCSUBCODE_GCHC 1 /* Hardware caused */ +#define EXCCODE_SE 25 /* Security */ + +#define EXCCODE_INT_START 64 +#define EXCCODE_SIP0 64 +#define EXCCODE_SIP1 65 +#define EXCCODE_IP0 66 +#define EXCCODE_IP1 67 +#define EXCCODE_IP2 68 +#define EXCCODE_IP3 69 +#define EXCCODE_IP4 70 +#define EXCCODE_IP5 71 +#define EXCCODE_IP6 72 +#define EXCCODE_IP7 73 +#define EXCCODE_PMC 74 /* Performance Counter */ +#define EXCCODE_TIMER 75 +#define EXCCODE_IPI 76 +#define EXCCODE_NMI 77 +#define EXCCODE_INT_END 78 +#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START) + +/* FPU register names */ +#define LOONGARCH_FCSR0 $r0 +#define LOONGARCH_FCSR1 $r1 +#define LOONGARCH_FCSR2 $r2 +#define LOONGARCH_FCSR3 $r3 + +/* FPU Status Register Values */ +#define FPU_CSR_RSVD 0xe0e0fce0 + +/* + * X the exception cause indicator + * E the exception enable + * S the sticky/flag bit + */ +#define FPU_CSR_ALL_X 0x1f000000 +#define FPU_CSR_INV_X 0x10000000 +#define FPU_CSR_DIV_X 0x08000000 +#define FPU_CSR_OVF_X 0x04000000 +#define FPU_CSR_UDF_X 0x02000000 +#define FPU_CSR_INE_X 0x01000000 + +#define FPU_CSR_ALL_S 0x001f0000 +#define FPU_CSR_INV_S 0x00100000 +#define FPU_CSR_DIV_S 0x00080000 +#define FPU_CSR_OVF_S 0x00040000 +#define FPU_CSR_UDF_S 0x00020000 +#define FPU_CSR_INE_S 0x00010000 + +#define FPU_CSR_ALL_E 0x0000001f +#define FPU_CSR_INV_E 0x00000010 +#define FPU_CSR_DIV_E 0x00000008 +#define FPU_CSR_OVF_E 0x00000004 +#define FPU_CSR_UDF_E 0x00000002 +#define FPU_CSR_INE_E 0x00000001 + +/* Bits 8 and 9 of FPU Status Register specify the rounding mode */ +#define FPU_CSR_RM 0x300 +#define FPU_CSR_RN 0x000 /* nearest */ +#define FPU_CSR_RZ 0x100 /* towards zero */ +#define FPU_CSR_RU 0x200 /* towards +Infinity */ +#define FPU_CSR_RD 0x300 /* towards -Infinity */ + +#define write_fcsr(dest, val) \ +do { \ + __asm__ __volatile__( \ + " movgr2fcsr %0, "__stringify(dest)" \n" \ + : : "r" (val)); \ +} while (0) + +#define read_fcsr(source) \ +({ \ + unsigned int __res; \ +\ + __asm__ __volatile__( \ + " movfcsr2gr %0, "__stringify(source)" \n" \ + : "=r" (__res)); \ + __res; \ +}) + +#endif /* _ASM_LOONGARCHREGS_H */ diff --git a/arch/loongarch/include/asm/mach-loongson64/irq.h b/arch/loongarch/include/asm/mach-loongson64/irq.h new file mode 100644 index 000000000000..124cec11d01f --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/irq.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MACH_LOONGSON64_IRQ_H_ +#define __ASM_MACH_LOONGSON64_IRQ_H_ + +#define MAX_IO_PICS 2 +#define NR_IRQS (64 + (256 * MAX_IO_PICS)) + +#define CORES_PER_EIO_NODE 4 + +#define LOONGSON_CPU_UART0_VEC 10 /* CPU UART0 */ +#define LOONGSON_CPU_THSENS_VEC 14 /* CPU Thsens */ +#define LOONGSON_CPU_HT0_VEC 16 /* CPU HT0 irq vector base number */ +#define LOONGSON_CPU_HT1_VEC 24 /* CPU HT1 irq vector base number */ + +/* IRQ 
number definitions */ +#define LOONGSON_LPC_IRQ_BASE 0 +#define LOONGSON_LPC_LAST_IRQ (LOONGSON_LPC_IRQ_BASE + 15) + +#define LOONGSON_CPU_IRQ_BASE 16 +#define LOONGSON_CPU_LAST_IRQ (LOONGSON_CPU_IRQ_BASE + 14) + +#define LOONGSON_PCH_IRQ_BASE 64 +#define LOONGSON_PCH_ACPI_IRQ (LOONGSON_PCH_IRQ_BASE + 47) +#define LOONGSON_PCH_LAST_IRQ (LOONGSON_PCH_IRQ_BASE + 64 - 1) + +#define LOONGSON_MSI_IRQ_BASE (LOONGSON_PCH_IRQ_BASE + 64) +#define LOONGSON_MSI_LAST_IRQ (LOONGSON_PCH_IRQ_BASE + 256 - 1) + +#define GSI_MIN_LPC_IRQ LOONGSON_LPC_IRQ_BASE +#define GSI_MAX_LPC_IRQ (LOONGSON_LPC_IRQ_BASE + 16 - 1) +#define GSI_MIN_CPU_IRQ LOONGSON_CPU_IRQ_BASE +#define GSI_MAX_CPU_IRQ (LOONGSON_CPU_IRQ_BASE + 48 - 1) +#define GSI_MIN_PCH_IRQ LOONGSON_PCH_IRQ_BASE +#define GSI_MAX_PCH_IRQ (LOONGSON_PCH_IRQ_BASE + 256 - 1) + +extern int find_pch_pic(u32 gsi); +extern int eiointc_get_node(int id); + +static inline void eiointc_enable(void) +{ + uint64_t misc; + + misc = iocsr_read64(LOONGARCH_IOCSR_MISC_FUNC); + misc |= IOCSR_MISC_FUNC_EXT_IOI_EN; + iocsr_write64(misc, LOONGARCH_IOCSR_MISC_FUNC); +} + +struct acpi_madt_lio_pic; +struct acpi_madt_eio_pic; +struct acpi_madt_ht_pic; +struct acpi_madt_bio_pic; +struct acpi_madt_msi_pic; +struct acpi_madt_lpc_pic; + +struct irq_domain *liointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lio_pic *acpi_liointc); +struct irq_domain *eiointc_acpi_init(struct irq_domain *parent, + struct acpi_madt_eio_pic *acpi_eiointc); + +struct irq_domain *htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec); +struct irq_domain *pch_lpc_acpi_init(struct irq_domain *parent, + struct acpi_madt_lpc_pic *acpi_pchlpc); +struct irq_domain *pch_msi_acpi_init(struct irq_domain *parent, + struct acpi_madt_msi_pic *acpi_pchmsi); +struct irq_domain *pch_pic_acpi_init(struct irq_domain *parent, + struct acpi_madt_bio_pic *acpi_pchpic); + +extern struct acpi_madt_lio_pic *acpi_liointc; +extern struct acpi_madt_eio_pic *acpi_eiointc[MAX_IO_PICS]; + +extern struct acpi_madt_ht_pic *acpi_htintc; +extern struct acpi_madt_lpc_pic *acpi_pchlpc; +extern struct acpi_madt_msi_pic *acpi_pchmsi[MAX_IO_PICS]; +extern struct acpi_madt_bio_pic *acpi_pchpic[MAX_IO_PICS]; + +extern struct irq_domain *cpu_domain; +extern struct irq_domain *liointc_domain; +extern struct irq_domain *pch_lpc_domain; +extern struct irq_domain *pch_msi_domain[MAX_IO_PICS]; +extern struct irq_domain *pch_pic_domain[MAX_IO_PICS]; + +#endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/loongarch/include/asm/mach-loongson64/loongson.h b/arch/loongarch/include/asm/mach-loongson64/loongson.h new file mode 100644 index 000000000000..559e0ce970fa --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/loongson.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __ASM_MACH_LOONGSON64_LOONGSON_H +#define __ASM_MACH_LOONGSON64_LOONGSON_H + +#include +#include +#include +#include +#include +#include + +extern const struct plat_smp_ops loongson3_smp_ops; + +#define LOONGSON_REG(x) \ + (*(volatile u32 *)((char *)TO_UNCACHE(LOONGSON_REG_BASE) + (x))) + +#define LOONGSON_LIO_BASE 0x18000000 +#define LOONGSON_LIO_SIZE 0x00100000 /* 1M */ +#define LOONGSON_LIO_TOP (LOONGSON_LIO_BASE+LOONGSON_LIO_SIZE-1) + +#define LOONGSON_BOOT_BASE 0x1c000000 +#define LOONGSON_BOOT_SIZE 0x02000000 /* 32M */ +#define LOONGSON_BOOT_TOP (LOONGSON_BOOT_BASE+LOONGSON_BOOT_SIZE-1) + +#define 
LOONGSON_REG_BASE	0x1fe00000
+#define LOONGSON_REG_SIZE	0x00100000	/* 1M */
+#define LOONGSON_REG_TOP	(LOONGSON_REG_BASE+LOONGSON_REG_SIZE-1)
+
+/* GPIO Regs - r/w */
+
+#define LOONGSON_GPIODATA	LOONGSON_REG(0x11c)
+#define LOONGSON_GPIOIE		LOONGSON_REG(0x120)
+#define LOONGSON_REG_GPIO_BASE	(LOONGSON_REG_BASE + 0x11c)
+
+#define MAX_PACKAGES 16
+
+/* Chip Config register of each physical cpu package */
+extern u64 loongson_chipcfg[MAX_PACKAGES];
+#define LOONGSON_CHIPCFG(id)	(*(volatile u32 *)(loongson_chipcfg[id]))
+
+/* Chip Temperature register of each physical cpu package */
+extern u64 loongson_chiptemp[MAX_PACKAGES];
+#define LOONGSON_CHIPTEMP(id)	(*(volatile u32 *)(loongson_chiptemp[id]))
+
+/* Freq Control register of each physical cpu package */
+extern u64 loongson_freqctrl[MAX_PACKAGES];
+#define LOONGSON_FREQCTRL(id)	(*(volatile u32 *)(loongson_freqctrl[id]))
+
+#define xconf_readl(addr) readl(addr)
+#define xconf_readq(addr) readq(addr)
+
+static inline void xconf_writel(u32 val, volatile void __iomem *addr)
+{
+	asm volatile (
+	"	st.w	%[v], %[hw], 0	\n"
+	"	ld.b	$r0, %[hw], 0	\n"
+	:
+	: [hw] "r" (addr), [v] "r" (val)
+	);
+}
+
+static inline void xconf_writeq(u64 val64, volatile void __iomem *addr)
+{
+	asm volatile (
+	"	st.d	%[v], %[hw], 0	\n"
+	"	ld.b	$r0, %[hw], 0	\n"
+	:
+	: [hw] "r" (addr), [v] "r" (val64)
+	);
+}
+
+/* ============== LS7A registers =============== */
+#define LS7A_PCH_REG_BASE		0x10000000UL
+/* LPC regs */
+#define LS7A_LPC_REG_BASE		(LS7A_PCH_REG_BASE + 0x00002000)
+/* CHIPCFG regs */
+#define LS7A_CHIPCFG_REG_BASE		(LS7A_PCH_REG_BASE + 0x00010000)
+/* MISC reg base */
+#define LS7A_MISC_REG_BASE		(LS7A_PCH_REG_BASE + 0x00080000)
+/* ACPI regs */
+#define LS7A_ACPI_REG_BASE		(LS7A_MISC_REG_BASE + 0x00050000)
+/* RTC regs */
+#define LS7A_RTC_REG_BASE		(LS7A_MISC_REG_BASE + 0x00050100)
+
+#define LS7A_DMA_CFG			(volatile void *)TO_UNCACHE(LS7A_CHIPCFG_REG_BASE + 0x041c)
+#define LS7A_DMA_NODE_SHF		8
+#define LS7A_DMA_NODE_MASK		0x1F00
+
+#define LS7A_INT_MASK_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x020)
+#define LS7A_INT_EDGE_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x060)
+#define LS7A_INT_CLEAR_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x080)
+#define LS7A_INT_HTMSI_EN_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x040)
+#define LS7A_INT_ROUTE_ENTRY_REG	(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x100)
+#define LS7A_INT_HTMSI_VEC_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x200)
+#define LS7A_INT_STATUS_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x3a0)
+#define LS7A_INT_POL_REG		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x3e0)
+#define LS7A_LPC_INT_CTL		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x2000)
+#define LS7A_LPC_INT_ENA		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x2004)
+#define LS7A_LPC_INT_STS		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x2008)
+#define LS7A_LPC_INT_CLR		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x200c)
+#define LS7A_LPC_INT_POL		(volatile void *)TO_UNCACHE(LS7A_PCH_REG_BASE + 0x2010)
+
+#define LS7A_PMCON_SOC_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x000)
+#define LS7A_PMCON_RESUME_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x004)
+#define LS7A_PMCON_RTC_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x008)
+#define LS7A_PM1_EVT_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x00c)
+#define LS7A_PM1_ENA_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x010)
+#define LS7A_PM1_CNT_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x014)
+#define LS7A_PM1_TMR_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x018)
+#define LS7A_P_CNT_REG			(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x01c)
+#define LS7A_GPE0_STS_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x028)
+#define LS7A_GPE0_ENA_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x02c)
+#define LS7A_RST_CNT_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x030)
+#define LS7A_WD_SET_REG			(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x034)
+#define LS7A_WD_TIMER_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x038)
+#define LS7A_THSENS_CNT_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x04c)
+#define LS7A_GEN_RTC_1_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x050)
+#define LS7A_GEN_RTC_2_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x054)
+#define LS7A_DPM_CFG_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x400)
+#define LS7A_DPM_STS_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x404)
+#define LS7A_DPM_CNT_REG		(volatile void *)TO_UNCACHE(LS7A_ACPI_REG_BASE + 0x408)
+
+typedef enum {
+	ACPI_PCI_HOTPLUG_STATUS	= 1 << 1,
+	ACPI_CPU_HOTPLUG_STATUS	= 1 << 2,
+	ACPI_MEM_HOTPLUG_STATUS	= 1 << 3,
+	ACPI_POWERBUTTON_STATUS	= 1 << 8,
+	ACPI_RTC_WAKE_STATUS	= 1 << 10,
+	ACPI_PCI_WAKE_STATUS	= 1 << 14,
+	ACPI_ANY_WAKE_STATUS	= 1 << 15,
+} AcpiEventStatusBits;
+
+#define HT1LO_OFFSET		0xe0000000000UL
+
+/* PCI Configuration Space Base */
+#define MCFG_EXT_PCICFG_BASE	0xefe00000000UL
+
+/* REG ACCESS */
+#define ls7a_readb(addr)	(*(volatile unsigned char *)TO_UNCACHE(addr))
+#define ls7a_readw(addr)	(*(volatile unsigned short *)TO_UNCACHE(addr))
+#define ls7a_readl(addr)	(*(volatile unsigned int *)TO_UNCACHE(addr))
+#define ls7a_readq(addr)	(*(volatile unsigned long *)TO_UNCACHE(addr))
+#define ls7a_writeb(val, addr)	*(volatile unsigned char *)TO_UNCACHE(addr) = (val)
+#define ls7a_writew(val, addr)	*(volatile unsigned short *)TO_UNCACHE(addr) = (val)
+#define ls7a_writel(val, addr)	*(volatile unsigned int *)TO_UNCACHE(addr) = (val)
+#define ls7a_writeq(val, addr)	*(volatile unsigned long *)TO_UNCACHE(addr) = (val)
+
+#endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */
diff --git a/arch/loongarch/include/asm/mmu.h b/arch/loongarch/include/asm/mmu.h
new file mode 100644
index 000000000000..c2753acaf0ea
--- /dev/null
+++ b/arch/loongarch/include/asm/mmu.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef __ASM_MMU_H
+#define __ASM_MMU_H
+
+#include
+#include
+
+typedef struct {
+	u64 asid[NR_CPUS];
+	void *vdso;
+} mm_context_t;
+
+#endif /* __ASM_MMU_H */
diff --git a/arch/loongarch/include/asm/mmu_context.h b/arch/loongarch/include/asm/mmu_context.h
new file mode 100644
index 000000000000..ded6107885d0
--- /dev/null
+++ b/arch/loongarch/include/asm/mmu_context.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Switch a MMU context.
+ */
+#ifndef _ASM_MMU_CONTEXT_H
+#define _ASM_MMU_CONTEXT_H
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * All upper bits that are unused by the hardware ASID are treated
+ * as a software ASID extension (a generation counter).
+ */
+static inline u64 asid_version_mask(unsigned int cpu)
+{
+	return ~(u64)(cpu_asid_mask(&cpu_data[cpu]));
+}
+
+static inline u64 asid_first_version(unsigned int cpu)
+{
+	return cpu_asid_mask(&cpu_data[cpu]) + 1;
+}
+
+#define cpu_context(cpu, mm)	((mm)->context.asid[cpu])
+#define asid_cache(cpu)		(cpu_data[cpu].asid_cache)
+#define cpu_asid(cpu, mm)	(cpu_context((cpu), (mm)) & cpu_asid_mask(&cpu_data[cpu]))
+
+static inline int asid_valid(struct mm_struct *mm, unsigned int cpu)
+{
+	if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & asid_version_mask(cpu))
+		return 0;
+
+	return 1;
+}
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/* Normal, classic get_new_mmu_context */
+static inline void
+get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
+{
+	u64 asid = asid_cache(cpu);
+
+	if (!((++asid) & cpu_asid_mask(&cpu_data[cpu])))
+		local_flush_tlb_user();	/* start new asid cycle */
+
+	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
+}
+
+/*
+ * Initialize the context related info for a new mm_struct
+ * instance.
+ */
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		cpu_context(i, mm) = 0;
+
+	return 0;
+}
+
+static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+				      struct task_struct *tsk)
+{
+	unsigned int cpu = smp_processor_id();
+
+	/* Check if our ASID is of an older version and thus invalid */
+	if (!asid_valid(next, cpu))
+		get_new_mmu_context(next, cpu);
+
+	write_csr_asid(cpu_asid(cpu, next));
+
+	if (next != &init_mm)
+		csr_write64((unsigned long)next->pgd, LOONGARCH_CSR_PGDL);
+	else
+		csr_write64((unsigned long)invalid_pg_dir, LOONGARCH_CSR_PGDL);
+
+	/*
+	 * Mark current->active_mm as not "active" anymore.
+	 * We don't want to mislead possible IPI tlb flush routines.
+	 */
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+}
+
+#define switch_mm_irqs_off switch_mm_irqs_off
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm_irqs_off(prev, next, tsk);
+	local_irq_restore(flags);
+}
+
+/*
+ * Destroy context related info for an mm_struct that is about
+ * to be put to rest.
+ */
+static inline void destroy_context(struct mm_struct *mm)
+{
+}
+
+#define activate_mm(prev, next)		switch_mm(prev, next, current)
+#define deactivate_mm(task, mm)		do { } while (0)
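As an illustration of the scheme above (a stand-alone sketch, not part of the patch): asid_cache is a per-CPU 64-bit counter whose low bits are the hardware ASID and whose upper bits act as a software generation number. get_new_mmu_context() bumps the counter and flushes the TLB whenever the hardware bits wrap, while asid_valid() treats any context whose generation differs from the per-CPU cache as stale. The model below assumes a hypothetical 10-bit hardware ASID field; the real width comes from cpu_asid_mask().

#include <stdint.h>

#define HW_ASID_MASK	0x3ffULL	/* assumption: 10 hardware ASID bits */

static uint64_t asid_cache = HW_ASID_MASK + 1;	/* asid_first_version() */

/* Mirrors asid_valid(): the generation (upper) bits must match exactly */
static int model_asid_valid(uint64_t mm_asid)
{
	return !((mm_asid ^ asid_cache) & ~HW_ASID_MASK);
}

/* Mirrors get_new_mmu_context(): a wrap of the low bits starts a new cycle */
static uint64_t model_new_asid(void)
{
	++asid_cache;
	if (!(asid_cache & HW_ASID_MASK)) {
		/* hardware ASIDs exhausted: the kernel calls
		 * local_flush_tlb_user() here, then reuses ASIDs
		 * under the new generation */
	}
	return asid_cache;
}

int main(void)
{
	uint64_t mm_asid = model_new_asid();	/* context gets a gen-1 ASID */
	int i;

	for (i = 0; i < 1024; i++)		/* exhaust the 10-bit space */
		model_new_asid();

	return model_asid_valid(mm_asid);	/* 0: stale, needs a new ASID */
}

Every mm whose stored ASID belongs to an older generation fails the check and is handed a fresh ASID on its next switch_mm(), so no per-mm bookkeeping is needed when the counter wraps.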
+
+/*
+ * If mm is currently active, we can't really drop it.
+ * Instead, we will get a new one for it.
+ */
+static inline void
+drop_mmu_context(struct mm_struct *mm, unsigned cpu)
+{
+	int asid;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	asid = read_csr_asid() & cpu_asid_mask(&current_cpu_data);
+
+	if (asid == cpu_asid(cpu, mm)) {
+		if (!current->mm || (current->mm == mm)) {
+			get_new_mmu_context(mm, cpu);
+			write_csr_asid(cpu_asid(cpu, mm));
+			goto out;
+		}
+	}
+
+	/* Will get a new context next time */
+	cpu_context(cpu, mm) = 0;
+	cpumask_clear_cpu(cpu, mm_cpumask(mm));
+out:
+	local_irq_restore(flags);
+}
+
+#endif /* _ASM_MMU_CONTEXT_H */
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
new file mode 100644
index 000000000000..c54d93ea2c27
--- /dev/null
+++ b/arch/loongarch/include/asm/module.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_MODULE_H
+#define _ASM_MODULE_H
+
+#include
+#include
+
+#define RELA_STACK_DEPTH 16
+
+struct mod_section {
+	Elf_Shdr *shdr;
+	int num_entries;
+	int max_entries;
+};
+
+struct mod_arch_specific {
+	struct mod_section plt;
+	struct mod_section plt_idx;
+};
+
+struct plt_entry {
+	u32 inst_lu12iw;
+	u32 inst_lu32id;
+	u32 inst_lu52id;
+	u32 inst_jirl;
+};
+
+struct plt_idx_entry {
+	unsigned long symbol_addr;
+};
+
+Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val);
+
+static inline struct plt_entry emit_plt_entry(unsigned long val)
+{
+	u32 lu12iw, lu32id, lu52id, jirl;
+
+	lu12iw = (0x0000000a << 25 | (((val >> 12) & 0xfffff) << 5) | REG_T1);
+	lu32id = larch_insn_gen_lu32id(REG_T1, ADDR_IMM(val, LU32ID));
+	lu52id = larch_insn_gen_lu52id(REG_T1, REG_T1, ADDR_IMM(val, LU52ID));
+	jirl = larch_insn_gen_jirl(0, REG_T1, 0, (val & 0xfff));
+
+	return (struct plt_entry) { lu12iw, lu32id, lu52id, jirl };
+}
+
+static inline struct plt_idx_entry emit_plt_idx_entry(unsigned long val)
+{
+	return (struct plt_idx_entry) { val };
+}
+
+static inline int get_plt_idx(unsigned long val, const struct mod_section *sec)
+{
+	int i;
+	struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sec->shdr->sh_addr;
+
+	for (i = 0; i < sec->num_entries; i++) {
+		if (plt_idx[i].symbol_addr == val)
+			return i;
+	}
+
+	return -1;
+}
+
+static inline struct plt_entry *get_plt_entry(unsigned long val,
+					      const struct mod_section *sec_plt,
+					      const struct mod_section *sec_plt_idx)
+{
+	int plt_idx = get_plt_idx(val, sec_plt_idx);
+	struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr;
+
+	if (plt_idx < 0)
+		return NULL;
+
+	return plt + plt_idx;
+}
+
+#endif /* _ASM_MODULE_H */
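The PLT stub built by emit_plt_entry() materializes a 64-bit target address in t1 with three loads plus a jump: lu12i.w supplies bits 31:12 (the raw 0x0000000a opcode word above), lu32i.d supplies bits 51:32, lu52i.d supplies bits 63:52, and the jirl immediate carries the remaining low 12 bits as its branch offset. A stand-alone sketch of that bit-slicing (an illustration, not part of the patch; the address value is arbitrary):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t addr = 0x9000000012345678ULL;	/* arbitrary example target */

	uint32_t lo12  =  addr        & 0xfff;		/* jirl offset       */
	uint32_t lo20  = (addr >> 12) & 0xfffff;	/* lu12i.w immediate */
	uint32_t mid20 = (addr >> 32) & 0xfffff;	/* lu32i.d immediate */
	uint32_t hi12  = (addr >> 52) & 0xfff;		/* lu52i.d immediate */

	/* The four slices cover all 64 bits exactly once */
	uint64_t rebuilt = ((uint64_t)hi12 << 52) | ((uint64_t)mid20 << 32) |
			   ((uint64_t)lo20 << 12) | lo12;
	assert(rebuilt == addr);
	return 0;
}

Because lu12i.w leaves bits 11:0 of t1 zero, adding the low 12 bits via the jirl offset cannot carry into the bits the earlier instructions already wrote.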
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
new file mode 100644
index 000000000000..31c1c0db11a3
--- /dev/null
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */
+SECTIONS {
+	. = ALIGN(4);
+	.plt : { BYTE(0) }
+	.plt.idx : { BYTE(0) }
+}
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
new file mode 100644
index 000000000000..df8ce999c93e
--- /dev/null
+++ b/arch/loongarch/include/asm/page.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_PAGE_H
+#define _ASM_PAGE_H
+
+#include
+
+/*
+ * PAGE_SHIFT determines the page size
+ */
+#ifdef CONFIG_PAGE_SIZE_4KB
+#define PAGE_SHIFT	12
+#endif
+#ifdef CONFIG_PAGE_SIZE_16KB
+#define PAGE_SHIFT	14
+#endif
+#ifdef CONFIG_PAGE_SIZE_64KB
+#define PAGE_SHIFT	16
+#endif
+#define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE - 1))
+
+#define HPAGE_SHIFT	(PAGE_SHIFT + PAGE_SHIFT - 3)
+#define HPAGE_SIZE	(_AC(1, UL) << HPAGE_SHIFT)
+#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+#include
+#include
+
+/*
+ * It's normally defined only for FLATMEM config but it's
+ * used in our early mem init code for all memory models.
+ * So always define it.
+ */
+#define ARCH_PFN_OFFSET	PFN_UP(PHYS_OFFSET)
+
+extern void clear_page(void *page);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+extern unsigned long shm_align_mask;
+
+struct page;
+struct vm_area_struct;
+void copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma);
+
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+
+typedef struct { unsigned long pte; } pte_t;
+#define pte_val(x)	((x).pte)
+#define __pte(x)	((pte_t) { (x) })
+typedef struct page *pgtable_t;
+
+typedef struct { unsigned long pgd; } pgd_t;
+#define pgd_val(x)	((x).pgd)
+#define __pgd(x)	((pgd_t) { (x) })
+
+/*
+ * Manipulate page protection bits
+ */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x)	((x).pgprot)
+#define __pgprot(x)	((pgprot_t) { (x) })
+#define pte_pgprot(x)	__pgprot(pte_val(x) & ~_PFN_MASK)
+
+#define ptep_buddy(x)	((pte_t *)((unsigned long)(x) ^ sizeof(pte_t)))
+
+/*
+ * __pa()/__va() should be used only during mem init.
+ */
+#define __pa(x)		PHYSADDR(x)
+#define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
+
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#ifdef CONFIG_FLATMEM
+
+static inline int pfn_valid(unsigned long pfn)
+{
+	/* avoid include hell */
+	extern unsigned long max_mapnr;
+	unsigned long pfn_offset = ARCH_PFN_OFFSET;
+
+	return pfn >= pfn_offset && pfn < max_mapnr;
+}
+
+#endif
+
+#define virt_to_pfn(kaddr)	PFN_DOWN(virt_to_phys((void *)(kaddr)))
+#define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
+
+extern int __virt_addr_valid(volatile void *kaddr);
+#define virt_addr_valid(kaddr)	__virt_addr_valid((volatile void *)(kaddr))
+
+#define VM_DATA_DEFAULT_FLAGS \
+	(VM_READ | VM_WRITE | \
+	 ((current->personality & READ_IMPLIES_EXEC) ?
VM_EXEC : 0) | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +extern unsigned long __kaslr_offset; +static inline unsigned long kaslr_offset(void) +{ + return __kaslr_offset; +} + +#include +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_PAGE_H */ diff --git a/arch/loongarch/include/asm/pci.h b/arch/loongarch/include/asm/pci.h new file mode 100644 index 000000000000..6698c01b54f5 --- /dev/null +++ b/arch/loongarch/include/asm/pci.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_PCI_H +#define _ASM_PCI_H + +#include + +/* + * This file essentially defines the interface between board specific + * PCI code and LoongArch common PCI code. Should potentially put into + * include/asm/pci.h file. + */ + +#include +#include + +extern phys_addr_t mcfg_addr_init(int node); + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ +static inline unsigned int pcibios_assign_all_busses(void) +{ + return 0; +} + +#define PCIBIOS_MIN_IO 0x4000 +#define PCIBIOS_MIN_MEM 0x20000000 +#define PCIBIOS_MIN_CARDBUS_IO 0x4000 + +#define HAVE_PCI_MMAP +#define ARCH_GENERIC_PCI_MMAP_RESOURCE + +/* + * Dynamic DMA mapping stuff. + * LoongArch has everything mapped statically. + */ + +#include +#include +#include +#include +#include + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return 1; /* always show the domain in /proc */ +} + +/* generic pci stuff */ +#include + +#endif /* _ASM_PCI_H */ diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h new file mode 100644 index 000000000000..28fd8b6f2188 --- /dev/null +++ b/arch/loongarch/include/asm/percpu.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Ltd. + */ +#ifndef __ASM_PERCPU_H +#define __ASM_PERCPU_H + +/* Use r21 for fast access */ +register unsigned long __my_cpu_offset __asm__("$r21"); + +static inline void set_my_cpu_offset(unsigned long off) +{ + __my_cpu_offset = off; + csr_write64(off, PERCPU_BASE_KS); +} +#define __my_cpu_offset __my_cpu_offset + +#include + +#endif /* __ASM_PERCPU_H */ diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h new file mode 100644 index 000000000000..943f478d823f --- /dev/null +++ b/arch/loongarch/include/asm/perf_event.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * linux/arch/loongarch/include/asm/perf_event.h + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LOONGARCH_PERF_EVENT_H__ +#define __LOONGARCH_PERF_EVENT_H__ +/* Nothing to show here; the file is required by linux/perf_event.h. 
*/ +#endif /* __LOONGARCH_PERF_EVENT_H__ */ diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h new file mode 100644 index 000000000000..e94de7f25366 --- /dev/null +++ b/arch/loongarch/include/asm/pgalloc.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGALLOC_H +#define _ASM_PGALLOC_H + +#include +#include + +#define __HAVE_ARCH_PMD_ALLOC_ONE +#define __HAVE_ARCH_PUD_ALLOC_ONE +#include + +static inline void pmd_populate_kernel(struct mm_struct *mm, + pmd_t *pmd, pte_t *pte) +{ + set_pmd(pmd, __pmd((unsigned long)pte)); +} + +static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) +{ + set_pmd(pmd, __pmd((unsigned long)page_address(pte))); +} +#define pmd_pgtable(pmd) pmd_page(pmd) + +#ifndef __PAGETABLE_PMD_FOLDED + +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + set_pud(pud, __pud((unsigned long)pmd)); +} +#endif + +#ifndef __PAGETABLE_PUD_FOLDED + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + set_p4d(p4d, __p4d((unsigned long)pud)); +} + +#endif /* __PAGETABLE_PUD_FOLDED */ + +extern void pagetable_init(void); + +/* + * Initialize a new pmd table with invalid pointers. + */ +extern void pmd_init(unsigned long page, unsigned long pagetable); + +/* + * Initialize a new pgd / pmd table with invalid pointers. + */ +extern void pgd_init(unsigned long page); +extern pgd_t *pgd_alloc(struct mm_struct *mm); + +#define __pte_free_tlb(tlb,pte,address) \ +do { \ + pgtable_pte_page_dtor(pte); \ + tlb_remove_page((tlb), pte); \ +} while (0) + +#ifndef __PAGETABLE_PMD_FOLDED + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *pmd; + struct page *pg; + + pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER); + if (!pg) + return NULL; + + if (!pgtable_pmd_page_ctor(pg)) { + __free_pages(pg, PMD_ORDER); + return NULL; + } + + pmd = (pmd_t *)page_address(pg); + pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + return pmd; +} + +#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x) + +#endif + +#ifndef __PAGETABLE_PUD_FOLDED + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pud_t *pud; + + pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); + if (pud) + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + return pud; +} + +#define __pud_free_tlb(tlb, x, addr) pud_free((tlb)->mm, x) + +#endif /* __PAGETABLE_PUD_FOLDED */ + +#endif /* _ASM_PGALLOC_H */ diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h new file mode 100644 index 000000000000..adb05170dc2b --- /dev/null +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGTABLE_64_H +#define _ASM_PGTABLE_64_H + +#include +#include + +#include + +#if CONFIG_PGTABLE_LEVELS == 2 +#include +#elif CONFIG_PGTABLE_LEVELS == 3 +#include +#else +#include +#endif + +#define PGD_ORDER 0 +#define PUD_ORDER 0 +#define PMD_ORDER 0 +#define PTE_ORDER 0 + +#if CONFIG_PGTABLE_LEVELS == 2 +#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT + PTE_ORDER - 3)) +#elif CONFIG_PGTABLE_LEVELS == 3 +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT + PTE_ORDER - 3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SHIFT (PMD_SHIFT + 
(PAGE_SHIFT + PMD_ORDER - 3)) +#elif CONFIG_PGTABLE_LEVELS == 4 +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT + PTE_ORDER - 3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PUD_SHIFT (PMD_SHIFT + (PAGE_SHIFT + PMD_ORDER - 3)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PGDIR_SHIFT (PUD_SHIFT + (PAGE_SHIFT + PUD_ORDER - 3)) +#endif + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define VA_BITS (PGDIR_SHIFT + (PAGE_SHIFT + PGD_ORDER - 3)) + +#define PTRS_PER_PGD ((PAGE_SIZE << PGD_ORDER) >> 3) +#if CONFIG_PGTABLE_LEVELS > 3 +#define PTRS_PER_PUD ((PAGE_SIZE << PUD_ORDER) >> 3) +#endif +#if CONFIG_PGTABLE_LEVELS > 2 +#define PTRS_PER_PMD ((PAGE_SIZE << PMD_ORDER) >> 3) +#endif +#define PTRS_PER_PTE ((PAGE_SIZE << PTE_ORDER) >> 3) + +#define USER_PTRS_PER_PGD ((TASK_SIZE64 / PGDIR_SIZE)?(TASK_SIZE64 / PGDIR_SIZE):1) +#define FIRST_USER_ADDRESS 0UL + +#ifndef __ASSEMBLY__ + +#include + +/* + * TLB refill handlers may also map the vmalloc area into xkvrange. + * Avoid the first couple of pages so NULL pointer dereferences will + * still reliably trap. + */ +#define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE)) +#define MODULES_END (MODULES_VADDR + SZ_256M) + +#define VMALLOC_START MODULES_END +#define VMALLOC_END \ + (vm_map_base + \ + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE) + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#endif +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#endif +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) + +extern pte_t invalid_pte_table[PTRS_PER_PTE]; + +#ifndef __PAGETABLE_PUD_FOLDED + +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) + +extern pud_t invalid_pud_table[PTRS_PER_PUD]; + +/* + * Empty pgd/p4d entries point to the invalid_pud_table. + */ +static inline int p4d_none(p4d_t p4d) +{ + return p4d_val(p4d) == (unsigned long)invalid_pud_table; +} + +static inline int p4d_bad(p4d_t p4d) +{ + return p4d_val(p4d) & ~PAGE_MASK; +} + +static inline int p4d_present(p4d_t p4d) +{ + return p4d_val(p4d) != (unsigned long)invalid_pud_table; +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + p4d_val(*p4dp) = (unsigned long)invalid_pud_table; +} + +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + return (pud_t *)p4d_val(p4d); +} + +static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) +{ + *p4d = p4dval; +} + +#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) + +#endif + +#ifndef __PAGETABLE_PMD_FOLDED + +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) } ) + +extern pmd_t invalid_pmd_table[PTRS_PER_PMD]; + +/* + * Empty pud entries point to the invalid_pmd_table. 
+ */
+static inline int pud_none(pud_t pud)
+{
+	return pud_val(pud) == (unsigned long)invalid_pmd_table;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+	return pud_val(pud) & ~PAGE_MASK;
+}
+
+static inline int pud_present(pud_t pud)
+{
+	return pud_val(pud) != (unsigned long)invalid_pmd_table;
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	pud_val(*pudp) = ((unsigned long)invalid_pmd_table);
+}
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+	return (pmd_t *)pud_val(pud);
+}
+
+#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0)
+
+#define pud_phys(pud)		virt_to_phys((void *)pud_val(pud))
+#define pud_page(pud)		(pfn_to_page(pud_phys(pud) >> PAGE_SHIFT))
+
+#endif
+
+/*
+ * Empty pmd entries point to the invalid_pte_table.
+ */
+static inline int pmd_none(pmd_t pmd)
+{
+	return pmd_val(pmd) == (unsigned long)invalid_pte_table;
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	return (pmd_val(pmd) & ~PAGE_MASK);
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	if (unlikely(pmd_val(pmd) & _PAGE_HUGE))
+		return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE));
+
+	return pmd_val(pmd) != (unsigned long)invalid_pte_table;
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	pmd_val(*pmdp) = ((unsigned long)invalid_pte_table);
+}
+
+#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0)
+
+#define pmd_phys(pmd)		virt_to_phys((void *)pmd_val(pmd))
+
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_page(pmd)		(pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#define pmd_page_vaddr(pmd)	pmd_val(pmd)
+
+extern pmd_t mk_pmd(struct page *page, pgprot_t prot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd);
+
+#define pte_page(x)		pfn_to_page(pte_pfn(x))
+#define pte_pfn(x)		((unsigned long)(((x).pte & _PFN_MASK) >> _PFN_SHIFT))
+#define pfn_pte(pfn, prot)	__pte(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
+#define pfn_pmd(pfn, prot)	__pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
+
+/*
+ * Initialize a new pgd / pmd table with invalid pointers.
+ */
+extern void pgd_init(unsigned long page);
+extern void pud_init(unsigned long page, unsigned long pagetable);
+extern void pmd_init(unsigned long page, unsigned long pagetable);
+
+/*
+ * Non-present pages: high 40 bits are offset, next 8 bits type,
+ * low 16 bits zero.
+ */
+static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+{ pte_t pte; pte_val(pte) = (type << 16) | (offset << 24); return pte; }
+
+#define __swp_type(x)		(((x).val >> 16) & 0xff)
+#define __swp_offset(x)		((x).val >> 24)
+#define __swp_entry(type, offset) ((swp_entry_t) { pte_val(mk_swap_pte((type), (offset))) })
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+#define __pmd_to_swp_entry(pmd)	((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(x)	((pmd_t) { (x).val | _PAGE_HUGE })
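As a quick check of the swap-entry layout above (an illustration, not part of the patch; the values are arbitrary), the encode/decode round trip keeps the type in bits 23:16, the offset in bits 63:24, and leaves the low 16 bits zero so the entry can never look present or valid:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t type = 0x12, offset = 0xabcdeULL;

	/* mk_swap_pte(): type in bits 23:16, offset in bits 63:24 */
	uint64_t val = (type << 16) | (offset << 24);

	assert(((val >> 16) & 0xff) == type);	/* __swp_type()   */
	assert((val >> 24) == offset);		/* __swp_offset() */
	assert((val & 0xffff) == 0);		/* low 16 bits stay zero */
	return 0;
}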
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_PGTABLE_64_H */
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
new file mode 100644
index 000000000000..4c377ff13d0f
--- /dev/null
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_PGTABLE_BITS_H
+#define _ASM_PGTABLE_BITS_H
+
+/* Page table bits */
+#define	_PAGE_VALID_SHIFT	0
+#define	_PAGE_ACCESSED_SHIFT	0  /* Reuse Valid for Accessed */
+#define	_PAGE_DIRTY_SHIFT	1
+#define	_PAGE_PLV_SHIFT		2  /* 2~3, two bits */
+#define	_CACHE_SHIFT		4  /* 4~5, two bits */
+#define	_PAGE_GLOBAL_SHIFT	6
+#define	_PAGE_HUGE_SHIFT	6  /* HUGE is a PMD bit */
+#define	_PAGE_PRESENT_SHIFT	7
+#define	_PAGE_WRITE_SHIFT	8
+#define	_PAGE_MODIFIED_SHIFT	9
+#define	_PAGE_PROTNONE_SHIFT	10
+#define	_PAGE_SPECIAL_SHIFT	11
+#define	_PAGE_HGLOBAL_SHIFT	12 /* HGlobal is a PMD bit */
+#define	_PAGE_PFN_SHIFT		12
+#define	_PAGE_PFN_END_SHIFT	48
+#define	_PAGE_NO_READ_SHIFT	61
+#define	_PAGE_NO_EXEC_SHIFT	62
+#define	_PAGE_RPLV_SHIFT	63
+
+/* Used by software */
+#define _PAGE_PRESENT		(_ULCAST_(1) << _PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE		(_ULCAST_(1) << _PAGE_WRITE_SHIFT)
+#define _PAGE_ACCESSED		(_ULCAST_(1) << _PAGE_ACCESSED_SHIFT)
+#define _PAGE_MODIFIED		(_ULCAST_(1) << _PAGE_MODIFIED_SHIFT)
+#define _PAGE_PROTNONE		(_ULCAST_(1) << _PAGE_PROTNONE_SHIFT)
+#define _PAGE_SPECIAL		(_ULCAST_(1) << _PAGE_SPECIAL_SHIFT)
+
+/* Used by TLB hardware (placed in EntryLo*) */
+#define _PAGE_VALID		(_ULCAST_(1) << _PAGE_VALID_SHIFT)
+#define _PAGE_DIRTY		(_ULCAST_(1) << _PAGE_DIRTY_SHIFT)
+#define _PAGE_PLV		(_ULCAST_(3) << _PAGE_PLV_SHIFT)
+#define _PAGE_GLOBAL		(_ULCAST_(1) << _PAGE_GLOBAL_SHIFT)
+#define _PAGE_HUGE		(_ULCAST_(1) << _PAGE_HUGE_SHIFT)
+#define _PAGE_HGLOBAL		(_ULCAST_(1) << _PAGE_HGLOBAL_SHIFT)
+#define _PAGE_NO_READ		(_ULCAST_(1) << _PAGE_NO_READ_SHIFT)
+#define _PAGE_NO_EXEC		(_ULCAST_(1) << _PAGE_NO_EXEC_SHIFT)
+#define _PAGE_RPLV		(_ULCAST_(1) << _PAGE_RPLV_SHIFT)
+#define _CACHE_MASK		(_ULCAST_(3) << _CACHE_SHIFT)
+#define _PFN_SHIFT		(PAGE_SHIFT - 12 + _PAGE_PFN_SHIFT)
+
+#define _PAGE_USER	(PLV_USER << _PAGE_PLV_SHIFT)
+#define _PAGE_KERN	(PLV_KERN << _PAGE_PLV_SHIFT)
+
+#define _PFN_MASK (~((_ULCAST_(1) << (_PFN_SHIFT)) - 1) & \
+		  ((_ULCAST_(1) << (_PAGE_PFN_END_SHIFT)) - 1))
+
+/*
+ * Cache attributes
+ */
+#ifndef _CACHE_SUC
+#define _CACHE_SUC	(0<<_CACHE_SHIFT) /* Strong-ordered UnCached */
+#endif
+#ifndef _CACHE_CC
+#define _CACHE_CC	(1<<_CACHE_SHIFT) /* Coherent Cached */
+#endif
+#ifndef _CACHE_WUC
+#define _CACHE_WUC	(2<<_CACHE_SHIFT) /* Weak-ordered UnCached */
+#endif
+
+#define __READABLE	(_PAGE_VALID)
+#define __WRITEABLE	(_PAGE_DIRTY | _PAGE_WRITE)
+
+#define _PAGE_CHG_MASK	(_PAGE_MODIFIED | _PAGE_SPECIAL | _PFN_MASK | _CACHE_MASK | _PAGE_PLV)
+#define _HPAGE_CHG_MASK	(_PAGE_MODIFIED |
_PAGE_SPECIAL | _PFN_MASK | _CACHE_MASK | _PAGE_PLV | _PAGE_HUGE) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_NO_READ | \ + _PAGE_USER | _CACHE_CC) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ + _PAGE_USER | _CACHE_CC) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _CACHE_CC) + +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ + _PAGE_GLOBAL | _PAGE_KERN | _CACHE_CC) +#define PAGE_KERNEL_SUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ + _PAGE_GLOBAL | _PAGE_KERN | _CACHE_SUC) +#define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ + _PAGE_GLOBAL | _PAGE_KERN | _CACHE_WUC) + +#define __P000 __pgprot(_CACHE_CC | _PAGE_USER | _PAGE_PROTNONE | _PAGE_NO_EXEC | _PAGE_NO_READ) +#define __P001 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __P010 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __P011 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __P100 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __P101 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __P110 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __P111 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) + +#define __S000 __pgprot(_CACHE_CC | _PAGE_USER | _PAGE_PROTNONE | _PAGE_NO_EXEC | _PAGE_NO_READ) +#define __S001 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC) +#define __S010 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE) +#define __S011 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE) +#define __S100 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __S101 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT) +#define __S110 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_WRITE) +#define __S111 __pgprot(_CACHE_CC | _PAGE_VALID | _PAGE_USER | _PAGE_PRESENT | _PAGE_WRITE) + +#ifndef __ASSEMBLY__ + +#define pgprot_noncached pgprot_noncached + +static inline pgprot_t pgprot_noncached(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + + prot = (prot & ~_CACHE_MASK) | _CACHE_SUC; + + return __pgprot(prot); +} + +#define pgprot_writecombine pgprot_writecombine + +static inline pgprot_t pgprot_writecombine(pgprot_t _prot) +{ + unsigned long prot = pgprot_val(_prot); + + prot = (prot & ~_CACHE_MASK) | _CACHE_WUC; + + return __pgprot(prot); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h new file mode 100644 index 000000000000..99438f209e0d --- /dev/null +++ b/arch/loongarch/include/asm/pgtable.h @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PGTABLE_H +#define _ASM_PGTABLE_H + +#include +#include +#include +#include +#include + +struct mm_struct; +struct vm_area_struct; + +/* + * ZERO_PAGE is a global shared page that is always zero; used + * for zero-mapped memory areas etc.. 
+ */ + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask)))) +#define __HAVE_COLOR_ZERO_PAGE + +extern void paging_init(void); + +#define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL)) +#define pte_present(pte) (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROTNONE)) +#define pte_no_exec(pte) (pte_val(pte) & _PAGE_NO_EXEC) + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + if (pte_val(pteval) & _PAGE_GLOBAL) { + pte_t *buddy = ptep_buddy(ptep); + /* + * Make sure the buddy is global too (if it's !none, + * it better already be global) + */ + if (pte_none(*buddy)) + pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + } +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + set_pte(ptep, pteval); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + /* Preserve global status for the pair */ + if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + set_pte_at(mm, addr, ptep, __pte(_PAGE_GLOBAL)); + else + set_pte_at(mm, addr, ptep, __pte(0)); +} + +#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) +#define PMD_T_LOG2 (__builtin_ffs(sizeof(pmd_t)) - 1) +#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) + +extern pgd_t swapper_pg_dir[]; +extern pgd_t invalid_pg_dir[]; + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_ACCESSED; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= _PAGE_ACCESSED; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_MODIFIED); + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + return pte; +} + +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + return pte; +} + +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_WRITE | _PAGE_DIRTY); + return pte; +} + +static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } + +static inline pte_t pte_mkhuge(pte_t pte) +{ + pte_val(pte) |= _PAGE_HUGE; + return pte; +} + +#if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) +static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } +static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; } +#endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ + +#define pte_accessible pte_accessible +static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) +{ + if (pte_val(a) & _PAGE_PRESENT) + return true; + + if ((pte_val(a) & _PAGE_PROTNONE) && + mm_tlb_flush_pending(mm)) + return true; + + return false; +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. 
+ */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); +} + +extern void __update_tlb(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); + +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + __update_tlb(vma, address, ptep); +} + +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + __update_tlb(vma, address, (pte_t *)pmdp); +} + +#define kern_addr_valid(addr) (1) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/ +#define pmdp_establish generic_pmdp_establish + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_HUGE) && pmd_present(pmd); +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pmd_val(pmd) = (pmd_val(pmd) & ~(_PAGE_GLOBAL)) | + ((pmd_val(pmd) & _PAGE_GLOBAL) << (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)); + pmd_val(pmd) |= _PAGE_HUGE; + + return pmd; +} + +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_WRITE); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + return pmd; +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_WRITE | _PAGE_DIRTY); + return pmd; +} + +static inline int pmd_dirty(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_MODIFIED); +} + +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_DIRTY | _PAGE_MODIFIED); + return pmd; +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + return pmd; +} + +static inline int pmd_young(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + pmd_val(pmd) &= ~_PAGE_ACCESSED; + return pmd; +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + pmd_val(pmd) |= _PAGE_ACCESSED; + return pmd; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT; +} + +static inline struct page *pmd_page(pmd_t pmd) +{ + if (pmd_trans_huge(pmd)) + return pfn_to_page(pmd_pfn(pmd)); + + return pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT); +} + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); + return pmd; +} + +static inline pmd_t pmd_mkinvalid(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE); + + return pmd; +} + +/* + * The generic version pmdp_huge_get_and_clear uses a version of pmd_clear() with a + * different prototype. + */ +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + pmd_t old = *pmdp; + + pmd_clear(pmdp); + + return old; +} + +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* + * We provide our own get_unmapped area to cope with the virtual aliasing + * constraints placed on us by the cache architecture. 
+ */ +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +#endif /* _ASM_PGTABLE_H */ diff --git a/arch/loongarch/include/asm/prefetch.h b/arch/loongarch/include/asm/prefetch.h new file mode 100644 index 000000000000..47169b57ce32 --- /dev/null +++ b/arch/loongarch/include/asm/prefetch.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef __ASM_PREFETCH_H +#define __ASM_PREFETCH_H + +#define Pref_Load 0 +#define Pref_Store 8 + +#ifdef __ASSEMBLY__ + + .macro __pref hint addr +#ifdef CONFIG_CPU_HAS_PREFETCH + preld \hint, \addr, 0 +#endif + .endm + + .macro pref_load addr + __pref Pref_Load, \addr + .endm + + .macro pref_store addr + __pref Pref_Store, \addr + .endm + +#endif + +#endif /* __ASM_PREFETCH_H */ diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h new file mode 100644 index 000000000000..708af5acd506 --- /dev/null +++ b/arch/loongarch/include/asm/processor.h @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_PROCESSOR_H +#define _ASM_PROCESSOR_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_32BIT + +#define TASK_SIZE 0x80000000UL +#define TASK_SIZE_MIN TASK_SIZE +#define STACK_TOP_MAX TASK_SIZE + +#define TASK_IS_32BIT_ADDR 1 + +#endif + +#ifdef CONFIG_64BIT + +#define TASK_SIZE32 0x100000000UL +#define TASK_SIZE64 (0x1UL << ((cpu_vabits > VA_BITS) ? VA_BITS : cpu_vabits)) + +#define TASK_SIZE (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64) +#define TASK_SIZE_MIN TASK_SIZE32 +#define STACK_TOP_MAX TASK_SIZE64 + +#define TASK_SIZE_OF(tsk) \ + (test_tsk_thread_flag(tsk, TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE64) + +#define TASK_IS_32BIT_ADDR test_thread_flag(TIF_32BIT_ADDR) + +#endif + +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M) + +unsigned long stack_top(void); +#define STACK_TOP stack_top() + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) + +#define FPU_REG_WIDTH 256 +#define FPU_ALIGN __attribute__((aligned(32))) + +union fpureg { + __u32 val32[FPU_REG_WIDTH / 32]; + __u64 val64[FPU_REG_WIDTH / 64]; +}; + +#define FPR_IDX(width, idx) (idx) + +#define BUILD_FPR_ACCESS(width) \ +static inline u##width get_fpr##width(union fpureg *fpr, unsigned idx) \ +{ \ + return fpr->val##width[FPR_IDX(width, idx)]; \ +} \ + \ +static inline void set_fpr##width(union fpureg *fpr, unsigned idx, \ + u##width val) \ +{ \ + fpr->val##width[FPR_IDX(width, idx)] = val; \ +} + +BUILD_FPR_ACCESS(32) +BUILD_FPR_ACCESS(64) + +struct loongarch_fpu { + unsigned int fcsr; + unsigned int vcsr; + uint64_t fcc; /* 8x8 */ + union fpureg fpr[NUM_FPU_REGS]; +}; + +#define INIT_CPUMASK { \ + {0,} \ +} + +#define ARCH_MIN_TASKALIGN 32 + +struct loongarch_vdso_info; + +/* + * If you change thread_struct remember to change the #defines below too! + */ +struct thread_struct { + /* Main processor registers. 
*/ + unsigned long reg01, reg03, reg22; /* ra sp fp */ + unsigned long reg23, reg24, reg25, reg26; /* s0-s3 */ + unsigned long reg27, reg28, reg29, reg30, reg31; /* s4-s8 */ + + /* CSR registers */ + unsigned long csr_prmd; + unsigned long csr_crmd; + unsigned long csr_euen; + unsigned long csr_ecfg; + unsigned long csr_badvaddr; /* Last user fault */ + + /* Scratch registers */ + unsigned long scr0; + unsigned long scr1; + unsigned long scr2; + unsigned long scr3; + + /* Eflags register */ + unsigned long eflags; + + /* Other stuff associated with the thread. */ + unsigned long trap_nr; + unsigned long error_code; + struct loongarch_vdso_info *vdso; + + /* + * FPU & vector registers, must be at last because + * they are conditionally copied at fork(). + */ + struct loongarch_fpu fpu FPU_ALIGN; +}; + +#define INIT_THREAD { \ + /* \ + * Main processor registers \ + */ \ + .reg01 = 0, \ + .reg03 = 0, \ + .reg22 = 0, \ + .reg23 = 0, \ + .reg24 = 0, \ + .reg25 = 0, \ + .reg26 = 0, \ + .reg27 = 0, \ + .reg28 = 0, \ + .reg29 = 0, \ + .reg30 = 0, \ + .reg31 = 0, \ + .csr_crmd = 0, \ + .csr_prmd = 0, \ + .csr_euen = 0, \ + .csr_ecfg = 0, \ + .csr_badvaddr = 0, \ + /* \ + * Other stuff associated with the process \ + */ \ + .trap_nr = 0, \ + .error_code = 0, \ + /* \ + * FPU & vector registers \ + */ \ + .fpu = { \ + .fcsr = 0, \ + .vcsr = 0, \ + .fcc = 0, \ + .fpr = {{{0,},},}, \ + }, \ +} + +struct task_struct; + +/* Free all resources held by a thread. */ +#define release_thread(thread) do { } while(0) + +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, + IDLE_POLL}; + +extern unsigned long boot_option_idle_override; +/* + * Do necessary setup to start up a newly executed thread. + */ +extern void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp); + +static inline void flush_thread(void) +{ +} + +unsigned long get_wchan(struct task_struct *p); + +#define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \ + THREAD_SIZE - 32 - sizeof(struct pt_regs)) +#define task_pt_regs(tsk) ((struct pt_regs *)__KSTK_TOS(tsk)) +#define KSTK_EIP(tsk) (task_pt_regs(tsk)->csr_era) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[3]) +#define KSTK_EUEN(tsk) (task_pt_regs(tsk)->csr_euen) +#define KSTK_ECFG(tsk) (task_pt_regs(tsk)->csr_ecfg) + +#define return_address() ({__asm__ __volatile__("":::"$1");__builtin_return_address(0);}) + +#ifdef CONFIG_CPU_HAS_PREFETCH + +#define ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch((x), 0, 1) + +#define ARCH_HAS_PREFETCHW +#define prefetchw(x) __builtin_prefetch((x), 1, 1) + +#endif + +#endif /* _ASM_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h new file mode 100644 index 000000000000..5dd5cae4e52b --- /dev/null +++ b/arch/loongarch/include/asm/ptrace.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_PTRACE_H +#define _ASM_PTRACE_H + +#include +#include +#include + +/* + * This struct defines the way the registers are stored on the stack during + * a system call/exception. If you add a register here, please also add it to + * regoffset_table[] in arch/loongarch/kernel/ptrace.c. + */ +struct pt_regs { + /* Main processor registers. */ + unsigned long regs[32]; + + /* Original syscall arg0. */ + unsigned long orig_a0; + + /* Special CSR registers. 
+	 */
+	unsigned long csr_era;
+	unsigned long csr_badvaddr;
+	unsigned long csr_crmd;
+	unsigned long csr_prmd;
+	unsigned long csr_euen;
+	unsigned long csr_ecfg;
+	unsigned long csr_estat;
+	unsigned long __last[0];
+} __aligned(8);
+
+static inline int regs_irqs_disabled(struct pt_regs *regs)
+{
+	return arch_irqs_disabled_flags(regs->csr_prmd);
+}
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->regs[3];
+}
+
+/*
+ * Don't use asm-generic/ptrace.h; it defines FP accessors that don't make
+ * sense on LoongArch. We would rather get an error if they are invoked.
+ */
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+					   unsigned long val)
+{
+	regs->csr_era = val;
+}
+
+/* Query offset/name of register from its name/offset */
+extern int regs_query_register_offset(const char *name);
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last))
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:	pt_regs from which the register value is retrieved.
+ * @offset:	offset number of the register.
+ *
+ * regs_get_register() returns the value of a register. The @offset is the
+ * offset of the register within the struct pt_regs pointed to by @regs.
+ * If @offset is bigger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+					      unsigned int offset)
+{
+	if (unlikely(offset > MAX_REG_OFFSET))
+		return 0;
+
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @addr:	address which is checked.
+ *
+ * regs_within_kernel_stack() checks whether @addr is within the kernel
+ * stack page(s). If @addr is within the kernel stack, it returns true.
+ * If not, it returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+					   unsigned long addr)
+{
+	return ((addr & ~(THREAD_SIZE - 1)) ==
+		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n-th entry of the kernel stack
+ * which is specified by @regs. If the @n-th entry is NOT within the kernel
+ * stack, this returns 0.
+ */
+static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+						      unsigned int n)
+{
+	unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+	addr += n;
+	if (regs_within_kernel_stack(regs, (unsigned long)addr))
+		return *addr;
+	else
+		return 0;
+}
+
+struct task_struct;
+
+/*
+ * Does the process account for user or for system time?
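+ *
+ * A minimal usage sketch (regs assumed to come from an exception frame):
+ *
+ *	if (user_mode(regs))
+ *		account_user_time(...);
+ *	else
+ *		account_system_time(...);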
+ */ +#define user_mode(regs) (((regs)->csr_prmd & PLV_MASK) == PLV_USER) + +static inline long regs_return_value(struct pt_regs *regs) +{ + return regs->regs[4]; +} + +#define instruction_pointer(regs) ((regs)->csr_era) +#define profile_pc(regs) instruction_pointer(regs) + +extern void die(const char *, struct pt_regs *) __noreturn; + +static inline void die_if_kernel(const char *str, struct pt_regs *regs) +{ + if (unlikely(!user_mode(regs))) + die(str, regs); +} + +#define current_pt_regs() \ +({ \ + unsigned long sp = (unsigned long)__builtin_frame_address(0); \ + (struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1 - 32) - 1; \ +}) + +/* Helpers for working with the user stack pointer */ + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->regs[3]; +} + +static inline void user_stack_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->regs[3] = val; +} + +#endif /* _ASM_PTRACE_H */ diff --git a/arch/loongarch/include/asm/reboot.h b/arch/loongarch/include/asm/reboot.h new file mode 100644 index 000000000000..48c3b1ea7145 --- /dev/null +++ b/arch/loongarch/include/asm/reboot.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_REBOOT_H +#define _ASM_REBOOT_H + +extern void (*pm_restart)(void); + +#endif /* _ASM_REBOOT_H */ diff --git a/arch/loongarch/include/asm/regdef.h b/arch/loongarch/include/asm/regdef.h new file mode 100644 index 000000000000..d6f09dac861c --- /dev/null +++ b/arch/loongarch/include/asm/regdef.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_REGDEF_H +#define _ASM_REGDEF_H + +#define zero $r0 /* wired zero */ +#define ra $r1 /* return address */ +#define tp $r2 +#define sp $r3 /* stack pointer */ +#define a0 $r4 /* argument registers, a0/a1 reused as v0/v1 for return value */ +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 /* caller saved */ +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 /* frame pointer */ +#define s0 $r23 /* callee saved */ +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 + +#endif /* _ASM_REGDEF_H */ diff --git a/arch/loongarch/include/asm/serial.h b/arch/loongarch/include/asm/serial.h new file mode 100644 index 000000000000..f0bead2e9995 --- /dev/null +++ b/arch/loongarch/include/asm/serial.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef __ASM__SERIAL_H +#define __ASM__SERIAL_H + +#define BASE_BAUD 0 +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) + +#endif /* __ASM__SERIAL_H */ diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h new file mode 100644 index 000000000000..ebd9d210f287 --- /dev/null +++ b/arch/loongarch/include/asm/setup.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */
+
+#ifndef _LOONGARCH_SETUP_H
+#define _LOONGARCH_SETUP_H
+
+#include
+#include
+
+#define VECSIZE 0x200
+
+extern unsigned long eentry;
+extern unsigned long tlbrentry;
+extern void cpu_cache_init(void);
+extern void per_cpu_trap_init(int cpu);
+extern void set_handler(unsigned long offset, void *addr, unsigned long len);
+extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len);
+
+#endif /* _LOONGARCH_SETUP_H */
diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h
new file mode 100644
index 000000000000..0a4af240090c
--- /dev/null
+++ b/arch/loongarch/include/asm/shmparam.h
@@ -0,0 +1,13 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef _ASM_SHMPARAM_H
+#define _ASM_SHMPARAM_H
+
+#define __ARCH_FORCE_SHMLBA	1
+
+#define SHMLBA	SZ_64K		 /* attach addr must be a multiple of this */
+
+#endif /* _ASM_SHMPARAM_H */
diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
new file mode 100644
index 000000000000..3d18cdf1b069
--- /dev/null
+++ b/arch/loongarch/include/asm/sparsemem.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LOONGARCH_SPARSEMEM_H
+#define _LOONGARCH_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+
+/*
+ * SECTION_SIZE_BITS	2^N: how big each section will be
+ * MAX_PHYSMEM_BITS	2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS	29 /* 2^29 = Largest Huge Page Size */
+#define MAX_PHYSMEM_BITS	48
+
+#endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int memory_add_physaddr_to_nid(u64 addr);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+#endif
+
+#define INIT_MEMBLOCK_RESERVED_REGIONS	(INIT_MEMBLOCK_REGIONS + NR_CPUS)
+
+#endif /* _LOONGARCH_SPARSEMEM_H */
diff --git a/arch/loongarch/include/asm/spinlock.h b/arch/loongarch/include/asm/spinlock.h
new file mode 100644
index 000000000000..52dcc6419525
--- /dev/null
+++ b/arch/loongarch/include/asm/spinlock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999, 2000, 06 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_SPINLOCK_H
+#define _ASM_SPINLOCK_H
+
+#include
+#include
+#include
+
+#endif /* _ASM_SPINLOCK_H */
diff --git a/arch/loongarch/include/asm/spinlock_types.h b/arch/loongarch/include/asm/spinlock_types.h
new file mode 100644
index 000000000000..1cbfe2451529
--- /dev/null
+++ b/arch/loongarch/include/asm/spinlock_types.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SPINLOCK_TYPES_H +#define _ASM_SPINLOCK_TYPES_H + +#include +#include + +#endif diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h new file mode 100644 index 000000000000..552ccf1f93f0 --- /dev/null +++ b/arch/loongarch/include/asm/stackframe.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_STACKFRAME_H +#define _ASM_STACKFRAME_H + +#include + +#include +#include +#include +#include +#include + +/* Make the addition of cfi info a little easier. */ + .macro cfi_rel_offset reg offset=0 docfi=0 + .if \docfi + .cfi_rel_offset \reg, \offset + .endif + .endm + + .macro cfi_st reg offset=0 docfi=0 + cfi_rel_offset \reg, \offset, \docfi + LONG_S \reg, sp, \offset + .endm + + .macro cfi_restore reg offset=0 docfi=0 + .if \docfi + .cfi_restore \reg + .endif + .endm + + .macro cfi_ld reg offset=0 docfi=0 + LONG_L \reg, sp, \offset + cfi_restore \reg \offset \docfi + .endm + + .macro BACKUP_T0T1 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + .endm + + .macro RELOAD_T0T1 + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + .endm + + .macro SAVE_TEMP docfi=0 + RELOAD_T0T1 + cfi_st t0, PT_R12, \docfi + cfi_st t1, PT_R13, \docfi + cfi_st t2, PT_R14, \docfi + cfi_st t3, PT_R15, \docfi + cfi_st t4, PT_R16, \docfi + cfi_st t5, PT_R17, \docfi + cfi_st t6, PT_R18, \docfi + cfi_st t7, PT_R19, \docfi + cfi_st t8, PT_R20, \docfi + .endm + + .macro SAVE_STATIC docfi=0 + cfi_st s0, PT_R23, \docfi + cfi_st s1, PT_R24, \docfi + cfi_st s2, PT_R25, \docfi + cfi_st s3, PT_R26, \docfi + cfi_st s4, PT_R27, \docfi + cfi_st s5, PT_R28, \docfi + cfi_st s6, PT_R29, \docfi + cfi_st s7, PT_R30, \docfi + cfi_st s8, PT_R31, \docfi + .endm + +/* + * get_saved_sp returns the SP for the current CPU by looking in the + * kernelsp array for it. It stores the current sp in t0 and loads the + * new value in sp. + */ + .macro get_saved_sp docfi=0 + la.abs t1, kernelsp + move t0, sp + .if \docfi + .cfi_register sp, t0 + .endif + LONG_L sp, t1, 0 + .endm + + .macro set_saved_sp stackp temp temp2 + la.abs \temp, kernelsp + LONG_S \stackp, \temp, 0 + .endm + + .macro SAVE_SOME docfi=0 + csrrd t1, LOONGARCH_CSR_PRMD + andi t1, t1, 0x3 /* extract pplv bit */ + move t0, sp + beqz t1, 8f + /* Called from user mode, new stack. 
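+	 * t0 still holds the old user sp at this point; it is stored to
+	 * PT_R3 below so the pre-exception stack pointer can be restored.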
*/ + get_saved_sp docfi=\docfi +8: + PTR_ADDI sp, sp, -PT_SIZE + .if \docfi + .cfi_def_cfa sp,0 + .endif + cfi_st t0, PT_R3, \docfi + cfi_rel_offset sp, PT_R3, \docfi + LONG_S zero, sp, PT_R0 + csrrd t0, LOONGARCH_CSR_PRMD + LONG_S t0, sp, PT_PRMD + csrrd t0, LOONGARCH_CSR_CRMD + LONG_S t0, sp, PT_CRMD + csrrd t0, LOONGARCH_CSR_EUEN + LONG_S t0, sp, PT_EUEN + csrrd t0, LOONGARCH_CSR_ECFG + LONG_S t0, sp, PT_ECFG + csrrd t0, LOONGARCH_CSR_ESTAT + PTR_S t0, sp, PT_ESTAT + cfi_st ra, PT_R1, \docfi + cfi_st a0, PT_R4, \docfi + cfi_st a1, PT_R5, \docfi + cfi_st a2, PT_R6, \docfi + cfi_st a3, PT_R7, \docfi + cfi_st a4, PT_R8, \docfi + cfi_st a5, PT_R9, \docfi + cfi_st a6, PT_R10, \docfi + cfi_st a7, PT_R11, \docfi + csrrd ra, LOONGARCH_CSR_ERA + LONG_S ra, sp, PT_ERA + .if \docfi + .cfi_rel_offset ra, PT_ERA + .endif + cfi_st tp, PT_R2, \docfi + cfi_st fp, PT_R22, \docfi + + /* Set thread_info if we're coming from user mode */ + csrrd t0, LOONGARCH_CSR_PRMD + andi t0, t0, 0x3 /* extract pplv bit */ + beqz t0, 9f + + li.d tp, ~_THREAD_MASK + and tp, tp, sp + cfi_st u0, PT_R21, \docfi + csrrd u0, PERCPU_BASE_KS +9: + .endm + + .macro SAVE_ALL docfi=0 + SAVE_SOME \docfi + SAVE_TEMP \docfi + SAVE_STATIC \docfi + .endm + + .macro RESTORE_TEMP docfi=0 + cfi_ld t0, PT_R12, \docfi + cfi_ld t1, PT_R13, \docfi + cfi_ld t2, PT_R14, \docfi + cfi_ld t3, PT_R15, \docfi + cfi_ld t4, PT_R16, \docfi + cfi_ld t5, PT_R17, \docfi + cfi_ld t6, PT_R18, \docfi + cfi_ld t7, PT_R19, \docfi + cfi_ld t8, PT_R20, \docfi + .endm + + .macro RESTORE_STATIC docfi=0 + cfi_ld s0, PT_R23, \docfi + cfi_ld s1, PT_R24, \docfi + cfi_ld s2, PT_R25, \docfi + cfi_ld s3, PT_R26, \docfi + cfi_ld s4, PT_R27, \docfi + cfi_ld s5, PT_R28, \docfi + cfi_ld s6, PT_R29, \docfi + cfi_ld s7, PT_R30, \docfi + cfi_ld s8, PT_R31, \docfi + .endm + + .macro RESTORE_SOME docfi=0 + LONG_L a0, sp, PT_PRMD + andi a0, a0, 0x3 /* extract pplv bit */ + beqz a0, 8f + cfi_ld u0, PT_R21, \docfi +8: + LONG_L a0, sp, PT_ERA + csrwr a0, LOONGARCH_CSR_ERA + LONG_L a0, sp, PT_PRMD + csrwr a0, LOONGARCH_CSR_PRMD + cfi_ld ra, PT_R1, \docfi + cfi_ld a0, PT_R4, \docfi + cfi_ld a1, PT_R5, \docfi + cfi_ld a2, PT_R6, \docfi + cfi_ld a3, PT_R7, \docfi + cfi_ld a4, PT_R8, \docfi + cfi_ld a5, PT_R9, \docfi + cfi_ld a6, PT_R10, \docfi + cfi_ld a7, PT_R11, \docfi + cfi_ld tp, PT_R2, \docfi + cfi_ld fp, PT_R22, \docfi + .endm + + .macro RESTORE_SP_AND_RET docfi=0 + cfi_ld sp, PT_R3, \docfi + ertn + .endm + + .macro RESTORE_ALL_AND_RET docfi=0 + RESTORE_STATIC \docfi + RESTORE_TEMP \docfi + RESTORE_SOME \docfi + RESTORE_SP_AND_RET \docfi + .endm + +#endif /* _ASM_STACKFRAME_H */ diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h new file mode 100644 index 000000000000..53cbb9250c0c --- /dev/null +++ b/arch/loongarch/include/asm/stacktrace.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _ASM_STACKTRACE_H +#define _ASM_STACKTRACE_H + +#include +#include +#include +#include + +#define STR_LONG_L __stringify(LONG_L) +#define STR_LONG_S __stringify(LONG_S) +#define STR_LONGSIZE __stringify(LONGSIZE) + +#define STORE_ONE_REG(r) \ + STR_LONG_S " $r" __stringify(r)", %1, "STR_LONGSIZE"*"__stringify(r)"\n\t" + +#define CSRRD_ONE_REG(reg) \ + __stringify(csrrd) " %0, "__stringify(reg)"\n\t" + +static __always_inline void prepare_frametrace(struct pt_regs *regs) +{ + __asm__ __volatile__( + /* Save $r1 */ + STORE_ONE_REG(1) + /* Use $r1 to save PC */ + 
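+ * (lose_fpu_inatomic() in switch_to() below writes back @prev's live FPU
+ * state before the general-purpose registers are switched.)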
"pcaddi $r1, 0\n\t" + STR_LONG_S " $r1, %0\n\t" + /* Restore $r1 */ + STR_LONG_L " $r1, %1, "STR_LONGSIZE"\n\t" + STORE_ONE_REG(2) + STORE_ONE_REG(3) + STORE_ONE_REG(4) + STORE_ONE_REG(5) + STORE_ONE_REG(6) + STORE_ONE_REG(7) + STORE_ONE_REG(8) + STORE_ONE_REG(9) + STORE_ONE_REG(10) + STORE_ONE_REG(11) + STORE_ONE_REG(12) + STORE_ONE_REG(13) + STORE_ONE_REG(14) + STORE_ONE_REG(15) + STORE_ONE_REG(16) + STORE_ONE_REG(17) + STORE_ONE_REG(18) + STORE_ONE_REG(19) + STORE_ONE_REG(20) + STORE_ONE_REG(21) + STORE_ONE_REG(22) + STORE_ONE_REG(23) + STORE_ONE_REG(24) + STORE_ONE_REG(25) + STORE_ONE_REG(26) + STORE_ONE_REG(27) + STORE_ONE_REG(28) + STORE_ONE_REG(29) + STORE_ONE_REG(30) + STORE_ONE_REG(31) + : "=m" (regs->csr_era) + : "r" (regs->regs) + : "memory"); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_BADV) : "=r" (regs->csr_badvaddr)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_CRMD) : "=r" (regs->csr_crmd)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_PRMD) : "=r" (regs->csr_prmd)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_EUEN) : "=r" (regs->csr_euen)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_ECFG) : "=r" (regs->csr_ecfg)); + __asm__ __volatile__(CSRRD_ONE_REG(LOONGARCH_CSR_ESTAT) : "=r" (regs->csr_estat)); +} + +#endif /* _ASM_STACKTRACE_H */ diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h new file mode 100644 index 000000000000..812389888e35 --- /dev/null +++ b/arch/loongarch/include/asm/string.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_STRING_H +#define _ASM_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *__s, int __c, size_t __count); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *__to, __const__ void *__from, size_t __n); + +#define __HAVE_ARCH_MEMMOVE +extern void *memmove(void *__dest, __const__ void *__src, size_t __n); + +#endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h new file mode 100644 index 000000000000..e2d4afefce24 --- /dev/null +++ b/arch/loongarch/include/asm/switch_to.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _ASM_SWITCH_TO_H +#define _ASM_SWITCH_TO_H + +#include +#include + +struct task_struct; + +/** + * __switch_to - switch execution of a task + * @prev: The task previously executed. + * @next: The task to begin executing. + * @next_ti: task_thread_info(next). + * + * This function is used whilst scheduling to save the context of prev & load + * the context of next. Returns prev. + */ +extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next, struct thread_info *next_ti); + +/* + * For newly created kernel threads switch_to() will return to + * ret_from_kernel_thread, newly created user threads to ret_from_fork. + * That is, everything following __switch_to() will be skipped for new threads. + * So everything that matters to new threads should be placed before __switch_to(). 
+ */ +#define switch_to(prev, next, last) \ +do { \ + lose_fpu_inatomic(1, prev); \ + (last) = __switch_to(prev, next, task_thread_info(next)); \ +} while (0) + +#endif /* _ASM_SWITCH_TO_H */ diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h new file mode 100644 index 000000000000..f6af611525b9 --- /dev/null +++ b/arch/loongarch/include/asm/syscall.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ + +#ifndef __ASM_LOONGARCH_SYSCALL_H +#define __ASM_LOONGARCH_SYSCALL_H + +#include +#include +#include +#include +#include +#include +#include +#include + +extern void *sys_call_table[]; + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[11]; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->regs[4] = regs->orig_a0; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->regs[4]; + + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->regs[4]; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->regs[4] = (long) error ? error : val; +} + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned long *args) +{ + args[0] = regs->orig_a0; + memcpy(&args[1], ®s->regs[5], 5 * sizeof(long)); +} + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_LOONGARCH64; +} + +#endif /* __ASM_LOONGARCH_SYSCALL_H */ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h new file mode 100644 index 000000000000..d12ff4747bbf --- /dev/null +++ b/arch/loongarch/include/asm/thread_info.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * thread_info.h: LoongArch low-level thread information + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_THREAD_INFO_H +#define _ASM_THREAD_INFO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +#include + +/* + * low level task data that entry.S needs immediate access to + * - this struct should fit entirely inside of one cache line + * - this struct shares the supervisor stack pages + * - if the contents of this structure are changed, the assembly constants + * must also be changed + */ +struct thread_info { + struct task_struct *task; /* main task structure */ + unsigned long flags; /* low level flags */ + unsigned long tp_value; /* thread pointer */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ + struct pt_regs *regs; + long syscall; /* syscall number */ +}; + +/* + * macros/functions for gaining access to the thread information structure + */ +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .flags = _TIF_FIXADE, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ +} + +/* How to get the thread information struct from C. 
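+ * thread_info lives at the bottom of the kernel stack, and the kernel
+ * keeps a pointer to it in $r2 (the ABI tp register).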
+ */
+register struct thread_info *__current_thread_info __asm__("$r2");
+
+static inline struct thread_info *current_thread_info(void)
+{
+	return __current_thread_info;
+}
+
+register unsigned long current_stack_pointer __asm__("$r3");
+
+#endif /* !__ASSEMBLY__ */
+
+/* thread information allocation */
+#define THREAD_SIZE		SZ_16K
+#define THREAD_MASK		(THREAD_SIZE - 1UL)
+#define THREAD_SIZE_ORDER	ilog2(THREAD_SIZE / PAGE_SIZE)
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ *   access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_NOTIFY_RESUME	3	/* callback before returning to user */
+#define TIF_NOTIFY_SIGNAL	4	/* signal notifications exist */
+#define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
+#define TIF_NOHZ		6	/* in adaptive nohz mode */
+#define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
+#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
+#define TIF_SYSCALL_TRACEPOINT	9	/* syscall tracepoint instrumentation */
+#define TIF_SECCOMP		10	/* secure computing */
+#define TIF_UPROBE		11	/* breakpointed or singlestepping */
+#define TIF_USEDFPU		12	/* FPU was used by this task this quantum (SMP) */
+#define TIF_USEDSIMD		13	/* SIMD has been used this quantum */
+#define TIF_MEMDIE		14	/* is terminating due to OOM killer */
+#define TIF_FIXADE		15	/* Fix address errors in software */
+#define TIF_LOGADE		16	/* Log address errors to syslog */
+#define TIF_32BIT_REGS		17	/* 32-bit general purpose registers */
+#define TIF_32BIT_ADDR		18	/* 32-bit address space */
+#define TIF_LOAD_WATCH		19	/* If set, load watch registers */
+#define TIF_SINGLESTEP		20	/* Single Step */
+#define TIF_LSX_CTX_LIVE	21	/* LSX context must be preserved */
+#define TIF_LASX_CTX_LIVE	22	/* LASX context must be preserved */
+
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_NOTIFY_SIGNAL	(1<<TIF_NOTIFY_SIGNAL)
+#define _TIF_NOHZ		(1<<TIF_NOHZ)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_UPROBE		(1<<TIF_UPROBE)
+#define _TIF_USEDFPU		(1<<TIF_USEDFPU)
+#define _TIF_USEDSIMD		(1<<TIF_USEDSIMD)
+#define _TIF_FIXADE		(1<<TIF_FIXADE)
+#define _TIF_LOGADE		(1<<TIF_LOGADE)
+#define _TIF_32BIT_REGS		(1<<TIF_32BIT_REGS)
+#define _TIF_32BIT_ADDR		(1<<TIF_32BIT_ADDR)
+#define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
+#define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+#define _TIF_LSX_CTX_LIVE	(1<<TIF_LSX_CTX_LIVE)
+#define _TIF_LASX_CTX_LIVE	(1<<TIF_LASX_CTX_LIVE)
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/time.h b/arch/loongarch/include/asm/time.h
new file mode 100644
--- /dev/null
+++ b/arch/loongarch/include/asm/time.h
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_TIME_H
+#define _ASM_TIME_H
+
+#include
+#include
+#include
+
+extern u64 cpu_clock_freq;
+extern u64 const_clock_freq;
+
+extern void sync_counter(void);
+
+static inline unsigned int calc_const_freq(void)
+{
+	unsigned int res;
+	unsigned int base_freq;
+	unsigned int cfm, cfd;
+
+	res = read_cpucfg(LOONGARCH_CPUCFG2);
+	if (!(res & CPUCFG2_LLFTP))
+		return 0;
+
+	base_freq = read_cpucfg(LOONGARCH_CPUCFG4);
+	res = read_cpucfg(LOONGARCH_CPUCFG5);
+	cfm = res & 0xffff;
+	cfd = (res >> 16) & 0xffff;
+
+	if (!base_freq || !cfm || !cfd)
+		return 0;
+
+	return (base_freq * cfm / cfd);
+}
+
+/*
+ * Initialize the calling CPU's timer interrupt as a clockevent device
+ */
+extern int constant_clockevent_init(void);
+extern int constant_clocksource_init(void);
+
+static inline void clockevent_set_clock(struct clock_event_device *cd,
+					unsigned int clock)
+{
+	clockevents_calc_mult_shift(cd, clock, 4);
+}
+
+#endif /* _ASM_TIME_H */
diff --git a/arch/loongarch/include/asm/timex.h b/arch/loongarch/include/asm/timex.h
new file mode 100644
index 000000000000..6d3aae7dbb44
--- /dev/null
+++ b/arch/loongarch/include/asm/timex.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_TIMEX_H
+#define _ASM_TIMEX_H
+
+#ifdef __KERNEL__
+
+#include
+
+#include
+#include
+#include
+
+/*
+ * Standard way to access the cycle counter.
+ * Currently only used on SMP for scheduling.
+ *
+ * We know that all SMP capable CPUs have cycle counters.
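+ *
+ * A rough measurement sketch (the counter frequency is implementation
+ * defined, see const_clock_freq):
+ *
+ *	cycles_t c0 = get_cycles();
+ *	do_something();
+ *	pr_debug("took %lu cycles\n", get_cycles() - c0);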
+ */ + +typedef unsigned long cycles_t; + +#define get_cycles get_cycles + +static inline cycles_t get_cycles(void) +{ + return drdtime(); +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_TIMEX_H */ diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h new file mode 100644 index 000000000000..a77aa5905472 --- /dev/null +++ b/arch/loongarch/include/asm/tlb.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_TLB_H +#define __ASM_TLB_H + +#include +#include +#include + +/* + * TLB Invalidate Flush + */ +static inline void tlbclr(void) +{ + __asm__ __volatile__("tlbclr"); +} + +static inline void tlbflush(void) +{ + __asm__ __volatile__("tlbflush"); +} + +/* + * TLB R/W operations. + */ +static inline void tlb_probe(void) +{ + __asm__ __volatile__("tlbsrch"); +} + +static inline void tlb_read(void) +{ + __asm__ __volatile__("tlbrd"); +} + +static inline void tlb_write_indexed(void) +{ + __asm__ __volatile__("tlbwr"); +} + +static inline void tlb_write_random(void) +{ + __asm__ __volatile__("tlbfill"); +} + +enum invtlb_ops { + /* Invalid all tlb */ + INVTLB_ALL = 0x0, + /* Invalid current tlb */ + INVTLB_CURRENT_ALL = 0x1, + /* Invalid all global=1 lines in current tlb */ + INVTLB_CURRENT_GTRUE = 0x2, + /* Invalid all global=0 lines in current tlb */ + INVTLB_CURRENT_GFALSE = 0x3, + /* Invalid global=0 and matched asid lines in current tlb */ + INVTLB_GFALSE_AND_ASID = 0x4, + /* Invalid addr with global=0 and matched asid in current tlb */ + INVTLB_ADDR_GFALSE_AND_ASID = 0x5, + /* Invalid addr with global=1 or matched asid in current tlb */ + INVTLB_ADDR_GTRUE_OR_ASID = 0x6, + /* Invalid matched gid in guest tlb */ + INVGTLB_GID = 0x9, + /* Invalid global=1, matched gid in guest tlb */ + INVGTLB_GID_GTRUE = 0xa, + /* Invalid global=0, matched gid in guest tlb */ + INVGTLB_GID_GFALSE = 0xb, + /* Invalid global=0, matched gid and asid in guest tlb */ + INVGTLB_GID_GFALSE_ASID = 0xc, + /* Invalid global=0 , matched gid, asid and addr in guest tlb */ + INVGTLB_GID_GFALSE_ASID_ADDR = 0xd, + /* Invalid global=1 , matched gid, asid and addr in guest tlb */ + INVGTLB_GID_GTRUE_ASID_ADDR = 0xe, + /* Invalid all gid gva-->gpa guest tlb */ + INVGTLB_ALLGID_GVA_TO_GPA = 0x10, + /* Invalid all gid gpa-->hpa tlb */ + INVTLB_ALLGID_GPA_TO_HPA = 0x11, + /* Invalid all gid tlb, including gva-->gpa and gpa-->hpa */ + INVTLB_ALLGID = 0x12, + /* Invalid matched gid gva-->gpa guest tlb */ + INVGTLB_GID_GVA_TO_GPA = 0x13, + /* Invalid matched gid gpa-->hpa tlb */ + INVTLB_GID_GPA_TO_HPA = 0x14, + /* Invalid matched gid tlb,including gva-->gpa and gpa-->hpa */ + INVTLB_GID_ALL = 0x15, + /* Invalid matched gid and addr gpa-->hpa tlb */ + INVTLB_GID_ADDR = 0x16, +}; + +/* + * invtlb op info addr + * (0x1 << 26) | (0x24 << 20) | (0x13 << 15) | + * (addr << 10) | (info << 5) | op + */ +static inline void invtlb(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + "parse_r addr,%0\n\t" + "parse_r info,%1\n\t" + ".word ((0x6498000) | (addr << 10) | (info << 5) | %2)\n\t" + : + : "r"(addr), "r"(info), "i"(op) + : + ); +} + +static inline void invtlb_addr(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + "parse_r addr,%0\n\t" + ".word ((0x6498000) | (addr << 10) | (0 << 5) | %1)\n\t" + : + : "r"(addr), "i"(op) + : + ); +} + +static inline void invtlb_info(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + "parse_r info,%0\n\t" + ".word ((0x6498000) | (0 << 10) | (info << 5) | %1)\n\t" + : 
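+		/* no output operands: %0 is the info register, %1 the op immediate */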
+ : "r"(info), "i"(op) + : + ); +} + +static inline void invtlb_all(u32 op, u32 info, u64 addr) +{ + __asm__ __volatile__( + ".word ((0x6498000) | (0 << 10) | (0 << 5) | %0)\n\t" + : + : "i"(op) + : + ); +} + +/* + * LoongArch doesn't need any special per-pte or per-vma handling, except + * we need to flush cache for area to be unmapped. + */ +#define tlb_start_vma(tlb, vma) \ + do { \ + if (!(tlb)->fullmm) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end); \ + } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) + +static void tlb_flush(struct mmu_gather *tlb); + +#define tlb_flush tlb_flush +#include + +static inline void tlb_flush(struct mmu_gather *tlb) +{ + struct vm_area_struct vma; + + vma.vm_mm = tlb->mm; + vma.vm_flags = 0; + if (tlb->fullmm) { + flush_tlb_mm(tlb->mm); + return; + } + + flush_tlb_range(&vma, tlb->start, tlb->end); +} + +extern void handle_tlb_load(void); +extern void handle_tlb_store(void); +extern void handle_tlb_modify(void); +extern void handle_tlb_refill(void); +extern void handle_tlb_protect(void); + +extern void dump_tlb_all(void); +extern void dump_tlb_regs(void); + +#endif /* __ASM_TLB_H */ diff --git a/arch/loongarch/include/asm/tlbflush.h b/arch/loongarch/include/asm/tlbflush.h new file mode 100644 index 000000000000..67e08f8ad794 --- /dev/null +++ b/arch/loongarch/include/asm/tlbflush.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_TLBFLUSH_H +#define __ASM_TLBFLUSH_H + +#include + +/* + * TLB flushing: + * + * - flush_tlb_all() flushes all processes TLB entries + * - flush_tlb_mm(mm) flushes the specified mm context TLB entries + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages + */ +extern void local_flush_tlb_all(void); +extern void local_flush_tlb_user(void); +extern void local_flush_tlb_kernel(void); +extern void local_flush_tlb_mm(struct mm_struct *mm); +extern void local_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +extern void local_flush_tlb_kernel_range(unsigned long start, + unsigned long end); +extern void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long page); +extern void local_flush_tlb_one(unsigned long vaddr); + +#define flush_tlb_all() local_flush_tlb_all() +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_range(vma, vmaddr, end) local_flush_tlb_range(vma, vmaddr, end) +#define flush_tlb_kernel_range(vmaddr,end) local_flush_tlb_kernel_range(vmaddr, end) +#define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) +#define flush_tlb_one(vaddr) local_flush_tlb_one(vaddr) + +#endif /* __ASM_TLBFLUSH_H */ diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h new file mode 100644 index 000000000000..51feaa64339a --- /dev/null +++ b/arch/loongarch/include/asm/topology.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_TOPOLOGY_H +#define __ASM_TOPOLOGY_H + +#include + +#define cpu_logical_map(cpu) 0 + +#include + +static inline void arch_fix_phys_package_id(int num, u32 slot) { } +#endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/loongarch/include/asm/types.h b/arch/loongarch/include/asm/types.h new file mode 100644 index 
000000000000..18dd9c01cce4
--- /dev/null
+++ b/arch/loongarch/include/asm/types.h
@@ -0,0 +1,25 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994, 1995, 1996, 1999 by Ralf Baechle
+ * Copyright (C) 2008 Wind River Systems,
+ *   written by Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ */
+#ifndef _ASM_TYPES_H
+#define _ASM_TYPES_H
+
+#include
+#include
+
+#ifdef __ASSEMBLY__
+#define _ULCAST_
+#define _U64CAST_
+#else
+#define _ULCAST_ (unsigned long)
+#define _U64CAST_ (u64)
+#endif
+
+#endif /* _ASM_TYPES_H */
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
new file mode 100644
index 000000000000..3ecd73641922
--- /dev/null
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2020 Loongson Technology Co., Ltd.
+ */
+#ifndef _ASM_UACCESS_H
+#define _ASM_UACCESS_H
+
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_64BIT
+
+extern u64 __ua_limit;
+
+#define __UA_LIMIT	__ua_limit
+
+#define __UA_ADDR	".dword"
+#define __UA_LA		"la.abs"
+
+#endif /* CONFIG_64BIT */
+
+/*
+ * Is an address valid? This does a straightforward calculation rather
+ * than a series of tests.
+ *
+ * The address is valid if:
+ *  - "addr" doesn't have any high-bits set
+ *  - AND "size" doesn't have any high-bits set
+ *  - AND "addr+size" doesn't have any high-bits set
+ *  - OR we are in kernel mode.
+ *
+ * __ua_size() is a trick to avoid runtime checking of positive constant
+ * sizes; for those we already know at compile time that the size is ok.
+ */
+#define __ua_size(size)							\
+	((__builtin_constant_p(size) && (signed long) (size) > 0) ? 0 : (size))
+
+/*
+ * access_ok: - Checks if a user space pointer is valid
+ * @addr: User space pointer to start of block to check
+ * @size: Size of block to check
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * Checks if a pointer to a block of memory in user space is valid.
+ *
+ * Returns true (nonzero) if the memory block may be valid, false (zero)
+ * if it is definitely invalid.
+ *
+ * Note that, depending on architecture, this function probably just
+ * checks that the pointer is in the user space range - after calling
+ * this function, memory access functions may still return -EFAULT.
+ */
+static inline int __access_ok(const void __user *p, unsigned long size)
+{
+	unsigned long addr = (unsigned long)p;
+	unsigned long end = addr + size - !!size;
+
+	return (__UA_LIMIT & (addr | end | __ua_size(size))) == 0;
+}
+
+#define access_ok(addr, size)						\
+	likely(__access_ok((addr), (size)))
+
+/*
+ * get_user: - Get a simple variable from user space.
+ * @x:	 Variable to store result.
+ * @ptr: Source address, in user space.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
+ *
+ * This macro copies a single simple variable from user space to kernel
+ * space. It supports simple types like char and int, but not larger
+ * data types like structures or arrays.
+ *
+ * @ptr must have pointer-to-simple-variable type, and the result of
+ * dereferencing @ptr must be assignable to @x without a cast.
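+ *
+ * Typical use (a hypothetical ioctl handler):
+ *
+ *	int val;
+ *	if (get_user(val, (int __user *)arg))
+ *		return -EFAULT;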
+ * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define get_user(x,ptr) \ +({ \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + \ + might_fault(); \ + access_ok(__p, sizeof(*__p)) ? __get_user((x), __p) : \ + ((x) = 0, -EFAULT); \ +}) + +/* + * put_user: - Write a simple value into user space. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Returns zero on success, or -EFAULT on error. + */ +#define put_user(x,ptr) \ +({ \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + \ + might_fault(); \ + access_ok(__p, sizeof(*__p)) ? __put_user((x), __p) : -EFAULT; \ +}) + +/* + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. + * @ptr: Source address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple variable from user space to kernel + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and the result of + * dereferencing @ptr must be assignable to @x without a cast. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. + * On error, the variable @x is set to zero. + */ +#define __get_user(x,ptr) \ +({ \ + int __gu_err = 0; \ + \ + __chk_user_ptr(ptr); \ + __get_user_common((x), sizeof(*(ptr)), ptr); \ + __gu_err; \ +}) + +/* + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. + * @ptr: Destination address, in user space. + * + * Context: User context only. This function may sleep if pagefaults are + * enabled. + * + * This macro copies a single simple value from kernel space to user + * space. It supports simple types like char and int, but not larger + * data types like structures or arrays. + * + * @ptr must have pointer-to-simple-variable type, and @x must be assignable + * to the result of dereferencing @ptr. + * + * Caller must check the pointer with access_ok() before calling this + * function. + * + * Returns zero on success, or -EFAULT on error. 
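+ *
+ * A matching sketch for the store direction, with the access_ok()
+ * check already done by the caller:
+ *
+ *	if (__put_user(val, (int __user *)arg))
+ *		return -EFAULT;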
+ */
+#define __put_user(x,ptr)						\
+({									\
+	int __pu_err = 0;						\
+	__typeof__(*(ptr)) __pu_val;					\
+									\
+	__pu_val = (x);							\
+	__chk_user_ptr(ptr);						\
+	__put_user_common(ptr, sizeof(*(ptr)));				\
+	__pu_err;							\
+})
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct __user *)(x))
+
+#ifdef CONFIG_32BIT
+#define __GET_DW(val, insn, ptr) __get_data_asm_ll32(val, insn, ptr)
+#endif
+#ifdef CONFIG_64BIT
+#define __GET_DW(val, insn, ptr) __get_data_asm(val, insn, ptr)
+#endif
+
+#define __get_user_common(val, size, ptr)				\
+do {									\
+	switch (size) {							\
+	case 1: __get_data_asm(val, "ld.b", ptr); break;		\
+	case 2: __get_data_asm(val, "ld.h", ptr); break;		\
+	case 4: __get_data_asm(val, "ld.w", ptr); break;		\
+	case 8: __GET_DW(val, "ld.d", ptr); break;			\
+	default: BUILD_BUG(); break;					\
+	}								\
+} while (0)
+
+#define __get_kernel_common(val, size, ptr) __get_user_common(val, size, ptr)
+
+#define __get_data_asm(val, insn, ptr)					\
+{									\
+	long __gu_tmp;							\
+									\
+	__asm__ __volatile__(						\
+	"1:	" insn "	%1, %2				\n"	\
+	"2:							\n"	\
+	"	.section .fixup,\"ax\"				\n"	\
+	"3:	li.w	%0, %3					\n"	\
+	"	or	%1, $r0, $r0				\n"	\
+	"	b	2b					\n"	\
+	"	.previous					\n"	\
+	"	.section __ex_table,\"a\"			\n"	\
+	"	"__UA_ADDR "\t1b, 3b				\n"	\
+	"	.previous					\n"	\
+	: "+r" (__gu_err), "=r" (__gu_tmp)				\
+	: "m" (__m(ptr)), "i" (-EFAULT));				\
+									\
+	(val) = (__typeof__(*(ptr))) __gu_tmp;				\
+}
+
+/*
+ * Get a 64-bit long long using 32-bit registers.
+ */
+#define __get_data_asm_ll32(val, insn, ptr)				\
+{									\
+	union {								\
+		unsigned long long	l;				\
+		__typeof__(*(ptr))	t;				\
+	} __gu_tmp;							\
+									\
+	__asm__ __volatile__(						\
+	"1:	ld.w	%1, (%2)				\n"	\
+	"2:	ld.w	%D1, 4(%2)				\n"	\
+	"3:							\n"	\
+	"	.section	.fixup,\"ax\"			\n"	\
+	"4:	li.w	%0, %3					\n"	\
+	"	slli.d	%1, $r0, 0				\n"	\
+	"	slli.d	%D1, $r0, 0				\n"	\
+	"	b	3b					\n"	\
+	"	.previous					\n"	\
+	"	.section	__ex_table,\"a\"		\n"	\
+	"	" __UA_ADDR "	1b, 4b				\n"	\
+	"	" __UA_ADDR "	2b, 4b				\n"	\
+	"	.previous					\n"	\
+	: "+r" (__gu_err), "=&r" (__gu_tmp.l)				\
+	: "r" (ptr), "i" (-EFAULT));					\
+									\
+	(val) = __gu_tmp.t;						\
+}
+
+#ifdef CONFIG_32BIT
+#define __PUT_DW(insn, ptr) __put_data_asm_ll32(insn, ptr)
+#endif
+#ifdef CONFIG_64BIT
+#define __PUT_DW(insn, ptr) __put_data_asm(insn, ptr)
+#endif
+
+#define __put_user_common(ptr, size)					\
+do {									\
+	switch (size) {							\
+	case 1: __put_data_asm("st.b", ptr); break;			\
+	case 2: __put_data_asm("st.h", ptr); break;			\
+	case 4: __put_data_asm("st.w", ptr); break;			\
+	case 8: __PUT_DW("st.d", ptr); break;				\
+	default: BUILD_BUG(); break;					\
+	}								\
+} while (0)
+
+#define __put_kernel_common(ptr, size) __put_user_common(ptr, size)
+
+#define __put_data_asm(insn, ptr)					\
+{									\
+	__asm__ __volatile__(						\
+	"1:	" insn "	%z2, %1		# __put_user_asm\n"	\
+	"2:							\n"	\
+	"	.section	.fixup,\"ax\"			\n"	\
+	"3:	li.w	%0, %3					\n"	\
+	"	b	2b					\n"	\
+	"	.previous					\n"	\
+	"	.section	__ex_table,\"a\"		\n"	\
+	"	" __UA_ADDR "	1b, 3b				\n"	\
+	"	.previous					\n"	\
+	: "+r" (__pu_err), "=m" (__m(ptr))				\
+	: "Jr" (__pu_val), "i" (-EFAULT));				\
+}
+
+#define __put_data_asm_ll32(insn, ptr)					\
+{									\
+	__asm__ __volatile__(						\
+	"1:	st.w	%1, (%2)	# __put_user_asm_ll32	\n"	\
+	"2:	st.w	%D1, 4(%2)				\n"	\
+	"3:							\n"	\
+	"	.section	.fixup,\"ax\"			\n"	\
+	"4:	li.w	%0, %3					\n"	\
+	"	b	3b					\n"	\
+	"	.previous					\n"	\
+	"	.section	__ex_table,\"a\"		\n"	\
+	"	" __UA_ADDR "	1b, 4b				\n"	\
+	"	" __UA_ADDR "	2b, 4b				\n"	\
+	"	.previous"						\
+	: "+r" (__pu_err)						\
+	: "r" (__pu_val), "r" (ptr), "i" (-EFAULT));			\
+}
+
+#define HAVE_GET_KERNEL_NOFAULT
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	int
__gu_err = 0;							\
+									\
+	__get_kernel_common(*((type *)(dst)), sizeof(type),		\
+			    (__force type *)(src));			\
+	if (unlikely(__gu_err))						\
+		goto err_label;						\
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	type __pu_val;							\
+	int __pu_err = 0;						\
+									\
+	__pu_val = *(__force type *)(src);				\
+	__put_kernel_common(((type *)(dst)), sizeof(type));		\
+	if (unlikely(__pu_err))						\
+		goto err_label;						\
+} while (0)
+
+extern unsigned long __copy_user(void *to, const void *from, __kernel_size_t n);
+
+static inline unsigned long __must_check
+raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	return __copy_user(to, from, n);
+}
+
+static inline unsigned long __must_check
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	return __copy_user(to, from, n);
+}
+
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+
+/*
+ * __clear_user: - Zero a block of memory in user space, with less checking.
+ * @addr: Destination address, in user space.
+ * @size: Number of bytes to zero.
+ *
+ * Zero a block of memory in user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+extern unsigned long __clear_user(void __user *addr, __kernel_size_t size);
+
+#define clear_user(addr,n)						\
+({									\
+	void __user * __cl_addr = (addr);				\
+	unsigned long __cl_size = (n);					\
+	if (__cl_size && access_ok(__cl_addr, __cl_size))		\
+		__cl_size = __clear_user(__cl_addr, __cl_size);		\
+	__cl_size;							\
+})
+
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
+extern long strnlen_user(const char __user *str, long n);
+
+#endif /* _ASM_UACCESS_H */
diff --git a/arch/loongarch/include/asm/unistd.h b/arch/loongarch/include/asm/unistd.h
new file mode 100644
index 000000000000..b21b66cca8d9
--- /dev/null
+++ b/arch/loongarch/include/asm/unistd.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ */
+
+#include
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h
new file mode 100644
index 000000000000..ad3ffae9b60e
--- /dev/null
+++ b/arch/loongarch/include/asm/vdso.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Huacai Chen
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_VDSO_H
+#define __ASM_VDSO_H
+
+#include
+#include
+
+#include
+
+/*
+ * struct loongarch_vdso_info - Details of a VDSO image.
+ * @vdso: Pointer to VDSO image (page-aligned).
+ * @size: Size of the VDSO image (page-aligned).
+ * @offset_sigreturn: Offset of the rt_sigreturn() trampoline.
+ * @code_mapping: Special mapping structure for vdso code.
+ * @data_mapping: Special mapping structure for vdso data.
+ *
+ * This structure contains details of a VDSO image, including the image data
+ * and offsets of certain symbols required by the kernel. It is generated as
+ * part of the VDSO build process, aside from the mapping page array, which is
+ * populated at runtime.
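+ *
+ * The kernel's single copy of this structure is the vdso_info object
+ * declared at the end of this header.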
+ */ +struct loongarch_vdso_info { + void *vdso; + unsigned long size; + unsigned long offset_sigreturn; + struct vm_special_mapping code_mapping; + struct vm_special_mapping data_mapping; +}; + +extern struct loongarch_vdso_info vdso_info; + +#endif /* __ASM_VDSO_H */ diff --git a/arch/loongarch/include/asm/vdso/clocksource.h b/arch/loongarch/include/asm/vdso/clocksource.h new file mode 100644 index 000000000000..13cd580d406d --- /dev/null +++ b/arch/loongarch/include/asm/vdso/clocksource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __ASM_VDSOCLOCKSOURCE_H +#define __ASM_VDSOCLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_CPU + +#endif /* __ASM_VDSOCLOCKSOURCE_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..5d2f7c5f0c03 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline long gettimeofday_fallback( + struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct __kernel_old_timeval *tv asm("a0") = _tv; + register struct timezone *tz asm("a1") = _tz; + register long nr asm("a7") = __NR_gettimeofday; + register long ret asm("a0"); + + asm volatile( + " syscall 0\n" + : "+r" (ret) + : "r" (nr), "r" (tv), "r" (tz) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); + + return ret; +} + +static __always_inline long clock_gettime_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long nr asm("a7") = __NR_clock_gettime; + register long ret asm("a0"); + + asm volatile( + " syscall 0\n" + : "+r" (ret) + : "r" (nr), "r" (clkid), "r" (ts) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); + + return ret; +} + +static __always_inline int clock_getres_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register clockid_t clkid asm("a0") = _clkid; + register struct __kernel_timespec *ts asm("a1") = _ts; + register long nr asm("a7") = __NR_clock_getres; + register long ret asm("a0"); + + asm volatile( + " syscall 0\n" + : "+r" (ret) + : "r" (nr), "r" (clkid), "r" (ts) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, + const struct vdso_data *vd) +{ + uint64_t count; + + __asm__ __volatile__( + " rdtime.d %0, $zero\n" + : "=r" (count)); + + return count; +} + +static inline bool loongarch_vdso_hres_capable(void) +{ + return true; +} +#define __arch_vdso_hres_capable loongarch_vdso_hres_capable + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return get_vdso_data(); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git 
a/arch/loongarch/include/asm/vdso/processor.h b/arch/loongarch/include/asm/vdso/processor.h new file mode 100644 index 000000000000..99aa47ca40e4 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/processor.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#ifndef __ASSEMBLY__ + +#define cpu_relax() barrier() + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h new file mode 100644 index 000000000000..3ddfde3f7557 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + */ + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +static inline unsigned long get_vdso_base(void) +{ + unsigned long addr; + + __asm__( + " la.pcrel %0, _start \n" + : "=r" (addr) + : + :); + + return addr; +} + +static inline const struct vdso_data *get_vdso_data(void) +{ + return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE); +} + +#endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..5de615383a22 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
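+ * The generic update code reaches it through __arch_get_k_vdso_data(),
+ * defined below.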
+ */
+static __always_inline
+struct vdso_data *__loongarch_get_k_vdso_data(void)
+{
+	return vdso_data;
+}
+#define __arch_get_k_vdso_data __loongarch_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/loongarch/include/asm/vermagic.h b/arch/loongarch/include/asm/vermagic.h
new file mode 100644
index 000000000000..aacbce545a79
--- /dev/null
+++ b/arch/loongarch/include/asm/vermagic.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#define MODULE_PROC_FAMILY "LOONGARCH "
+
+#ifdef CONFIG_32BIT
+#define MODULE_KERNEL_TYPE "32BIT "
+#elif defined CONFIG_64BIT
+#define MODULE_KERNEL_TYPE "64BIT "
+#endif
+
+#define MODULE_ARCH_VERMAGIC \
+	MODULE_PROC_FAMILY MODULE_KERNEL_TYPE
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/loongarch/include/asm/vmalloc.h b/arch/loongarch/include/asm/vmalloc.h
new file mode 100644
index 000000000000..965a0d41ac2d
--- /dev/null
+++ b/arch/loongarch/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_LOONGARCH_VMALLOC_H
+#define _ASM_LOONGARCH_VMALLOC_H
+
+#endif /* _ASM_LOONGARCH_VMALLOC_H */
diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..4aa680ca2e5f
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+generic-y += kvm_para.h
diff --git a/arch/loongarch/include/uapi/asm/auxvec.h b/arch/loongarch/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000000..bb0c71b4d64c
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/auxvec.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ */
+
+#ifndef __ASM_AUXVEC_H
+#define __ASM_AUXVEC_H
+
+/* Location of VDSO image. */
+#define AT_SYSINFO_EHDR		33
+
+#define AT_VECTOR_SIZE_ARCH	1 /* entries in ARCH_DLINFO */
+
+#endif /* __ASM_AUXVEC_H */
diff --git a/arch/loongarch/include/uapi/asm/bitfield.h b/arch/loongarch/include/uapi/asm/bitfield.h
new file mode 100644
index 000000000000..1bdadee88617
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/bitfield.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ */
+#ifndef __UAPI_ASM_BITFIELD_H
+#define __UAPI_ASM_BITFIELD_H
+
+/*
+ * Damn ... bitfields depend on byteorder :-(
+ */
+#define __BITFIELD_FIELD(field, more)					\
+	more								\
+	field;
+
+#endif /* __UAPI_ASM_BITFIELD_H */
diff --git a/arch/loongarch/include/uapi/asm/bitsperlong.h b/arch/loongarch/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..00b4ba1e5cdf
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_LOONGARCH_BITSPERLONG_H
+#define __ASM_LOONGARCH_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_LONG__ * 8)
+
+#include
+
+#endif /* __ASM_LOONGARCH_BITSPERLONG_H */
diff --git a/arch/loongarch/include/uapi/asm/break.h b/arch/loongarch/include/uapi/asm/break.h
new file mode 100644
index 000000000000..414b0f49e8ea
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/break.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ */
+#ifndef __UAPI_ASM_BREAK_H
+#define __UAPI_ASM_BREAK_H
+
+#define BRK_DEFAULT		0	/* Used as default */
+#define BRK_BUG			1	/* Used by BUG() */
+#define BRK_KDB			2	/* Used in KDB_ENTER() */
+#define BRK_MATHEMU		3	/* Used by FPU emulator */
+#define BRK_USERBP		4	/* User bp (used by debuggers) */
+#define BRK_SSTEPBP		5	/* User bp (used by debuggers) */
+#define BRK_OVERFLOW		6	/* Overflow check */
+#define BRK_DIVZERO		7	/* Divide by zero check */
+#define BRK_RANGE		8	/* Range error check */
+#define BRK_MULOVFL		9	/* Multiply overflow */
+#define BRK_KPROBE_BP		10	/* Kprobe break */
+#define BRK_KPROBE_SSTEPBP	11	/* Kprobe single step break */
+#define BRK_UPROBE_BP		12	/* See */
+#define BRK_UPROBE_XOLBP	13	/* See */
+
+#endif /* __UAPI_ASM_BREAK_H */
diff --git a/arch/loongarch/include/uapi/asm/byteorder.h b/arch/loongarch/include/uapi/asm/byteorder.h
new file mode 100644
index 000000000000..bbb85c62740c
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/byteorder.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ */
+#ifndef _ASM_BYTEORDER_H
+#define _ASM_BYTEORDER_H
+
+#include
+
+#endif /* _ASM_BYTEORDER_H */
diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h
new file mode 100644
index 000000000000..8840b72fa8e8
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/hwcap.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_HWCAP_H
+#define _UAPI_ASM_HWCAP_H
+
+/* HWCAP flags */
+#define HWCAP_LOONGARCH_CPUCFG		(1 << 0)
+#define HWCAP_LOONGARCH_LAM		(1 << 1)
+#define HWCAP_LOONGARCH_UAL		(1 << 2)
+#define HWCAP_LOONGARCH_FPU		(1 << 3)
+#define HWCAP_LOONGARCH_LSX		(1 << 4)
+#define HWCAP_LOONGARCH_LASX		(1 << 5)
+#define HWCAP_LOONGARCH_CRC32		(1 << 6)
+#define HWCAP_LOONGARCH_COMPLEX		(1 << 7)
+#define HWCAP_LOONGARCH_CRYPTO		(1 << 8)
+#define HWCAP_LOONGARCH_LVZ		(1 << 9)
+#define HWCAP_LOONGARCH_LBT_X86		(1 << 10)
+#define HWCAP_LOONGARCH_LBT_ARM		(1 << 11)
+#define HWCAP_LOONGARCH_LBT_MIPS	(1 << 12)
+
+#endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/loongarch/include/uapi/asm/inst.h b/arch/loongarch/include/uapi/asm/inst.h
new file mode 100644
index 000000000000..de8c554bb7aa
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/inst.h
@@ -0,0 +1,474 @@
+/*
SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Format of an instruction in memory. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _UAPI_ASM_INST_H +#define _UAPI_ASM_INST_H + +#include + +enum reg0_op { + tlbclr_op = 0x19208, gtlbclr_op=0x19208, + tlbflush_op = 0x19209, gtlbflush_op=0x19209, + tlbsrch_op = 0x1920a, gtlbsrch_op=0x1920a, + tlbrd_op = 0x1920b, gtlbrd_op=0x1920b, + tlbwr_op = 0x1920c, gtlbwr_op=0x1920c, + tlbfill_op = 0x1920d, gtlbfill_op=0x1920d, + ertn_op = 0x1920e, +}; + +enum reg0i15_op { + break_op = 0x54, dbcl_op, syscall_op, hypcall_op, + idle_op = 0xc91, dbar_op = 0x70e4, ibar_op, +}; + +enum reg0i26_op { + b_op = 0x14, bl_op, +}; + +enum reg1i20_op { + lu12iw_op = 0xa, lu32id_op, pcaddi_op, pcalau12i_op, + pcaddu12i_op, pcaddu18i_op, +}; + +enum reg1i21_op { + beqz_op = 0x10, bnez_op, bceqz_op, bcnez_op=0x12, jiscr0_op=0x12, jiscr1_op=0x12, +}; + +enum reg2_op { + gr2scr_op = 0x2, scr2gr_op, clow_op, + clzw_op, ctow_op, ctzw_op, clod_op, + clzd_op, ctod_op, ctzd_op, revb2h_op, + revb4h_op, revb2w_op, revbd_op, revh2w_op, + revhd_op, bitrev4b_op, bitrev8b_op, bitrevw_op, + bitrevd_op, extwh_op, extwb_op, rdtimelw_op, + rdtimehw_op, rdtimed_op, cpucfg_op, + iocsrrdb_op = 0x19200, iocsrrdh_op, iocsrrdw_op, iocsrrdd_op, + iocsrwrb_op, iocsrwrh_op, iocsrwrw_op, iocsrwrd_op, + movgr2fcsr_op = 0x4530, movfcsr2gr_op = 0x4532, + movgr2cf_op = 0x4536, movcf2gr_op = 0x4537, +}; + +enum reg2ui3_op { + rotrib_op = 0x261, rcrib_op = 0x281, +}; + +enum reg2ui4_op { + rotrih_op = 0x131, rcrih_op = 0x141, +}; + +enum reg2ui5_op { + slliw_op = 0x81, srliw_op = 0x89, sraiw_op = 0x91, rotriw_op = 0x99, + rcriw_op = 0xa1, +}; + +enum reg2ui6_op { + sllid_op = 0x41, srlid_op = 0x45, sraid_op = 0x49, rotrid_op = 0x4d, + rcrid_op = 0x51, +}; + +enum reg2ui12_op { + andi_op = 0xd, ori_op, xori_op, +}; + +enum reg2lsbw_op { + bstrinsw_op = 0x3, bstrpickw_op = 0x3, +}; + +enum reg2lsbd_op { + bstrinsd_op = 0x2, bstrpickd_op = 0x3, +}; + +enum reg2i8_op { + lddir_op = 0x190, ldpte_op, +}; + +enum reg2i8idx1_op { + vstelmd_op = 0x622, +}; + +enum reg2i8idx2_op { + vstelmw_op = 0x312, xvstelmd_op = 0x331, +}; + +enum reg2i8idx3_op { + vstelmh_op = 0x18a, xvstelmw_op = 0x199, +}; + +enum reg2i8idx4_op { + vstelmb_op = 0xc6, xvstelmh_op = 0xcd, +}; + +enum reg2i8idx5_op { + xvstelmb_op = 0x67, +}; + +enum reg2i9_op { + vldrepld_op = 0x602, xvldrepld_op = 0x642, +}; + +enum reg2i10_op { + vldreplw_op = 0x302, xvldreplw_op = 0x322, +}; + +enum reg2i11_op { + vldreplh_op = 0x182, xvldreplh_op = 0x192, +}; + +enum reg2i12_op { + slti_op = 0x8, sltui_op, addiw_op, addid_op, + lu52id_op, cacop_op = 0x18, xvldreplb_op = 0xca, + ldb_op = 0xa0, ldh_op, ldw_op, ldd_op, stb_op, sth_op, + stw_op, std_op, ldbu_op, ldhu_op, ldwu_op, preld_op, + flds_op, fsts_op, fldd_op, fstd_op, vld_op, vst_op, xvld_op, + xvst_op, ldlw_op = 0xb8, ldrw_op, ldld_op, ldrd_op, stlw_op, + strw_op, stld_op, strd_op, vldreplb_op = 0xc2, +}; + +enum reg2i14_op { + llw_op = 0x20, scw_op, lld_op, scd_op, ldptrw_op, stptrw_op, + ldptrd_op, stptrd_op, +}; + +enum reg2i16_op { + addu16id_op = 0x4, jirl_op = 0x13, beq_op = 0x16, bne_op, blt_op, bge_op, bltu_op, bgeu_op, +}; + +enum reg2csr_op { + csrrd_op = 0x4, csrwr_op = 0x4, csrxchg_op = 0x4, + gcsrrd_op = 0x5, gcsrwr_op = 0x5, gcsrxchg_op = 0x5, +}; + +enum reg3_op { + asrtled_op = 0x2, asrtgtd_op, + addw_op = 0x20, addd_op, subw_op, subd_op, + slt_op, sltu_op, maskeqz_op, masknez_op, + nor_op, and_op, or_op, xor_op, orn_op, + 
andn_op, sllw_op, srlw_op, sraw_op, slld_op, + srld_op, srad_op, rotrb_op, rotrh_op, + rotrw_op, rotrd_op, mulw_op, mulhw_op, + mulhwu_op, muld_op, mulhd_op, mulhdu_op, + mulwdw_op, mulwdwu_op, divw_op, modw_op, + divwu_op, modwu_op, divd_op, modd_op, + divdu_op, moddu_op, crcwbw_op, + crcwhw_op, crcwww_op, crcwdw_op, crccwbw_op, + crccwhw_op, crccwww_op, crccwdw_op, addu12iw_op, + addu12id_op, + adcb_op = 0x60, adch_op, adcw_op, adcd_op, + sbcb_op, sbch_op, sbcw_op, sbcd_op, + rcrb_op, rcrh_op, rcrw_op, rcrd_op, + ldxb_op = 0x7000, ldxh_op = 0x7008, ldxw_op = 0x7010, ldxd_op = 0x7018, + stxb_op = 0x7020, stxh_op = 0x7028, stxw_op = 0x7030, stxd_op = 0x7038, + ldxbu_op = 0x7040, ldxhu_op = 0x7048, ldxwu_op = 0x7050, + preldx_op = 0x7058, fldxs_op = 0x7060, fldxd_op = 0x7068, + fstxs_op = 0x7070, fstxd_op = 0x7078, vldx_op = 0x7080, + vstx_op = 0x7088, xvldx_op = 0x7090, xvstx_op = 0x7098, + amswapw_op = 0x70c0, amswapd_op, amaddw_op, amaddd_op, amandw_op, + amandd_op, amorw_op, amord_op, amxorw_op, amxord_op, ammaxw_op, + ammaxd_op, amminw_op, ammind_op, ammaxwu_op, ammaxdu_op, + amminwu_op, ammindu_op, amswap_dbw_op, amswap_dbd_op, amadd_dbw_op, + amadd_dbd_op, amand_dbw_op, amand_dbd_op, amor_dbw_op, amor_dbd_op, + amxor_dbw_op, amxor_dbd_op, ammax_dbw_op, ammax_dbd_op, ammin_dbw_op, + ammin_dbd_op, ammax_dbwu_op, ammax_dbdu_op, ammin_dbwu_op, + ammin_dbdu_op, fldgts_op = 0x70e8, fldgtd_op, + fldles_op, fldled_op, fstgts_op, fstgtd_op, fstles_op, fstled_op, + ldgtb_op, ldgth_op, ldgtw_op, ldgtd_op, ldleb_op, ldleh_op, ldlew_op, + ldled_op, stgtb_op, stgth_op, stgtw_op, stgtd_op, stleb_op, stleh_op, + stlew_op, stled_op, +}; + +enum reg3sa2_op { + alslw_op = 0x2, alslwu_op, bytepickw_op, alsld_op = 0x16, + +}; + +enum reg3sa3_op { + bytepickd_op = 0x3, +}; + +struct reg2_format { + __BITFIELD_FIELD(unsigned int opcode : 22, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))) +}; + +struct reg2ui3_format { + __BITFIELD_FIELD(unsigned int opcode : 19, + __BITFIELD_FIELD(unsigned int simmediate : 3, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui4_format { + __BITFIELD_FIELD(unsigned int opcode : 18, + __BITFIELD_FIELD(unsigned int simmediate : 4, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui5_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int simmediate : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui6_format { + __BITFIELD_FIELD(unsigned int opcode : 16, + __BITFIELD_FIELD(unsigned int simmediate : 6, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2lsbw_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int msbw : 5, + __BITFIELD_FIELD(unsigned int op : 1, + __BITFIELD_FIELD(unsigned int lsbw : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))))) +}; + +struct reg2lsbd_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int msbd : 6, + __BITFIELD_FIELD(unsigned int lsbd : 6, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct 
reg3sa2_format { + __BITFIELD_FIELD(unsigned int opcode : 15, + __BITFIELD_FIELD(unsigned int simmediate : 2, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3sa3_format { + __BITFIELD_FIELD(unsigned int opcode : 14, + __BITFIELD_FIELD(unsigned int simmediate : 3, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg3sa4_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int simmediate : 4, + __BITFIELD_FIELD(unsigned int rk : 5, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg4_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int fa : 5, + __BITFIELD_FIELD(unsigned int fk : 5, + __BITFIELD_FIELD(unsigned int fj : 5, + __BITFIELD_FIELD(unsigned int fd : 5, + ;))))) +}; + +struct reg2i8_format { + __BITFIELD_FIELD(unsigned int opcode : 14, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i8idx1_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int idx : 1, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx2_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int idx : 2, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx3_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int idx : 3, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx4_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int idx : 4, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i8idx5_format { + __BITFIELD_FIELD(unsigned int opcode : 9, + __BITFIELD_FIELD(unsigned int idx : 5, + __BITFIELD_FIELD(unsigned int simmediate : 8, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))))) +}; + +struct reg2i9_format { + __BITFIELD_FIELD(unsigned int opcode : 13, + __BITFIELD_FIELD(unsigned int simmediate : 9, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i10_format { + __BITFIELD_FIELD(unsigned int opcode : 12, + __BITFIELD_FIELD(unsigned int simmediate : 10, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i11_format { + __BITFIELD_FIELD(unsigned int opcode : 11, + __BITFIELD_FIELD(unsigned int simmediate : 11, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i12_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(signed int simmediate : 12, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2ui12_format { + __BITFIELD_FIELD(unsigned int opcode : 10, + __BITFIELD_FIELD(unsigned int simmediate : 12, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + 
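+
+/*
+ * A minimal decoding sketch (hypothetical helper, for illustration only):
+ * extract the sign-extended 12-bit immediate of a reg2i12-format
+ * instruction such as addi.d, relying solely on the layout defined above.
+ */
+static inline int loongarch_insn_imm12(unsigned int word)
+{
+	struct reg2i12_format f;
+
+	__builtin_memcpy(&f, &word, sizeof(f));
+	return f.simmediate;	/* signed bitfield, sign-extended on read */
+}
+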
+struct reg2i14_format { + __BITFIELD_FIELD(unsigned int opcode : 8, + __BITFIELD_FIELD(unsigned int simmediate : 14, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2i16_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate : 16, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg2csr_format { + __BITFIELD_FIELD(unsigned int opcode : 8, + __BITFIELD_FIELD(unsigned int csr : 14, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int rd : 5, + ;)))) +}; + +struct reg1i21_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate_l : 16, + __BITFIELD_FIELD(unsigned int rj : 5, + __BITFIELD_FIELD(unsigned int simmediate_h : 5, + ;)))) +}; + +struct reg1i20_format { + __BITFIELD_FIELD(unsigned int opcode : 7, + __BITFIELD_FIELD(unsigned int simmediate : 20, + __BITFIELD_FIELD(unsigned int rd : 5, + ;))) +}; + +struct reg0i15_format { + __BITFIELD_FIELD(unsigned int opcode : 17, + __BITFIELD_FIELD(unsigned int simmediate : 15, + ;)) +}; + +struct reg0i26_format { + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int simmediate_l : 16, + __BITFIELD_FIELD(unsigned int simmediate_h : 10, + ;))) +}; + +union loongarch_instruction { + unsigned int word; + unsigned short halfword[2]; + unsigned char byte[4]; + struct reg2_format reg2_format; + struct reg2ui3_format reg2ui3_format; + struct reg2ui4_format reg2ui4_format; + struct reg2ui5_format reg2ui5_format; + struct reg2ui6_format reg2ui6_format; + struct reg2ui12_format reg2ui12_format; + struct reg2lsbw_format reg2lsbw_format; + struct reg2lsbd_format reg2lsbd_format; + struct reg3_format reg3_format; + struct reg3sa2_format reg3sa2_format; + struct reg3sa3_format reg3sa3_format; + struct reg3sa4_format reg3sa4_format; + struct reg4_format reg4_format; + struct reg2i8_format reg2i8_format; + struct reg2i8idx1_format reg2i8idx1_format; + struct reg2i8idx2_format reg2i8idx2_format; + struct reg2i8idx3_format reg2i8idx3_format; + struct reg2i8idx4_format reg2i8idx4_format; + struct reg2i8idx5_format reg2i8idx5_format; + struct reg2i9_format reg2i9_format; + struct reg2i10_format reg2i10_format; + struct reg2i11_format reg2i11_format; + struct reg2i12_format reg2i12_format; + struct reg2i14_format reg2i14_format; + struct reg2i16_format reg2i16_format; + struct reg2csr_format reg2csr_format; + struct reg1i21_format reg1i21_format; + struct reg1i20_format reg1i20_format; + struct reg0i15_format reg0i15_format; + struct reg0i26_format reg0i26_format; +}; + +#endif /* _UAPI_ASM_INST_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..2a6efcc760c8 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +* Huacai Chen +*/ +#ifndef _UAPI_ASM_PTRACE_H +#define _UAPI_ASM_PTRACE_H + +#include + +#ifndef __KERNEL__ +#include +#endif + +/* + * For PTRACE_{POKE,PEEK}USR. 0 - 31 are GPRs, + * 32 is syscall's original ARG0, 33 is PC, 34 is BADVADDR. 
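+ * For example, a debugger can fetch the PC with
+ * ptrace(PTRACE_PEEKUSER, pid, PC, 0), which reads csr_era below.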
+ */ +#define GPR_BASE 0 +#define GPR_NUM 32 +#define GPR_END (GPR_BASE + GPR_NUM - 1) +#define ARG0 (GPR_END + 1) +#define PC (GPR_END + 2) +#define BADVADDR (GPR_END + 3) + +#define NUM_FPU_REGS 32 + +struct user_pt_regs { + /* Main processor registers. */ + unsigned long regs[32]; + + /* Original syscall arg0. */ + unsigned long orig_a0; + + /* Special CSR registers. */ + unsigned long csr_era; + unsigned long csr_badv; + unsigned long reserved[10]; +} __attribute__((aligned(8))); + +struct user_fp_state { + uint64_t fpr[32]; + uint64_t fcc; + uint32_t fcsr; +}; + +#define PTRACE_SYSEMU 0x1f +#define PTRACE_SYSEMU_SINGLESTEP 0x20 + +#endif /* _UAPI_ASM_PTRACE_H */ diff --git a/arch/loongarch/include/uapi/asm/reg.h b/arch/loongarch/include/uapi/asm/reg.h new file mode 100644 index 000000000000..24b79dbd3bee --- /dev/null +++ b/arch/loongarch/include/uapi/asm/reg.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Various register offset definitions for debuggers, core file + * examiners and whatnot. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef __UAPI_ASM_LOONGARCH_REG_H +#define __UAPI_ASM_LOONGARCH_REG_H + +#define LOONGARCH_EF_R0 0 +#define LOONGARCH_EF_R1 1 +#define LOONGARCH_EF_R2 2 +#define LOONGARCH_EF_R3 3 +#define LOONGARCH_EF_R4 4 +#define LOONGARCH_EF_R5 5 +#define LOONGARCH_EF_R6 6 +#define LOONGARCH_EF_R7 7 +#define LOONGARCH_EF_R8 8 +#define LOONGARCH_EF_R9 9 +#define LOONGARCH_EF_R10 10 +#define LOONGARCH_EF_R11 11 +#define LOONGARCH_EF_R12 12 +#define LOONGARCH_EF_R13 13 +#define LOONGARCH_EF_R14 14 +#define LOONGARCH_EF_R15 15 +#define LOONGARCH_EF_R16 16 +#define LOONGARCH_EF_R17 17 +#define LOONGARCH_EF_R18 18 +#define LOONGARCH_EF_R19 19 +#define LOONGARCH_EF_R20 20 +#define LOONGARCH_EF_R21 21 +#define LOONGARCH_EF_R22 22 +#define LOONGARCH_EF_R23 23 +#define LOONGARCH_EF_R24 24 +#define LOONGARCH_EF_R25 25 +#define LOONGARCH_EF_R26 26 +#define LOONGARCH_EF_R27 27 +#define LOONGARCH_EF_R28 28 +#define LOONGARCH_EF_R29 29 +#define LOONGARCH_EF_R30 30 +#define LOONGARCH_EF_R31 31 + +/* + * Saved special registers + */ +#define LOONGARCH_EF_ORIG_A0 32 +#define LOONGARCH_EF_CSR_ERA 33 +#define LOONGARCH_EF_CSR_BADV 34 +#define LOONGARCH_EF_CSR_CRMD 35 +#define LOONGARCH_EF_CSR_PRMD 36 +#define LOONGARCH_EF_CSR_EUEN 37 +#define LOONGARCH_EF_CSR_ECFG 38 +#define LOONGARCH_EF_CSR_ESTAT 39 + +#define LOONGARCH_EF_SIZE 320 /* size in bytes */ + +#endif /* __UAPI_ASM_LOONGARCH_REG_H */ diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000000..aeea923d0597 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +*/ +#ifndef _UAPI_ASM_SIGCONTEXT_H +#define _UAPI_ASM_SIGCONTEXT_H + +#include +#include + +/* FP context was used */ +#define SC_USED_FP (1 << 0) +/* Address error was due to memory load */ +#define SC_ADDRERR_RD (1 << 30) +/* Address error was due to memory store */ +#define SC_ADDRERR_WR (1 << 31) + +struct sigcontext { + __u64 sc_pc; + __u64 sc_regs[32]; + __u32 sc_flags; + __u64 sc_extcontext[0] __attribute__((__aligned__(16))); +}; + +#define 
CONTEXT_INFO_ALIGN 16 +struct sctx_info { + __u32 magic; + __u32 size; + __u64 padding; /* padding to 16 bytes */ +}; + +/* FPU context */ +#define FPU_CTX_MAGIC 0x46505501 +#define FPU_CTX_ALIGN 8 +struct fpu_context { + __u64 regs[32]; + __u64 fcc; + __u32 fcsr; +}; + +/* LSX context */ +#define LSX_CTX_MAGIC 0x53580001 +#define LSX_CTX_ALIGN 16 +struct lsx_context { + __u64 regs[2*32]; + __u64 fcc; + __u32 fcsr; + __u32 vcsr; +}; + +/* LASX context */ +#define LASX_CTX_MAGIC 0x41535801 +#define LASX_CTX_ALIGN 32 +struct lasx_context { + __u64 regs[4*32]; + __u64 fcc; + __u32 fcsr; + __u32 vcsr; +}; + +#endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/include/uapi/asm/signal.h b/arch/loongarch/include/uapi/asm/signal.h new file mode 100644 index 000000000000..c1333dc49f82 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/signal.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#ifndef _UAPI_ASM_SIGNAL_H +#define _UAPI_ASM_SIGNAL_H + +#define MINSIGSTKSZ 4096 +#define SIGSTKSZ 16384 + +#include + +#endif diff --git a/arch/loongarch/include/uapi/asm/swab.h b/arch/loongarch/include/uapi/asm/swab.h new file mode 100644 index 000000000000..ea16e98e18a6 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/swab.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technologies, Inc. All rights reserved. + * Authors: Jun Yi + */ +#ifndef _ASM_SWAB_H +#define _ASM_SWAB_H + +#include +#include + +#define __SWAB_64_THRU_32__ + +static inline __attribute_const__ __u16 __arch_swab16(__u16 x) +{ + __asm__( + " revb.2h %0, %1 \n" + : "=r" (x) + : "r" (x)); + + return x; +} +#define __arch_swab16 __arch_swab16 + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + __asm__( + " revb.2h %0, %1 \n" + " rotri.w %0, %0, 16 \n" + : "=r" (x) + : "r" (x)); + + return x; +} +#define __arch_swab32 __arch_swab32 + +#ifdef __loongarch64 +static inline __attribute_const__ __u64 __arch_swab64(__u64 x) +{ + __asm__( + " revb.4h %0, %1 \n" + " revh.d %0, %0 \n" + : "=r" (x) + : "r" (x)); + + return x; +} +#define __arch_swab64 __arch_swab64 +#endif /* __loongarch64 */ +#endif /* _ASM_SWAB_H */ diff --git a/arch/loongarch/include/uapi/asm/ucontext.h b/arch/loongarch/include/uapi/asm/ucontext.h new file mode 100644 index 000000000000..12577e22b1c7 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/ucontext.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LOONGARCH_UAPI_ASM_UCONTEXT_H +#define __LOONGARCH_UAPI_ASM_UCONTEXT_H + +/** + * struct ucontext - user context structure + * @uc_flags: + * @uc_link: + * @uc_stack: + * @uc_mcontext: holds basic processor state + * @uc_sigmask: + * @uc_extcontext: holds extended processor state + */ +struct ucontext { + unsigned long uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + sigset_t uc_sigmask; + /* There's some padding here to allow sigset_t to be expanded in the + * future. Though this is unlikely, other architectures put uc_sigmask + * at the end of this structure and explicitly state it can be + * expanded, so we didn't want to box ourselves in here. 
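+ * The reservation totals 1024 bits (128 bytes); with the kernel's 8-byte
+ * sigset_t, the __unused array below works out to 120 bytes.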
*/ + __u8 __unused[1024 / 8 - sizeof(sigset_t)]; + /* We can't put uc_sigmask at the end of this structure because we need + * to be able to expand sigcontext in the future. For example, the + * vector ISA extension will almost certainly add ISA state. We want + * to ensure all user-visible ISA state can be saved and restored via a + * ucontext, so we're putting this at the end in order to allow for + * infinite extensibility. Since we know this will be extended and we + * assume sigset_t won't be extended an extreme amount, we're + * prioritizing this. */ + struct sigcontext uc_mcontext; +}; + +#endif /* __LOONGARCH_UAPI_ASM_UCONTEXT_H */ diff --git a/arch/loongarch/include/uapi/asm/unistd.h b/arch/loongarch/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..b344b1f91715 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/unistd.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile new file mode 100644 index 000000000000..a68623bf57ed --- /dev/null +++ b/arch/loongarch/kernel/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux/LoongArch kernel. +# + +extra-y := head.o vmlinux.lds + +obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ + process.o ptrace.o reset.o setup.o signal.o io.o \ + syscall.o time.o topology.o traps.o unaligned.o \ + switch.o cacheinfo.o cmpxchg.o vdso.o + +obj-$(CONFIG_ACPI) += acpi.o +obj-$(CONFIG_EFI) += efi.o + +obj-$(CONFIG_MODULES) += module.o module-sections.o + +obj-$(CONFIG_CPU_HAS_FPU) += fpu.o + +obj-$(CONFIG_PROC_FS) += proc.o + +CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/access-helper.h b/arch/loongarch/kernel/access-helper.h new file mode 100644 index 000000000000..4a35ca81bd08 --- /dev/null +++ b/arch/loongarch/kernel/access-helper.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +static inline int __get_inst(u32 *i, u32 *p, bool user) +{ + return user ? get_user(*i, (u32 __user *)p) : get_kernel_nofault(*i, p); +} + +static inline int __get_addr(unsigned long *a, unsigned long *p, bool user) +{ + return user ? get_user(*a, (unsigned long __user *)p) : get_kernel_nofault(*a, p); +} diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c new file mode 100644 index 000000000000..4e9c25ae8e54 --- /dev/null +++ b/arch/loongarch/kernel/acpi.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * acpi.c - Architecture-Specific Low-Level ACPI Boot Support + * + * Author: Jianmin Lv + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +int acpi_disabled; +EXPORT_SYMBOL(acpi_disabled); +int acpi_noirq; +int acpi_pci_disabled; +EXPORT_SYMBOL(acpi_pci_disabled); +int acpi_strict = 1; /* We have no workarounds on LoongArch */ +int num_processors; +int disabled_cpus; +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_LPIC; + +u64 acpi_saved_sp; + +#define MAX_CORE_PIC 256 + +#define PREFIX "ACPI: " + +int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) +{ + if (irqp != NULL) + *irqp = acpi_register_gsi(NULL, gsi, -1, -1); + return (*irqp >= 0) ? 
0 : -EINVAL; +} +EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); + +int acpi_isa_irq_to_gsi(unsigned int isa_irq, u32 *gsi) +{ + if (gsi) + *gsi = isa_irq; + return 0; +} + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) +{ + int id; + struct irq_fwspec fwspec; + + switch (gsi) { + case GSI_MIN_CPU_IRQ ... GSI_MAX_CPU_IRQ: + fwspec.fwnode = liointc_domain->fwnode; + fwspec.param[0] = gsi - GSI_MIN_CPU_IRQ; + fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity); + fwspec.param_count = 2; + + return irq_create_fwspec_mapping(&fwspec); + + case GSI_MIN_LPC_IRQ ... GSI_MAX_LPC_IRQ: + if (!pch_lpc_domain) + return -EINVAL; + + fwspec.fwnode = pch_lpc_domain->fwnode; + fwspec.param[0] = gsi - GSI_MIN_LPC_IRQ; + fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity); + fwspec.param_count = 2; + + return irq_create_fwspec_mapping(&fwspec); + + case GSI_MIN_PCH_IRQ ... GSI_MAX_PCH_IRQ: + id = find_pch_pic(gsi); + if (id < 0) + return -EINVAL; + + fwspec.fwnode = pch_pic_domain[id]->fwnode; + fwspec.param[0] = gsi - acpi_pchpic[id]->gsi_base; + fwspec.param[1] = acpi_dev_get_irq_type(trigger, polarity); + fwspec.param_count = 2; + + return irq_create_fwspec_mapping(&fwspec); + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(acpi_register_gsi); + +void acpi_unregister_gsi(u32 gsi) +{ + +} +EXPORT_SYMBOL_GPL(acpi_unregister_gsi); + +void __init __iomem * __acpi_map_table(unsigned long phys, unsigned long size) +{ + + if (!phys || !size) + return NULL; + + return early_memremap(phys, size); +} +void __init __acpi_unmap_table(void __iomem *map, unsigned long size) +{ + if (!map || !size) + return; + + early_memunmap(map, size); +} + +void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + if (!memblock_is_memory(phys)) + return ioremap(phys, size); + else + return ioremap_cache(phys, size); +} + +void __init acpi_boot_table_init(void) +{ + /* + * If acpi_disabled, bail out + */ + if (acpi_disabled) + return; + + /* + * Initialize the ACPI boot-time table parser. 
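+ * (Tables are mapped through __acpi_map_table()/early_memremap() above;
+ * if this fails we fall back to booting with ACPI disabled.)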
+ */ + if (acpi_table_init()) { + disable_acpi(); + return; + } +} + +static int __init +acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_core_pic *processor = NULL; + + processor = (struct acpi_madt_core_pic *)header; + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(&header->common); + + return 0; +} + +static int __init +acpi_parse_liointc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_lio_pic *liointc = NULL; + + liointc = (struct acpi_madt_lio_pic *)header; + + if (BAD_MADT_ENTRY(liointc, end)) + return -EINVAL; + + acpi_liointc = liointc; + + return 0; +} + +static int __init +acpi_parse_eiointc(union acpi_subtable_headers *header, const unsigned long end) +{ + static int id = 0; + struct acpi_madt_eio_pic *eiointc = NULL; + + eiointc = (struct acpi_madt_eio_pic *)header; + + if (BAD_MADT_ENTRY(eiointc, end)) + return -EINVAL; + + acpi_eiointc[id++] = eiointc; + loongson_sysconf.nr_io_pics = id; + + return 0; +} + +static int __init +acpi_parse_htintc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_ht_pic *htintc = NULL; + + htintc = (struct acpi_madt_ht_pic *)header; + + if (BAD_MADT_ENTRY(htintc, end)) + return -EINVAL; + + acpi_htintc = htintc; + loongson_sysconf.nr_io_pics = 1; + + return 0; +} + +static int __init +acpi_parse_pch_pic(union acpi_subtable_headers *header, const unsigned long end) +{ + static int id = 0; + struct acpi_madt_bio_pic *pchpic = NULL; + + pchpic = (struct acpi_madt_bio_pic *)header; + + if (BAD_MADT_ENTRY(pchpic, end)) + return -EINVAL; + + acpi_pchpic[id++] = pchpic; + + return 0; +} + +static int __init +acpi_parse_pch_msi(union acpi_subtable_headers *header, const unsigned long end) +{ + static int id = 0; + struct acpi_madt_msi_pic *pchmsi = NULL; + + pchmsi = (struct acpi_madt_msi_pic *)header; + + if (BAD_MADT_ENTRY(pchmsi, end)) + return -EINVAL; + + acpi_pchmsi[id++] = pchmsi; + + return 0; +} + +static int __init +acpi_parse_pch_lpc(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_lpc_pic *pchlpc = NULL; + + pchlpc = (struct acpi_madt_lpc_pic *)header; + + if (BAD_MADT_ENTRY(pchlpc, end)) + return -EINVAL; + + acpi_pchlpc = pchlpc; + + return 0; +} + +static void __init acpi_process_madt(void) +{ + int error; + + /* Parse MADT CPUINTC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC, acpi_parse_cpuintc, MAX_CORE_PIC); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (CPUINTC entries), ACPI disabled\n"); + return; + } + + loongson_sysconf.nr_cpus = num_processors; + + /* Parse MADT LIOINTC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_LIO_PIC, acpi_parse_liointc, 1); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (LIOINTC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT EIOINTC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC, acpi_parse_eiointc, MAX_IO_PICS); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (EIOINTC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT HTVEC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_HT_PIC, acpi_parse_htintc, 1); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (HTVEC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT PCHPIC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_BIO_PIC, acpi_parse_pch_pic, MAX_IO_PICS); + if 
(error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (PCHPIC entries), ACPI disabled\n"); + return; + } + + /* Parse MADT PCHMSI entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, acpi_parse_pch_msi, MAX_IO_PICS); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (PCHMSI entries), ACPI disabled\n"); + return; + } + + /* Parse MADT PCHLPC entries */ + error = acpi_table_parse_madt(ACPI_MADT_TYPE_LPC_PIC, acpi_parse_pch_lpc, 1); + if (error < 0) { + disable_acpi(); + pr_err(PREFIX "Invalid BIOS MADT (PCHLPC entries), ACPI disabled\n"); + return; + } +} + +int __init acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + */ + if (acpi_disabled) + return -1; + + loongson_sysconf.boot_cpu_id = read_csr_cpuid(); + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + acpi_process_madt(); + + /* Do not enable ACPI SPCR console by default */ + acpi_parse_spcr(earlycon_acpi_spcr_enable, false); + + return 0; +} + +void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + memblock_reserve(addr, size); +} diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c new file mode 100644 index 000000000000..b4f7949ca87f --- /dev/null +++ b/arch/loongarch/kernel/asm-offsets.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * asm-offsets.c: Calculate pt_regs and task_struct offsets. + * + * Copyright (C) 2020 Loongson Technologies, Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +void output_ptreg_defines(void) +{ + COMMENT("LoongArch pt_regs offsets."); + OFFSET(PT_R0, pt_regs, regs[0]); + OFFSET(PT_R1, pt_regs, regs[1]); + OFFSET(PT_R2, pt_regs, regs[2]); + OFFSET(PT_R3, pt_regs, regs[3]); + OFFSET(PT_R4, pt_regs, regs[4]); + OFFSET(PT_R5, pt_regs, regs[5]); + OFFSET(PT_R6, pt_regs, regs[6]); + OFFSET(PT_R7, pt_regs, regs[7]); + OFFSET(PT_R8, pt_regs, regs[8]); + OFFSET(PT_R9, pt_regs, regs[9]); + OFFSET(PT_R10, pt_regs, regs[10]); + OFFSET(PT_R11, pt_regs, regs[11]); + OFFSET(PT_R12, pt_regs, regs[12]); + OFFSET(PT_R13, pt_regs, regs[13]); + OFFSET(PT_R14, pt_regs, regs[14]); + OFFSET(PT_R15, pt_regs, regs[15]); + OFFSET(PT_R16, pt_regs, regs[16]); + OFFSET(PT_R17, pt_regs, regs[17]); + OFFSET(PT_R18, pt_regs, regs[18]); + OFFSET(PT_R19, pt_regs, regs[19]); + OFFSET(PT_R20, pt_regs, regs[20]); + OFFSET(PT_R21, pt_regs, regs[21]); + OFFSET(PT_R22, pt_regs, regs[22]); + OFFSET(PT_R23, pt_regs, regs[23]); + OFFSET(PT_R24, pt_regs, regs[24]); + OFFSET(PT_R25, pt_regs, regs[25]); + OFFSET(PT_R26, pt_regs, regs[26]); + OFFSET(PT_R27, pt_regs, regs[27]); + OFFSET(PT_R28, pt_regs, regs[28]); + OFFSET(PT_R29, pt_regs, regs[29]); + OFFSET(PT_R30, pt_regs, regs[30]); + OFFSET(PT_R31, pt_regs, regs[31]); + OFFSET(PT_CRMD, pt_regs, csr_crmd); + OFFSET(PT_PRMD, pt_regs, csr_prmd); + OFFSET(PT_EUEN, pt_regs, csr_euen); + OFFSET(PT_ECFG, pt_regs, csr_ecfg); + OFFSET(PT_ESTAT, pt_regs, csr_estat); + OFFSET(PT_ERA, pt_regs, csr_era); + OFFSET(PT_BVADDR, pt_regs, csr_badvaddr); + OFFSET(PT_ORIG_A0, pt_regs, orig_a0); + DEFINE(PT_SIZE, sizeof(struct pt_regs)); + BLANK(); +} + +void output_task_defines(void) +{ + COMMENT("LoongArch task_struct offsets."); + OFFSET(TASK_STATE, task_struct, state); + OFFSET(TASK_THREAD_INFO, task_struct, stack); + OFFSET(TASK_FLAGS, task_struct, flags); + OFFSET(TASK_MM, task_struct, mm); + OFFSET(TASK_PID, task_struct, pid); + DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); + BLANK(); +} + +void 
output_thread_info_defines(void) +{ + COMMENT("LoongArch thread_info offsets."); + OFFSET(TI_TASK, thread_info, task); + OFFSET(TI_FLAGS, thread_info, flags); + OFFSET(TI_TP_VALUE, thread_info, tp_value); + OFFSET(TI_CPU, thread_info, cpu); + OFFSET(TI_PRE_COUNT, thread_info, preempt_count); + OFFSET(TI_REGS, thread_info, regs); + DEFINE(_THREAD_SIZE, THREAD_SIZE); + DEFINE(_THREAD_MASK, THREAD_MASK); + DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); + DEFINE(_IRQ_STACK_START, IRQ_STACK_START); + BLANK(); +} + +void output_thread_defines(void) +{ + COMMENT("LoongArch specific thread_struct offsets."); + OFFSET(THREAD_REG01, task_struct, thread.reg01); + OFFSET(THREAD_REG03, task_struct, thread.reg03); + OFFSET(THREAD_REG22, task_struct, thread.reg22); + OFFSET(THREAD_REG23, task_struct, thread.reg23); + OFFSET(THREAD_REG24, task_struct, thread.reg24); + OFFSET(THREAD_REG25, task_struct, thread.reg25); + OFFSET(THREAD_REG26, task_struct, thread.reg26); + OFFSET(THREAD_REG27, task_struct, thread.reg27); + OFFSET(THREAD_REG28, task_struct, thread.reg28); + OFFSET(THREAD_REG29, task_struct, thread.reg29); + OFFSET(THREAD_REG30, task_struct, thread.reg30); + OFFSET(THREAD_REG31, task_struct, thread.reg31); + OFFSET(THREAD_CSRCRMD, task_struct, + thread.csr_crmd); + OFFSET(THREAD_CSRPRMD, task_struct, + thread.csr_prmd); + OFFSET(THREAD_CSREUEN, task_struct, + thread.csr_euen); + OFFSET(THREAD_CSRECFG, task_struct, + thread.csr_ecfg); + + OFFSET(THREAD_SCR0, task_struct, thread.scr0); + OFFSET(THREAD_SCR1, task_struct, thread.scr1); + OFFSET(THREAD_SCR2, task_struct, thread.scr2); + OFFSET(THREAD_SCR3, task_struct, thread.scr3); + + OFFSET(THREAD_EFLAGS, task_struct, thread.eflags); + + OFFSET(THREAD_FPU, task_struct, thread.fpu); + + OFFSET(THREAD_BVADDR, task_struct, \ + thread.csr_badvaddr); + OFFSET(THREAD_ECODE, task_struct, \ + thread.error_code); + OFFSET(THREAD_TRAPNO, task_struct, thread.trap_nr); + BLANK(); +} + +void output_thread_fpu_defines(void) +{ + OFFSET(THREAD_FPR0, loongarch_fpu, fpr[0]); + OFFSET(THREAD_FPR1, loongarch_fpu, fpr[1]); + OFFSET(THREAD_FPR2, loongarch_fpu, fpr[2]); + OFFSET(THREAD_FPR3, loongarch_fpu, fpr[3]); + OFFSET(THREAD_FPR4, loongarch_fpu, fpr[4]); + OFFSET(THREAD_FPR5, loongarch_fpu, fpr[5]); + OFFSET(THREAD_FPR6, loongarch_fpu, fpr[6]); + OFFSET(THREAD_FPR7, loongarch_fpu, fpr[7]); + OFFSET(THREAD_FPR8, loongarch_fpu, fpr[8]); + OFFSET(THREAD_FPR9, loongarch_fpu, fpr[9]); + OFFSET(THREAD_FPR10, loongarch_fpu, fpr[10]); + OFFSET(THREAD_FPR11, loongarch_fpu, fpr[11]); + OFFSET(THREAD_FPR12, loongarch_fpu, fpr[12]); + OFFSET(THREAD_FPR13, loongarch_fpu, fpr[13]); + OFFSET(THREAD_FPR14, loongarch_fpu, fpr[14]); + OFFSET(THREAD_FPR15, loongarch_fpu, fpr[15]); + OFFSET(THREAD_FPR16, loongarch_fpu, fpr[16]); + OFFSET(THREAD_FPR17, loongarch_fpu, fpr[17]); + OFFSET(THREAD_FPR18, loongarch_fpu, fpr[18]); + OFFSET(THREAD_FPR19, loongarch_fpu, fpr[19]); + OFFSET(THREAD_FPR20, loongarch_fpu, fpr[20]); + OFFSET(THREAD_FPR21, loongarch_fpu, fpr[21]); + OFFSET(THREAD_FPR22, loongarch_fpu, fpr[22]); + OFFSET(THREAD_FPR23, loongarch_fpu, fpr[23]); + OFFSET(THREAD_FPR24, loongarch_fpu, fpr[24]); + OFFSET(THREAD_FPR25, loongarch_fpu, fpr[25]); + OFFSET(THREAD_FPR26, loongarch_fpu, fpr[26]); + OFFSET(THREAD_FPR27, loongarch_fpu, fpr[27]); + OFFSET(THREAD_FPR28, loongarch_fpu, fpr[28]); + OFFSET(THREAD_FPR29, loongarch_fpu, fpr[29]); + OFFSET(THREAD_FPR30, loongarch_fpu, fpr[30]); + OFFSET(THREAD_FPR31, loongarch_fpu, fpr[31]); + + OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); + 
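+	/* control/status words, consumed by the FPU context save/restore code */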
+	OFFSET(THREAD_FCC, loongarch_fpu, fcc);
+	OFFSET(THREAD_VCSR, loongarch_fpu, vcsr);
+	BLANK();
+}
+
+void output_mm_defines(void)
+{
+	COMMENT("Size of struct page");
+	DEFINE(STRUCT_PAGE_SIZE, sizeof(struct page));
+	BLANK();
+	COMMENT("Linux mm_struct offsets.");
+	OFFSET(MM_USERS, mm_struct, mm_users);
+	OFFSET(MM_PGD, mm_struct, pgd);
+	OFFSET(MM_CONTEXT, mm_struct, context);
+	BLANK();
+	DEFINE(_PGD_T_SIZE, sizeof(pgd_t));
+	DEFINE(_PMD_T_SIZE, sizeof(pmd_t));
+	DEFINE(_PTE_T_SIZE, sizeof(pte_t));
+	BLANK();
+	DEFINE(_PGD_T_LOG2, PGD_T_LOG2);
+#ifndef __PAGETABLE_PMD_FOLDED
+	DEFINE(_PMD_T_LOG2, PMD_T_LOG2);
+#endif
+	DEFINE(_PTE_T_LOG2, PTE_T_LOG2);
+	BLANK();
+	DEFINE(_PGD_ORDER, PGD_ORDER);
+#ifndef __PAGETABLE_PMD_FOLDED
+	DEFINE(_PMD_ORDER, PMD_ORDER);
+#endif
+	DEFINE(_PTE_ORDER, PTE_ORDER);
+	BLANK();
+	DEFINE(_PMD_SHIFT, PMD_SHIFT);
+	DEFINE(_PGDIR_SHIFT, PGDIR_SHIFT);
+	BLANK();
+	DEFINE(_PTRS_PER_PGD, PTRS_PER_PGD);
+	DEFINE(_PTRS_PER_PMD, PTRS_PER_PMD);
+	DEFINE(_PTRS_PER_PTE, PTRS_PER_PTE);
+	BLANK();
+	DEFINE(_PAGE_SHIFT, PAGE_SHIFT);
+	DEFINE(_PAGE_SIZE, PAGE_SIZE);
+	BLANK();
+}
+
+void output_sc_defines(void)
+{
+	COMMENT("Linux sigcontext offsets.");
+	OFFSET(SC_REGS, sigcontext, sc_regs);
+	OFFSET(SC_PC, sigcontext, sc_pc);
+	BLANK();
+}
+
+void output_signal_defines(void)
+{
+	COMMENT("Linux signal numbers.");
+	DEFINE(_SIGHUP, SIGHUP);
+	DEFINE(_SIGINT, SIGINT);
+	DEFINE(_SIGQUIT, SIGQUIT);
+	DEFINE(_SIGILL, SIGILL);
+	DEFINE(_SIGTRAP, SIGTRAP);
+	DEFINE(_SIGIOT, SIGIOT);
+	DEFINE(_SIGABRT, SIGABRT);
+	DEFINE(_SIGFPE, SIGFPE);
+	DEFINE(_SIGKILL, SIGKILL);
+	DEFINE(_SIGBUS, SIGBUS);
+	DEFINE(_SIGSEGV, SIGSEGV);
+	DEFINE(_SIGSYS, SIGSYS);
+	DEFINE(_SIGPIPE, SIGPIPE);
+	DEFINE(_SIGALRM, SIGALRM);
+	DEFINE(_SIGTERM, SIGTERM);
+	DEFINE(_SIGUSR1, SIGUSR1);
+	DEFINE(_SIGUSR2, SIGUSR2);
+	DEFINE(_SIGCHLD, SIGCHLD);
+	DEFINE(_SIGPWR, SIGPWR);
+	DEFINE(_SIGWINCH, SIGWINCH);
+	DEFINE(_SIGURG, SIGURG);
+	DEFINE(_SIGIO, SIGIO);
+	DEFINE(_SIGSTOP, SIGSTOP);
+	DEFINE(_SIGTSTP, SIGTSTP);
+	DEFINE(_SIGCONT, SIGCONT);
+	DEFINE(_SIGTTIN, SIGTTIN);
+	DEFINE(_SIGTTOU, SIGTTOU);
+	DEFINE(_SIGVTALRM, SIGVTALRM);
+	DEFINE(_SIGPROF, SIGPROF);
+	DEFINE(_SIGXCPU, SIGXCPU);
+	DEFINE(_SIGXFSZ, SIGXFSZ);
+	BLANK();
+}
diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c
new file mode 100644
index 000000000000..e4e8489b9dea
--- /dev/null
+++ b/arch/loongarch/kernel/cacheinfo.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch cacheinfo support
+ *
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#include
+#include
+
+/* Populates leaf and increments to next leaf */
+#define populate_cache(cache, leaf, c_level, c_type)		\
+do {								\
+	leaf->type = c_type;					\
+	leaf->level = c_level;					\
+	leaf->coherency_line_size = c->cache.linesz;		\
+	leaf->number_of_sets = c->cache.sets;			\
+	leaf->ways_of_associativity = c->cache.ways;		\
+	leaf->size = c->cache.linesz * c->cache.sets *		\
+		c->cache.ways;					\
+	leaf++;							\
+} while (0)
+
+int init_cache_level(unsigned int cpu)
+{
+	struct cpuinfo_loongarch *c = &current_cpu_data;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	int levels = 0, leaves = 0;
+
+	/*
+	 * If Dcache is not set, we assume the cache structures
+	 * are not properly initialized.
+	 */
+	if (c->dcache.waysize)
+		levels += 1;
+	else
+		return -ENOENT;
+
+	leaves += (c->icache.waysize) ? 2 : 1;
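+	/* a split L1 (I + D) contributes two leaves, a unified L1 only one */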
+
+	if (c->vcache.waysize) {
+		levels++;
+		leaves++;
+	}
+
+	if (c->scache.waysize) {
+		levels++;
+		leaves++;
+	}
+
+	if (c->tcache.waysize) {
+		levels++;
+		leaves++;
+	}
+
+	this_cpu_ci->num_levels = levels;
+	this_cpu_ci->num_leaves = leaves;
+	return 0;
+}
+
+static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
+					   struct cacheinfo *sib_leaf)
+{
+	return !((this_leaf->level == 1) || (this_leaf->level == 2));
+}
+
+static void cache_cpumap_setup(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf, *sib_leaf;
+	unsigned int index;
+
+	for (index = 0; index < this_cpu_ci->num_leaves; index++) {
+		unsigned int i;
+
+		this_leaf = this_cpu_ci->info_list + index;
+		/* skip if shared_cpu_map is already populated */
+		if (!cpumask_empty(&this_leaf->shared_cpu_map))
+			continue;
+
+		cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+		for_each_online_cpu(i) {
+			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
+
+			if (i == cpu || !sib_cpu_ci->info_list)
+				continue; /* skip if itself or no cacheinfo */
+			sib_leaf = sib_cpu_ci->info_list + index;
+			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
+				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
+				cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+			}
+		}
+	}
+}
+
+int populate_cache_leaves(unsigned int cpu)
+{
+	int level = 1;
+	struct cpuinfo_loongarch *c = &current_cpu_data;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+
+	if (c->icache.waysize) {
+		populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
+		populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
+	} else {
+		populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
+	}
+
+	if (c->vcache.waysize)
+		populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
+
+	if (c->scache.waysize)
+		populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED);
+
+	if (c->tcache.waysize)
+		populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
+
+	cache_cpumap_setup(cpu);
+	this_cpu_ci->cpu_map_populated = true;
+
+	return 0;
+}
diff --git a/arch/loongarch/kernel/cmpxchg.c b/arch/loongarch/kernel/cmpxchg.c
new file mode 100644
index 000000000000..5deb293aa0ec
--- /dev/null
+++ b/arch/loongarch/kernel/cmpxchg.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Huacai Chen
+ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 2017 Imagination Technologies
+ * Author: Paul Burton
+ */
+
+#include
+#include
+
+unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size)
+{
+	u32 old32, mask, temp;
+	volatile u32 *ptr32;
+	unsigned int shift;
+
+	/* Check that ptr is naturally aligned */
+	WARN_ON((unsigned long)ptr & (size - 1));
+
+	/* Mask value to the correct size. */
+	mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+	val &= mask;
+
+	/*
+	 * Calculate a shift & mask that correspond to the value we wish to
+	 * exchange within the naturally aligned 4 byte integer that includes
+	 * it.
+	 */
+	shift = (unsigned long)ptr & 0x3;
+	shift *= BITS_PER_BYTE;
+	mask <<= shift;
+
+	/*
+	 * Calculate a pointer to the naturally aligned 4 byte integer that
+	 * includes our byte of interest, and load its value.
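+	 * (For example, a two-byte exchange at an address with (ptr & 0x3) == 2
+	 * ends up with shift == 16 and mask == 0xffff0000 in that word.)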
+ */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " andn %1, %0, %4 \n" + " or %1, %1, %5 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) + : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (mask), "Jr" (val << shift) + : "memory"); + + return (old32 & mask) >> shift; +} + +unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) +{ + u32 old32, mask, temp; + volatile u32 *ptr32; + unsigned int shift; + + /* Check that ptr is naturally aligned */ + WARN_ON((unsigned long)ptr & (size - 1)); + + /* Mask inputs to the correct size. */ + mask = GENMASK((size * BITS_PER_BYTE) - 1, 0); + old &= mask; + new &= mask; + + /* + * Calculate a shift & mask that correspond to the value we wish to + * compare & exchange within the naturally aligned 4 byte integer + * that includes it. + */ + shift = (unsigned long)ptr & 0x3; + shift *= BITS_PER_BYTE; + old <<= shift; + new <<= shift; + mask <<= shift; + + /* + * Calculate a pointer to the naturally aligned 4 byte integer that + * includes our byte of interest, and load its value. + */ + ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3); + + asm volatile ( + "1: ll.w %0, %3 \n" + " and %1, %0, %4 \n" + " bne %1, %5, 2f \n" + " andn %1, %0, %4 \n" + " or %1, %1, %6 \n" + " sc.w %1, %2 \n" + " beqz %1, 1b \n" + "2: \n" + : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) + : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) + : "memory"); + + return (old32 & mask) >> shift; +} diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c new file mode 100644 index 000000000000..ee078ee003ce --- /dev/null +++ b/arch/loongarch/kernel/cpu-probe.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Processor capabilities determination functions. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* Hardware capabilities */ +unsigned int elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +/* + * Determine the FCSR mask for FPU hardware. 
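+ * (Probed by writing the register first with all non-architected bits
+ * cleared, then with them set; bits outside the architected fields that
+ * read back unchanged both times are reserved and end up set in fpu_mask.)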
+ */ +static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_loongarch *c) +{ + unsigned long sr, mask, fcsr, fcsr0, fcsr1; + + fcsr = c->fpu_csr0; + mask = FPU_CSR_ALL_X | FPU_CSR_ALL_E | FPU_CSR_ALL_S | FPU_CSR_RM; + + sr = read_csr_euen(); + enable_fpu(); + + fcsr0 = fcsr & mask; + write_fcsr(LOONGARCH_FCSR0, fcsr0); + fcsr0 = read_fcsr(LOONGARCH_FCSR0); + + fcsr1 = fcsr | ~mask; + write_fcsr(LOONGARCH_FCSR0, fcsr1); + fcsr1 = read_fcsr(LOONGARCH_FCSR0); + + write_fcsr(LOONGARCH_FCSR0, fcsr); + + write_csr_euen(sr); + + c->fpu_mask = ~(fcsr0 ^ fcsr1) & ~mask; +} + +static inline void set_elf_platform(int cpu, const char *plat) +{ + if (cpu == 0) + __elf_platform = plat; +} + +/* MAP BASE */ +unsigned long vm_map_base; +EXPORT_SYMBOL_GPL(vm_map_base); + +static void cpu_probe_addrbits(struct cpuinfo_loongarch *c) +{ +#ifdef __NEED_ADDRBITS_PROBE + c->pabits = (read_cpucfg(LOONGARCH_CPUCFG1) & CPUCFG1_PABITS) >> 4; + c->vabits = (read_cpucfg(LOONGARCH_CPUCFG1) & CPUCFG1_VABITS) >> 12; + vm_map_base = 0UL - (1UL << c->vabits); +#endif +} + +static void set_isa(struct cpuinfo_loongarch *c, unsigned int isa) +{ + switch (isa) { + case LOONGARCH_CPU_ISA_LA64: + c->isa_level |= LOONGARCH_CPU_ISA_LA64; + fallthrough; + case LOONGARCH_CPU_ISA_LA32S: + c->isa_level |= LOONGARCH_CPU_ISA_LA32S; + fallthrough; + case LOONGARCH_CPU_ISA_LA32R: + c->isa_level |= LOONGARCH_CPU_ISA_LA32R; + break; + } +} + +static void cpu_probe_common(struct cpuinfo_loongarch *c) +{ + unsigned int config; + unsigned long asid_mask; + + c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | + LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; + + elf_hwcap |= HWCAP_LOONGARCH_CRC32; + + config = read_cpucfg(LOONGARCH_CPUCFG1); + if (config & CPUCFG1_UAL) { + c->options |= LOONGARCH_CPU_UAL; + elf_hwcap |= HWCAP_LOONGARCH_UAL; + } + + config = read_cpucfg(LOONGARCH_CPUCFG2); + if (config & CPUCFG2_LAM) { + c->options |= LOONGARCH_CPU_LAM; + elf_hwcap |= HWCAP_LOONGARCH_LAM; + } + if (config & CPUCFG2_FP) { + c->options |= LOONGARCH_CPU_FPU; + elf_hwcap |= HWCAP_LOONGARCH_FPU; + } + if (config & CPUCFG2_COMPLEX) { + c->options |= LOONGARCH_CPU_COMPLEX; + elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; + } + if (config & CPUCFG2_CRYPTO) { + c->options |= LOONGARCH_CPU_CRYPTO; + elf_hwcap |= HWCAP_LOONGARCH_CRYPTO; + } + if (config & CPUCFG2_LVZP) { + c->options |= LOONGARCH_CPU_LVZ; + elf_hwcap |= HWCAP_LOONGARCH_LVZ; + } + + config = read_cpucfg(LOONGARCH_CPUCFG6); + if (config & CPUCFG6_PMP) + c->options |= LOONGARCH_CPU_PMP; + + config = iocsr_read32(LOONGARCH_IOCSR_FEATURES); + if (config & IOCSRF_CSRIPI) + c->options |= LOONGARCH_CPU_CSRIPI; + if (config & IOCSRF_EXTIOI) + c->options |= LOONGARCH_CPU_EXTIOI; + if (config & IOCSRF_FREQSCALE) + c->options |= LOONGARCH_CPU_SCALEFREQ; + if (config & IOCSRF_FLATMODE) + c->options |= LOONGARCH_CPU_FLATMODE; + if (config & IOCSRF_EIODECODE) + c->options |= LOONGARCH_CPU_EIODECODE; + if (config & IOCSRF_VM) + c->options |= LOONGARCH_CPU_HYPERVISOR; + + config = csr_read32(LOONGARCH_CSR_ASID); + config = (config & CSR_ASID_BIT) >> CSR_ASID_BIT_SHIFT; + asid_mask = GENMASK(config - 1, 0); + set_cpu_asid_mask(c, asid_mask); + + config = read_csr_prcfg1(); + c->ksave_mask = GENMASK((config & CSR_CONF1_KSNUM) - 1, 0); + c->ksave_mask &= ~(EXC_KSAVE_MASK | PERCPU_KSAVE_MASK | KVM_KSAVE_MASK); + + config = read_csr_prcfg3(); + switch (config & CSR_CONF3_TLBTYPE) { + case 0: + c->tlbsizemtlb = 0; + c->tlbsizestlbsets = 0; + c->tlbsizestlbways = 0; + c->tlbsize = 0; + break; + case 
1:
+		c->tlbsizemtlb = ((config & CSR_CONF3_MTLBSIZE) >> CSR_CONF3_MTLBSIZE_SHIFT) + 1;
+		c->tlbsizestlbsets = 0;
+		c->tlbsizestlbways = 0;
+		c->tlbsize = c->tlbsizemtlb + c->tlbsizestlbsets * c->tlbsizestlbways;
+		break;
+	case 2:
+		c->tlbsizemtlb = ((config & CSR_CONF3_MTLBSIZE) >> CSR_CONF3_MTLBSIZE_SHIFT) + 1;
+		c->tlbsizestlbsets = 1 << ((config & CSR_CONF3_STLBIDX) >> CSR_CONF3_STLBIDX_SHIFT);
+		c->tlbsizestlbways = ((config & CSR_CONF3_STLBWAYS) >> CSR_CONF3_STLBWAYS_SHIFT) + 1;
+		c->tlbsize = c->tlbsizemtlb + c->tlbsizestlbsets * c->tlbsizestlbways;
+		break;
+	default:
+		pr_warn("Warning: unknown TLB type\n");
+	}
+}
+
+#define MAX_NAME_LEN	32
+#define VENDOR_OFFSET	0
+#define CPUNAME_OFFSET	9
+
+static char cpu_full_name[MAX_NAME_LEN] = " - ";
+
+static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int cpu)
+{
+	uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]);
+	uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]);
+
+	__cpu_full_name[cpu] = cpu_full_name;
+	*vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR);
+	*cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME);
+
+	switch (c->processor_id & PRID_SERIES_MASK) {
+	case PRID_SERIES_LA132:
+		c->cputype = CPU_LOONGSON32;
+		set_isa(c, LOONGARCH_CPU_ISA_LA32S);
+		__cpu_family[cpu] = "Loongson-32bit";
+		pr_info("32-bit Loongson Processor probed (LA132 Core)\n");
+		break;
+	case PRID_SERIES_LA264:
+		c->cputype = CPU_LOONGSON64;
+		set_isa(c, LOONGARCH_CPU_ISA_LA64);
+		__cpu_family[cpu] = "Loongson-64bit";
+		pr_info("64-bit Loongson Processor probed (LA264 Core)\n");
+		break;
+	case PRID_SERIES_LA364:
+		c->cputype = CPU_LOONGSON64;
+		set_isa(c, LOONGARCH_CPU_ISA_LA64);
+		__cpu_family[cpu] = "Loongson-64bit";
+		pr_info("64-bit Loongson Processor probed (LA364 Core)\n");
+		break;
+	case PRID_SERIES_LA464:
+		c->cputype = CPU_LOONGSON64;
+		set_isa(c, LOONGARCH_CPU_ISA_LA64);
+		__cpu_family[cpu] = "Loongson-64bit";
+		pr_info("64-bit Loongson Processor probed (LA464 Core)\n");
+		break;
+	case PRID_SERIES_LA664:
+		c->cputype = CPU_LOONGSON64;
+		set_isa(c, LOONGARCH_CPU_ISA_LA64);
+		__cpu_family[cpu] = "Loongson-64bit";
+		pr_info("64-bit Loongson Processor probed (LA664 Core)\n");
+		break;
+	default: /* Default to 64 bit */
+		c->cputype = CPU_LOONGSON64;
+		set_isa(c, LOONGARCH_CPU_ISA_LA64);
+		__cpu_family[cpu] = "Loongson-64bit";
+		pr_info("64-bit Loongson Processor probed (Unknown Core)\n");
+	}
+}
+
+#ifdef CONFIG_64BIT
+/* For use by uaccess.h */
+u64 __ua_limit;
+EXPORT_SYMBOL(__ua_limit);
+#endif
+
+const char *__cpu_family[NR_CPUS];
+const char *__cpu_full_name[NR_CPUS];
+const char *__elf_platform;
+
+static void cpu_report(void)
+{
+	struct cpuinfo_loongarch *c = &current_cpu_data;
+
+	pr_info("CPU%d revision is: %08x (%s)\n",
+		smp_processor_id(), c->processor_id, cpu_family_string());
+	if (c->options & LOONGARCH_CPU_FPU)
+		pr_info("FPU%d revision is: %08x\n", smp_processor_id(), c->fpu_vers);
+}
+
+void cpu_probe(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct cpuinfo_loongarch *c = &current_cpu_data;
+
+	/*
+	 * Set a default ELF platform, cpu probe may later
+	 * overwrite it with a more precise value
+	 */
+	set_elf_platform(cpu, "loongarch");
+
+	c->cputype = CPU_UNKNOWN;
+	c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0);
+	c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3;
+
+	c->fpu_csr0 = FPU_CSR_RN;
+	c->fpu_mask = FPU_CSR_RSVD;
+
+	cpu_probe_common(c);
+
+	per_cpu_trap_init(cpu);
+
+	switch (c->processor_id & PRID_COMP_MASK) {
+	case PRID_COMP_LOONGSON:
+		cpu_probe_loongson(c, cpu);
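+		/* fills in cputype, the ISA level and the full CPU name */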
break; + } + + BUG_ON(!__cpu_family[cpu]); + BUG_ON(c->cputype == CPU_UNKNOWN); + + cpu_probe_addrbits(c); + +#ifdef CONFIG_64BIT + if (cpu == 0) + __ua_limit = ~((1ull << cpu_vabits) - 1); +#endif + + cpu_report(); +} diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c new file mode 100644 index 000000000000..7bcd8fc57956 --- /dev/null +++ b/arch/loongarch/kernel/efi.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * EFI partition + * + * Just for ACPI here, complete it when implementing EFI runtime. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * Jianmin Lv: + * Huacai Chen: + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static unsigned long efi_nr_tables; +static unsigned long efi_config_table; +static unsigned long screen_info_table __initdata = EFI_INVALID_TABLE_ADDR; + +static efi_system_table_t *efi_systab; +static efi_config_table_type_t arch_tables[] __initdata = { + {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table, "SINFO"}, + {}, +}; + +static void __init init_screen_info(void) +{ + struct screen_info *si; + + if (screen_info_table == EFI_INVALID_TABLE_ADDR) + return; + + si = early_memremap_ro(screen_info_table, sizeof(*si)); + if (!si) { + pr_err("Could not map screen_info config table\n"); + return; + } + screen_info = *si; + memset(si, 0, sizeof(*si)); + early_memunmap(si, sizeof(*si)); + + if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI) + memblock_reserve(screen_info.lfb_base, screen_info.lfb_size); +} + +void __init efi_runtime_init(void) +{ + if (!efi_enabled(EFI_BOOT) || !efi_systab->runtime) + return; + + if (efi_runtime_disabled()) { + pr_info("EFI runtime services will be disabled.\n"); + return; + } + + efi.runtime = (efi_runtime_services_t *)efi_systab->runtime; + efi.runtime_version = (unsigned int)efi.runtime->hdr.revision; + + efi_native_runtime_setup(); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); +} + +void __init efi_init(void) +{ + int size; + void *config_tables; + + if (!efi_system_table) + return; + + efi_systab = (efi_system_table_t *)early_memremap_ro(efi_system_table, sizeof(*efi_systab)); + if (!efi_systab) { + pr_err("Can't find EFI system table.\n"); + return; + } + + set_bit(EFI_64BIT, &efi.flags); + efi_nr_tables = efi_systab->nr_tables; + efi_config_table = (unsigned long)efi_systab->tables; + + size = sizeof(efi_config_table_t); + config_tables = early_memremap(efi_config_table, efi_nr_tables * size); + efi_config_parse_tables(config_tables, efi_systab->nr_tables, arch_tables); + early_memunmap(config_tables, efi_nr_tables * size); + + init_screen_info(); +} diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c new file mode 100644 index 000000000000..900e8d377b7a --- /dev/null +++ b/arch/loongarch/kernel/elf.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include + +int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, + bool is_interp, struct arch_elf_state *state) +{ + return 0; +} + +int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr, + struct arch_elf_state *state) +{ + return 0; +} + +void loongarch_set_personality_fcsr(struct arch_elf_state *state) +{ + current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0; +} diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S new file mode 100644 index 000000000000..1803840381d3 --- /dev/null +++ b/arch/loongarch/kernel/entry.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include + + .text + .cfi_sections .debug_frame + .align 5 +SYM_FUNC_START(handle_syscall) + csrrd t0, PERCPU_BASE_KS + la.abs t1, kernelsp + add.d t1, t1, t0 + move t2, sp + ld.d sp, t1, 0 + + addi.d sp, sp, -PT_SIZE + cfi_st t2, PT_R3 + cfi_rel_offset sp, PT_R3 + st.d zero, sp, PT_R0 + csrrd t2, LOONGARCH_CSR_PRMD + st.d t2, sp, PT_PRMD + csrrd t2, LOONGARCH_CSR_CRMD + st.d t2, sp, PT_CRMD + csrrd t2, LOONGARCH_CSR_EUEN + st.d t2, sp, PT_EUEN + csrrd t2, LOONGARCH_CSR_ECFG + st.d t2, sp, PT_ECFG + csrrd t2, LOONGARCH_CSR_ESTAT + st.d t2, sp, PT_ESTAT + cfi_st ra, PT_R1 + cfi_st a0, PT_R4 + cfi_st a1, PT_R5 + cfi_st a2, PT_R6 + cfi_st a3, PT_R7 + cfi_st a4, PT_R8 + cfi_st a5, PT_R9 + cfi_st a6, PT_R10 + cfi_st a7, PT_R11 + csrrd ra, LOONGARCH_CSR_ERA + st.d ra, sp, PT_ERA + cfi_rel_offset ra, PT_ERA + + cfi_st tp, PT_R2 + cfi_st u0, PT_R21 + cfi_st fp, PT_R22 + + SAVE_STATIC + + move u0, t0 + li.d tp, ~_THREAD_MASK + and tp, tp, sp + + move a0, sp + bl do_syscall + + RESTORE_ALL_AND_RET +SYM_FUNC_END(handle_syscall) + +SYM_CODE_START(ret_from_fork) + bl schedule_tail # a0 = struct task_struct *prev + move a0, sp + bl syscall_exit_to_user_mode + RESTORE_STATIC + RESTORE_SOME + RESTORE_SP_AND_RET +SYM_CODE_END(ret_from_fork) + +SYM_CODE_START(ret_from_kernel_thread) + bl schedule_tail # a0 = struct task_struct *prev + move a0, s1 + jirl ra, s0, 0 + move a0, sp + bl syscall_exit_to_user_mode + RESTORE_STATIC + RESTORE_SOME + RESTORE_SP_AND_RET +SYM_CODE_END(ret_from_kernel_thread) diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S new file mode 100644 index 000000000000..1ecf418bcefc --- /dev/null +++ b/arch/loongarch/kernel/fpu.S @@ -0,0 +1,264 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2019 Pei Huang + * Copyright (C) 2019 Lu Zeng + * Copyright (C) 2020 Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define FPU_REG_WIDTH 8 +#define LSX_REG_WIDTH 16 +#define LASX_REG_WIDTH 32 + + .macro EX insn, reg, src, offs +.ex\@: \insn \reg, \src, \offs + .section __ex_table,"a" + PTR .ex\@, fault + .previous + .endm + + .macro sc_save_fp base + EX fst.d $f0, \base, (0 * FPU_REG_WIDTH) + EX fst.d $f1, \base, (1 * FPU_REG_WIDTH) + EX fst.d $f2, \base, (2 * FPU_REG_WIDTH) + EX fst.d $f3, \base, (3 * FPU_REG_WIDTH) + EX fst.d $f4, \base, (4 * FPU_REG_WIDTH) + EX fst.d $f5, \base, (5 * FPU_REG_WIDTH) + EX fst.d $f6, \base, (6 * FPU_REG_WIDTH) + EX fst.d $f7, \base, (7 * FPU_REG_WIDTH) + EX fst.d $f8, \base, (8 * FPU_REG_WIDTH) + EX fst.d $f9, \base, (9 * FPU_REG_WIDTH) + EX fst.d $f10, \base, (10 * FPU_REG_WIDTH) + EX fst.d $f11, \base, (11 * FPU_REG_WIDTH) + EX fst.d $f12, \base, (12 * FPU_REG_WIDTH) + EX fst.d $f13, \base, (13 * FPU_REG_WIDTH) + EX fst.d $f14, \base, (14 * FPU_REG_WIDTH) + EX fst.d $f15, \base, (15 * FPU_REG_WIDTH) + EX fst.d $f16, \base, (16 * FPU_REG_WIDTH) + EX fst.d $f17, \base, (17 * FPU_REG_WIDTH) + EX fst.d $f18, \base, (18 * FPU_REG_WIDTH) + EX fst.d $f19, \base, (19 * FPU_REG_WIDTH) + EX fst.d $f20, \base, (20 * FPU_REG_WIDTH) + EX fst.d $f21, \base, (21 * FPU_REG_WIDTH) + EX fst.d $f22, \base, (22 * FPU_REG_WIDTH) + EX fst.d $f23, \base, (23 * FPU_REG_WIDTH) + EX fst.d $f24, \base, (24 * FPU_REG_WIDTH) + EX fst.d $f25, \base, (25 * FPU_REG_WIDTH) + EX fst.d $f26, \base, (26 * FPU_REG_WIDTH) + EX fst.d $f27, \base, (27 * FPU_REG_WIDTH) + EX fst.d $f28, \base, (28 * FPU_REG_WIDTH) + EX fst.d $f29, \base, (29 * FPU_REG_WIDTH) + EX fst.d $f30, \base, (30 * FPU_REG_WIDTH) + EX fst.d $f31, \base, (31 * FPU_REG_WIDTH) + .endm + + .macro sc_restore_fp base + EX fld.d $f0, \base, (0 * FPU_REG_WIDTH) + EX fld.d $f1, \base, (1 * FPU_REG_WIDTH) + EX fld.d $f2, \base, (2 * FPU_REG_WIDTH) + EX fld.d $f3, \base, (3 * FPU_REG_WIDTH) + EX fld.d $f4, \base, (4 * FPU_REG_WIDTH) + EX fld.d $f5, \base, (5 * FPU_REG_WIDTH) + EX fld.d $f6, \base, (6 * FPU_REG_WIDTH) + EX fld.d $f7, \base, (7 * FPU_REG_WIDTH) + EX fld.d $f8, \base, (8 * FPU_REG_WIDTH) + EX fld.d $f9, \base, (9 * FPU_REG_WIDTH) + EX fld.d $f10, \base, (10 * FPU_REG_WIDTH) + EX fld.d $f11, \base, (11 * FPU_REG_WIDTH) + EX fld.d $f12, \base, (12 * FPU_REG_WIDTH) + EX fld.d $f13, \base, (13 * FPU_REG_WIDTH) + EX fld.d $f14, \base, (14 * FPU_REG_WIDTH) + EX fld.d $f15, \base, (15 * FPU_REG_WIDTH) + EX fld.d $f16, \base, (16 * FPU_REG_WIDTH) + EX fld.d $f17, \base, (17 * FPU_REG_WIDTH) + EX fld.d $f18, \base, (18 * FPU_REG_WIDTH) + EX fld.d $f19, \base, (19 * FPU_REG_WIDTH) + EX fld.d $f20, \base, (20 * FPU_REG_WIDTH) + EX fld.d $f21, \base, (21 * FPU_REG_WIDTH) + EX fld.d $f22, \base, (22 * FPU_REG_WIDTH) + EX fld.d $f23, \base, (23 * FPU_REG_WIDTH) + EX fld.d $f24, \base, (24 * FPU_REG_WIDTH) + EX fld.d $f25, \base, (25 * FPU_REG_WIDTH) + EX fld.d $f26, \base, (26 * FPU_REG_WIDTH) + EX fld.d $f27, \base, (27 * FPU_REG_WIDTH) + EX fld.d $f28, \base, (28 * FPU_REG_WIDTH) + EX fld.d $f29, \base, (29 * FPU_REG_WIDTH) + EX fld.d $f30, \base, (30 * FPU_REG_WIDTH) + EX fld.d $f31, \base, (31 * FPU_REG_WIDTH) + .endm + + .macro sc_save_fcc base, tmp0, tmp1 + movcf2gr \tmp0, $fcc0 + move \tmp1, \tmp0 + movcf2gr \tmp0, $fcc1 + bstrins.d \tmp1, \tmp0, 15, 8 + movcf2gr \tmp0, $fcc2 + bstrins.d 
\tmp1, \tmp0, 23, 16
+ movcf2gr \tmp0, $fcc3
+ bstrins.d \tmp1, \tmp0, 31, 24
+ movcf2gr \tmp0, $fcc4
+ bstrins.d \tmp1, \tmp0, 39, 32
+ movcf2gr \tmp0, $fcc5
+ bstrins.d \tmp1, \tmp0, 47, 40
+ movcf2gr \tmp0, $fcc6
+ bstrins.d \tmp1, \tmp0, 55, 48
+ movcf2gr \tmp0, $fcc7
+ bstrins.d \tmp1, \tmp0, 63, 56
+ EX st.d \tmp1, \base, 0
+ .endm
+
+ .macro sc_restore_fcc base, tmp0, tmp1
+ EX ld.d \tmp0, \base, 0
+ bstrpick.d \tmp1, \tmp0, 7, 0
+ movgr2cf $fcc0, \tmp1
+ bstrpick.d \tmp1, \tmp0, 15, 8
+ movgr2cf $fcc1, \tmp1
+ bstrpick.d \tmp1, \tmp0, 23, 16
+ movgr2cf $fcc2, \tmp1
+ bstrpick.d \tmp1, \tmp0, 31, 24
+ movgr2cf $fcc3, \tmp1
+ bstrpick.d \tmp1, \tmp0, 39, 32
+ movgr2cf $fcc4, \tmp1
+ bstrpick.d \tmp1, \tmp0, 47, 40
+ movgr2cf $fcc5, \tmp1
+ bstrpick.d \tmp1, \tmp0, 55, 48
+ movgr2cf $fcc6, \tmp1
+ bstrpick.d \tmp1, \tmp0, 63, 56
+ movgr2cf $fcc7, \tmp1
+ .endm
+
+ .macro sc_save_fcsr base, tmp0
+ movfcsr2gr \tmp0, fcsr0
+ EX st.w \tmp0, \base, 0
+ .endm
+
+ .macro sc_restore_fcsr base, tmp0
+ EX ld.w \tmp0, \base, 0
+ movgr2fcsr fcsr0, \tmp0
+ .endm
+
+ .macro sc_save_vcsr base, tmp0
+ movfcsr2gr \tmp0, vcsr16
+ EX st.w \tmp0, \base, 0
+ .endm
+
+ .macro sc_restore_vcsr base, tmp0
+ EX ld.w \tmp0, \base, 0
+ movgr2fcsr vcsr16, \tmp0
+ .endm
+
+/*
+ * Save a thread's fp context.
+ */
+SYM_FUNC_START(_save_fp)
+ fpu_save_csr a0 t1
+ fpu_save_double a0 t1 # clobbers t1
+ fpu_save_cc a0 t1 t2 # clobbers t1, t2
+ jirl zero, ra, 0
+SYM_FUNC_END(_save_fp)
+EXPORT_SYMBOL(_save_fp)
+
+/*
+ * Restore a thread's fp context.
+ */
+SYM_FUNC_START(_restore_fp)
+ fpu_restore_double a0 t1 # clobbers t1
+ fpu_restore_csr a0 t1
+ fpu_restore_cc a0 t1 t2 # clobbers t1, t2
+ jirl zero, ra, 0
+SYM_FUNC_END(_restore_fp)
+
+/*
+ * Load the FPU with signaling NaNs. The bit pattern we use has the
+ * property that it represents a signaling NaN whether it is interpreted
+ * as single or as double precision.
+ *
+ * The value to initialize fcsr0 to comes in $a0.
+ */ + +SYM_FUNC_START(_init_fpu) + li.w t1, CSR_EUEN_FPEN + csrxchg t1, t1, LOONGARCH_CSR_EUEN + + movgr2fcsr fcsr0, a0 + + li.w t1, -1 # SNaN + + movgr2fr.d $f0, t1 + movgr2fr.d $f1, t1 + movgr2fr.d $f2, t1 + movgr2fr.d $f3, t1 + movgr2fr.d $f4, t1 + movgr2fr.d $f5, t1 + movgr2fr.d $f6, t1 + movgr2fr.d $f7, t1 + movgr2fr.d $f8, t1 + movgr2fr.d $f9, t1 + movgr2fr.d $f10, t1 + movgr2fr.d $f11, t1 + movgr2fr.d $f12, t1 + movgr2fr.d $f13, t1 + movgr2fr.d $f14, t1 + movgr2fr.d $f15, t1 + movgr2fr.d $f16, t1 + movgr2fr.d $f17, t1 + movgr2fr.d $f18, t1 + movgr2fr.d $f19, t1 + movgr2fr.d $f20, t1 + movgr2fr.d $f21, t1 + movgr2fr.d $f22, t1 + movgr2fr.d $f23, t1 + movgr2fr.d $f24, t1 + movgr2fr.d $f25, t1 + movgr2fr.d $f26, t1 + movgr2fr.d $f27, t1 + movgr2fr.d $f28, t1 + movgr2fr.d $f29, t1 + movgr2fr.d $f30, t1 + movgr2fr.d $f31, t1 + + jirl zero, ra, 0 +SYM_FUNC_END(_init_fpu) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_fp_context) + sc_save_fcc a1 t1 t2 + sc_save_fcsr a2 t1 + sc_save_fp a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_fp_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_fp_context) + sc_restore_fp a0 + sc_restore_fcc a1 t1 t2 + sc_restore_fcsr a2 t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_fp_context) + +SYM_FUNC_START(fault) + li.w a0, -EFAULT # failure + jirl zero, ra, 0 +SYM_FUNC_END(fault) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S new file mode 100644 index 000000000000..03a058c48a5f --- /dev/null +++ b/arch/loongarch/kernel/genex.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include + + .align 5 +SYM_FUNC_START(__arch_cpu_idle) + /* start of rollback region */ + LONG_L t0, tp, TI_FLAGS + nop + andi t0, t0, _TIF_NEED_RESCHED + bnez t0, 1f + nop + nop + nop + idle 0 + /* end of rollback region */ +1: jirl zero, ra, 0 +SYM_FUNC_END(__arch_cpu_idle) + +SYM_FUNC_START(handle_vint) + BACKUP_T0T1 + SAVE_ALL + la.abs t1, __arch_cpu_idle + LONG_L t0, sp, PT_ERA + /* 32 byte rollback region */ + ori t0, t0, 0x1f + xori t0, t0, 0x1f + bne t0, t1, 1f + LONG_S t0, sp, PT_ERA +1: move a0, sp + move a1, sp + la.abs t0, do_vint + jirl ra, t0, 0 + RESTORE_ALL_AND_RET +SYM_FUNC_END(handle_vint) + +SYM_FUNC_START(except_vec_cex) + b cache_parity_error +SYM_FUNC_END(except_vec_cex) + + .macro build_prep_badv + csrrd t0, LOONGARCH_CSR_BADV + PTR_S t0, sp, PT_BVADDR + .endm + + .macro build_prep_fcsr + movfcsr2gr a1, fcsr0 + .endm + + .macro build_prep_none + .endm + + .macro BUILD_HANDLER exception handler prep + .align 5 + SYM_FUNC_START(handle_\exception) + BACKUP_T0T1 + SAVE_ALL + build_prep_\prep + move a0, sp + la.abs t0, do_\handler + jirl ra, t0, 0 + RESTORE_ALL_AND_RET + SYM_FUNC_END(handle_\exception) + .endm + + BUILD_HANDLER ade ade badv + BUILD_HANDLER ale ale badv + BUILD_HANDLER bp bp none + BUILD_HANDLER fpe fpe fcsr + BUILD_HANDLER fpu fpu none + BUILD_HANDLER lsx lsx none + BUILD_HANDLER lasx lasx none + BUILD_HANDLER lbt lbt none + BUILD_HANDLER ri ri none + BUILD_HANDLER watch watch none + BUILD_HANDLER reserved reserved none /* others */ + +SYM_FUNC_START(handle_sys) + la.abs t0, handle_syscall + jirl zero, t0, 0 +SYM_FUNC_END(handle_sys) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S new file mode 100644 index 000000000000..d3cafd1a8d90 --- /dev/null +++ 
b/arch/loongarch/kernel/head.S @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include + +#include +#include +#include +#include +#include +#include + +SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE) + + __REF + +SYM_CODE_START(kernel_entry) # kernel entry point + + /* Config direct window and set PG */ + li.d t0, CSR_DMW0_INIT # UC, PLV0, 0x8000 xxxx xxxx xxxx + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0, 0x9000 xxxx xxxx xxxx + csrwr t0, LOONGARCH_CSR_DMWIN1 + /* Enable PG */ + li.w t0, 0xb0 # PLV=0, IE=0, PG=1 + csrwr t0, LOONGARCH_CSR_CRMD + li.w t0, 0x04 # PLV=0, PIE=1, PWE=0 + csrwr t0, LOONGARCH_CSR_PRMD + li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 + csrwr t0, LOONGARCH_CSR_EUEN + + /* We might not get launched at the address the kernel is linked to, + so we jump there. */ + la.abs t0, 0f + jirl zero, t0, 0 +0: + la t0, __bss_start # clear .bss + st.d zero, t0, 0 + la t1, __bss_stop - LONGSIZE +1: + addi.d t0, t0, LONGSIZE + st.d zero, t0, 0 + bne t0, t1, 1b + + la t0, fw_arg0 + st.d a0, t0, 0 # firmware arguments + la t0, fw_arg1 + st.d a1, t0, 0 + la t0, fw_arg2 + st.d a2, t0, 0 + + /* KSave3 used for percpu base, initialized as 0 */ + csrwr zero, PERCPU_BASE_KS + /* GPR21 used for percpu base (runtime), initialized as 0 */ + or u0, zero, zero + + la tp, init_thread_union + /* Set the SP after an empty pt_regs. */ + PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) + PTR_ADD sp, sp, tp + set_saved_sp sp, t0, t1 + PTR_ADDI sp, sp, -4 * SZREG # init stack pointer + + bl start_kernel + +SYM_CODE_END(kernel_entry) + +SYM_ENTRY(kernel_entry_end, SYM_L_GLOBAL, SYM_A_NONE) diff --git a/arch/loongarch/kernel/idle.c b/arch/loongarch/kernel/idle.c new file mode 100644 index 000000000000..27030472ce84 --- /dev/null +++ b/arch/loongarch/kernel/idle.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch idle loop support. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +void __cpuidle arch_cpu_idle(void) +{ + raw_local_irq_enable(); + __arch_cpu_idle(); /* idle instruction needs irq enabled */ +} diff --git a/arch/loongarch/kernel/io.c b/arch/loongarch/kernel/io.c new file mode 100644 index 000000000000..07c5fb99cbb4 --- /dev/null +++ b/arch/loongarch/kernel/io.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include + +/* + * Copy data from IO memory space to "real" memory space. + */ +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)from, 8)) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } + + while (count >= 8) { + *(u64 *)to = __raw_readq(from); + from += 8; + to += 8; + count -= 8; + } + + while (count) { + *(u8 *)to = __raw_readb(from); + from++; + to++; + count--; + } +} +EXPORT_SYMBOL(__memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. 
+ */ +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +{ + while (count && !IS_ALIGNED((unsigned long)to, 8)) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } + + while (count >= 8) { + __raw_writeq(*(u64 *)from, to); + from += 8; + to += 8; + count -= 8; + } + + while (count) { + __raw_writeb(*(u8 *)from, to); + from++; + to++; + count--; + } +} +EXPORT_SYMBOL(__memcpy_toio); + +/* + * "memset" on IO memory space. + */ +void __memset_io(volatile void __iomem *dst, int c, size_t count) +{ + u64 qc = (u8)c; + + qc |= qc << 8; + qc |= qc << 16; + qc |= qc << 32; + + while (count && !IS_ALIGNED((unsigned long)dst, 8)) { + __raw_writeb(c, dst); + dst++; + count--; + } + + while (count >= 8) { + __raw_writeq(qc, dst); + dst += 8; + count -= 8; + } + + while (count) { + __raw_writeb(c, dst); + dst++; + count--; + } +} +EXPORT_SYMBOL(__memset_io); diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c new file mode 100644 index 000000000000..b56f43582283 --- /dev/null +++ b/arch/loongarch/kernel/irq.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +DEFINE_PER_CPU(unsigned long, irq_stack); + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk("unexpected IRQ # %d\n", irq); +} + +atomic_t irq_err_count; + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + return 0; +} + +asmlinkage void spurious_interrupt(void) +{ + atomic_inc(&irq_err_count); +} + +void __init init_IRQ(void) +{ + int i; + unsigned int order = get_order(IRQ_STACK_SIZE); + struct page *page; + + for (i = 0; i < NR_IRQS; i++) + irq_set_noprobe(i); + + arch_init_irq(); + + for_each_possible_cpu(i) { + page = alloc_pages_node(cpu_to_node(i), GFP_KERNEL, order); + + per_cpu(irq_stack, i) = (unsigned long)page_address(page); + pr_debug("CPU%d IRQ stack at 0x%lx - 0x%lx\n", i, + per_cpu(irq_stack, i), per_cpu(irq_stack, i) + IRQ_STACK_SIZE); + } +} diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c new file mode 100644 index 000000000000..6d498288977d --- /dev/null +++ b/arch/loongarch/kernel/module-sections.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val) +{ + int nr; + struct mod_section *plt_sec = &mod->arch.plt; + struct mod_section *plt_idx_sec = &mod->arch.plt_idx; + struct plt_entry *plt = get_plt_entry(val, plt_sec, plt_idx_sec); + struct plt_idx_entry *plt_idx; + + if (plt) + return (Elf_Addr)plt; + + nr = plt_sec->num_entries; + + /* There is no duplicate entry, create a new one */ + plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt[nr] = emit_plt_entry(val); + plt_idx = (struct plt_idx_entry *)plt_idx_sec->shdr->sh_addr; + plt_idx[nr] = emit_plt_idx_entry(val); + + plt_sec->num_entries++; + plt_idx_sec->num_entries++; + BUG_ON(plt_sec->num_entries > plt_sec->max_entries); + + return (Elf_Addr)&plt[nr]; +} + +static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +{ + return 
x->r_info == y->r_info && x->r_addend == y->r_addend;
+}
+
+static bool duplicate_rela(const Elf_Rela *rela, int idx)
+{
+ int i;
+
+ for (i = 0; i < idx; i++) {
+ if (is_rela_equal(&rela[i], &rela[idx]))
+ return true;
+ }
+
+ return false;
+}
+
+static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
+{
+ unsigned int i, type;
+
+ for (i = 0; i < num; i++) {
+ type = ELF_R_TYPE(relas[i].r_info);
+ if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
+ if (!duplicate_rela(relas, i))
+ (*plts)++;
+ }
+ }
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned int i, num_plts = 0;
+
+ /*
+ * Find the empty .plt sections.
+ */
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
+ mod->arch.plt.shdr = sechdrs + i;
+ else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx"))
+ mod->arch.plt_idx.shdr = sechdrs + i;
+ }
+
+ if (!mod->arch.plt.shdr) {
+ pr_err("%s: module PLT section(s) missing\n", mod->name);
+ return -ENOEXEC;
+ }
+ if (!mod->arch.plt_idx.shdr) {
+ pr_err("%s: module PLT.IDX section(s) missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ /* Calculate the maximum number of entries */
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela);
+ Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset;
+ Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info;
+
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+
+ /* ignore relocations that operate on non-exec sections */
+ if (!(dst_sec->sh_flags & SHF_EXECINSTR))
+ continue;
+
+ count_max_entries(relas, num_rela, &num_plts);
+ }
+
+ mod->arch.plt.shdr->sh_type = SHT_NOBITS;
+ mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry);
+ mod->arch.plt.num_entries = 0;
+ mod->arch.plt.max_entries = num_plts;
+
+ mod->arch.plt_idx.shdr->sh_type = SHT_NOBITS;
+ mod->arch.plt_idx.shdr->sh_flags = SHF_ALLOC;
+ mod->arch.plt_idx.shdr->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.plt_idx.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry);
+ mod->arch.plt_idx.num_entries = 0;
+ mod->arch.plt_idx.max_entries = num_plts;
+
+ return 0;
+}
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
new file mode 100644
index 000000000000..bbe7c763fbfb
--- /dev/null
+++ b/arch/loongarch/kernel/module.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ * Author: Huacai Chen
+ */
+
+#define pr_fmt(fmt) "kmod: " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static inline bool signed_imm_check(long val, unsigned int bit)
+{
+ return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1));
+}
+
+static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
+{
+ return val < (1UL << bit);
+}
+
+static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
+{
+ if (RELA_STACK_DEPTH <= *rela_stack_top)
+ return -ENOEXEC;
+
+ rela_stack[(*rela_stack_top)++] = stack_value;
+ pr_debug("%s stack_value = 0x%llx\n", __func__, stack_value);
+
+ return 0;
+}
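The R_LARCH_SOP_* relocations handled below form a small stack machine: PUSH relocations evaluate an operand onto a per-module stack, operator relocations pop operands and push the result, and POP relocations write the final value into an instruction field. A standalone user-space sketch of that pattern (not kernel code; the names and the example sequence are invented for illustration):

#include <stdio.h>

#define STACK_DEPTH 16

static long stack[STACK_DEPTH];
static int top;

static int push(long v)
{
	if (top >= STACK_DEPTH)
		return -1;	/* overflow, like -ENOEXEC above */
	stack[top++] = v;
	return 0;
}

static int pop(long *v)
{
	if (top == 0)
		return -1;	/* underflow */
	*v = stack[--top];
	return 0;
}

int main(void)
{
	long a, b;

	/* e.g. PUSH_PCREL, PUSH_ABSOLUTE, then a SOP_ADD combining them */
	push(0x1000);	/* symbol value - pc */
	push(0x20);	/* addend */
	pop(&b);
	pop(&a);
	push(a + b);

	pop(&a);	/* a SOP_POP_* relocation would patch this in */
	printf("value to patch into the instruction: 0x%lx\n", a);
	return 0;
}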
+static int rela_stack_pop(s64 *stack_value, s64 *rela_stack, size_t *rela_stack_top)
+{
+ if (*rela_stack_top == 0)
+ return -ENOEXEC;
+
+ *stack_value = rela_stack[--(*rela_stack_top)];
+ pr_debug("%s stack_value = 0x%llx\n", __func__, *stack_value);
+
+ return 0;
+}
+
+static int apply_r_larch_none(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ return 0;
+}
+
+static int apply_r_larch_error(struct module *me, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ pr_err("%s: Unsupported relocation type %u, please add its support.\n", me->name, type);
+ return -EINVAL;
+}
+
+static int apply_r_larch_32(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ *location = v;
+ return 0;
+}
+
+static int apply_r_larch_64(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ *(Elf_Addr *)location = v;
+ return 0;
+}
+
+static int apply_r_larch_sop_push_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ return rela_stack_push(v - (u64)location, rela_stack, rela_stack_top);
+}
+
+static int apply_r_larch_sop_push_absolute(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ return rela_stack_push(v, rela_stack, rela_stack_top);
+}
+
+static int apply_r_larch_sop_push_dup(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ int err = 0;
+ s64 opr1;
+
+ err = rela_stack_pop(&opr1, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+ err = rela_stack_push(opr1, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+ err = rela_stack_push(opr1, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ ptrdiff_t offset = (void *)v - (void *)location;
+
+ if (offset >= SZ_128M)
+ v = module_emit_plt_entry(mod, v);
+
+ if (offset < -SZ_128M)
+ v = module_emit_plt_entry(mod, v);
+
+ return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top, type);
+}
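apply_r_larch_sop_push_plt_pcrel() above routes a call through a PLT entry whenever the target lies 128MB or more away, because a 26-bit branch offset shifted left by 2 only spans -128MB..+128MB. A hedged sketch of that reachability test, with the constant written out since SZ_128M comes from kernel headers:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define SZ_128M (128 * 1024 * 1024L)

/* Return 1 if a direct 26-bit (x4) PC-relative branch can reach target. */
static int branch_reaches(intptr_t pc, intptr_t target)
{
	ptrdiff_t offset = target - pc;

	return offset >= -SZ_128M && offset < SZ_128M;
}

int main(void)
{
	intptr_t pc = 0x120000000L;

	printf("%d\n", branch_reaches(pc, pc + 0x100000));	/* 1: direct call */
	printf("%d\n", branch_reaches(pc, pc + SZ_128M));	/* 0: needs a PLT entry */
	return 0;
}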
+static int apply_r_larch_sop(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ int err = 0;
+ s64 opr1, opr2, opr3;
+
+ if (type == R_LARCH_SOP_IF_ELSE) {
+ err = rela_stack_pop(&opr3, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+ }
+
+ err = rela_stack_pop(&opr2, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+ err = rela_stack_pop(&opr1, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+
+ switch (type) {
+ case R_LARCH_SOP_AND:
+ err = rela_stack_push(opr1 & opr2, rela_stack, rela_stack_top);
+ break;
+ case R_LARCH_SOP_ADD:
+ err = rela_stack_push(opr1 + opr2, rela_stack, rela_stack_top);
+ break;
+ case R_LARCH_SOP_SUB:
+ err = rela_stack_push(opr1 - opr2, rela_stack, rela_stack_top);
+ break;
+ case R_LARCH_SOP_SL:
+ err = rela_stack_push(opr1 << opr2, rela_stack, rela_stack_top);
+ break;
+ case R_LARCH_SOP_SR:
+ err = rela_stack_push(opr1 >> opr2, rela_stack, rela_stack_top);
+ break;
+ case R_LARCH_SOP_IF_ELSE:
+ err = rela_stack_push(opr1 ? opr2 : opr3, rela_stack, rela_stack_top);
+ break;
+ default:
+ pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+ return -EINVAL;
+ }
+
+ return err;
+}
+
+static int apply_r_larch_sop_imm_field(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ int err = 0;
+ s64 opr1;
+ union loongarch_instruction *insn = (union loongarch_instruction *)location;
+
+ err = rela_stack_pop(&opr1, rela_stack, rela_stack_top);
+ if (err)
+ return err;
+
+ switch (type) {
+ case R_LARCH_SOP_POP_32_U_10_12:
+ if (!unsigned_imm_check(opr1, 12))
+ goto overflow;
+
+ /* (*(uint32_t *) PC) [21 ... 10] = opr [11 ... 0] */
+ insn->reg2ui12_format.simmediate = opr1 & 0xfff;
+ return 0;
+ case R_LARCH_SOP_POP_32_S_10_12:
+ if (!signed_imm_check(opr1, 12))
+ goto overflow;
+
+ insn->reg2i12_format.simmediate = opr1 & 0xfff;
+ return 0;
+ case R_LARCH_SOP_POP_32_S_10_16:
+ if (!signed_imm_check(opr1, 16))
+ goto overflow;
+
+ insn->reg2i16_format.simmediate = opr1 & 0xffff;
+ return 0;
+ case R_LARCH_SOP_POP_32_S_10_16_S2:
+ if (opr1 % 4)
+ goto unaligned;
+
+ if (!signed_imm_check(opr1, 18))
+ goto overflow;
+
+ insn->reg2i16_format.simmediate = (opr1 >> 2) & 0xffff;
+ return 0;
+ case R_LARCH_SOP_POP_32_S_5_20:
+ if (!signed_imm_check(opr1, 20))
+ goto overflow;
+
+ insn->reg1i20_format.simmediate = (opr1) & 0xfffff;
+ return 0;
+ case R_LARCH_SOP_POP_32_S_0_5_10_16_S2:
+ if (opr1 % 4)
+ goto unaligned;
+
+ if (!signed_imm_check(opr1, 23))
+ goto overflow;
+
+ opr1 >>= 2;
+ insn->reg1i21_format.simmediate_l = opr1 & 0xffff;
+ insn->reg1i21_format.simmediate_h = (opr1 >> 16) & 0x1f;
+ return 0;
+ case R_LARCH_SOP_POP_32_S_0_10_10_16_S2:
+ if (opr1 % 4)
+ goto unaligned;
+
+ if (!signed_imm_check(opr1, 28))
+ goto overflow;
+
+ opr1 >>= 2;
+ insn->reg0i26_format.simmediate_l = opr1 & 0xffff;
+ insn->reg0i26_format.simmediate_h = (opr1 >> 16) & 0x3ff;
+ return 0;
+ case R_LARCH_SOP_POP_32_U:
+ if (!unsigned_imm_check(opr1, 32))
+ goto overflow;
+
+ /* (*(uint32_t *) PC) = opr */
+ *location = (u32)opr1;
+ return 0;
+ default:
+ pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+ return -EINVAL;
+ }
+
+overflow:
+ pr_err("module %s: opr1 = 0x%llx overflow! dangerous %s (%u) relocation\n",
+ mod->name, opr1, __func__, type);
+ return -ENOEXEC;
+
+unaligned:
+ pr_err("module %s: opr1 = 0x%llx unaligned! dangerous %s (%u) relocation\n",
+ mod->name, opr1, __func__, type);
+ return -ENOEXEC;
+}
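The R_LARCH_SOP_POP_32_S_0_5_10_16_S2 case above checks alignment and range, scales a 23-bit signed offset, and scatters it across a 16-bit low field and a 5-bit high field. A standalone sketch of the same check-and-split, using plain shifts and masks instead of the kernel's union loongarch_instruction:

#include <stdio.h>
#include <stdint.h>

static int signed_imm_check(long val, unsigned int bit)
{
	return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1));
}

/* Split a 23-bit signed, 4-byte-aligned offset as in the reg1i21 format. */
static int encode_21bit_branch(long offset, uint32_t *lo16, uint32_t *hi5)
{
	if (offset % 4)
		return -1;		/* unaligned */
	if (!signed_imm_check(offset, 23))
		return -1;		/* overflow */

	offset >>= 2;			/* drop the two implied zero bits */
	*lo16 = offset & 0xffff;
	*hi5 = (offset >> 16) & 0x1f;
	return 0;
}

int main(void)
{
	uint32_t lo, hi;

	if (!encode_21bit_branch(-0x8000, &lo, &hi))
		printf("lo16 = 0x%x, hi5 = 0x%x\n", lo, hi);
	return 0;
}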
+
+static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+ switch (type) {
+ case R_LARCH_ADD32:
+ *(s32 *)location += v;
+ return 0;
+ case R_LARCH_ADD64:
+ *(s64 *)location += v;
+ return 0;
+ case R_LARCH_SUB32:
+ *(s32 *)location -= v;
+ return 0;
+ case R_LARCH_SUB64:
+ *(s64 *)location -= v;
+ return 0;
+ default:
+ pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+ return -EINVAL;
+ }
+}
+
+/*
+ * reloc_rela_handler() - Apply a particular relocation to a module
+ * @mod: the module to apply the reloc to
+ * @location: the address at which the reloc is to be applied
+ * @v: the value of the reloc, with addend for RELA-style
+ * @rela_stack: the stack used to store relocation info, LOCAL to THIS module
+ * @rela_stack_top: where the stack operation (pop/push) applies to
+ *
+ * Return: 0 upon success, else -ERRNO
+ */
+typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v,
+ s64 *rela_stack, size_t *rela_stack_top, unsigned int type);
+
+/* The handlers for known reloc types */
+static reloc_rela_handler reloc_rela_handlers[] = {
+ [R_LARCH_NONE ... R_LARCH_SUB64] = apply_r_larch_error,
+
+ [R_LARCH_NONE] = apply_r_larch_none,
+ [R_LARCH_32] = apply_r_larch_32,
+ [R_LARCH_64] = apply_r_larch_64,
+ [R_LARCH_MARK_LA] = apply_r_larch_none,
+ [R_LARCH_MARK_PCREL] = apply_r_larch_none,
+ [R_LARCH_SOP_PUSH_PCREL] = apply_r_larch_sop_push_pcrel,
+ [R_LARCH_SOP_PUSH_ABSOLUTE] = apply_r_larch_sop_push_absolute,
+ [R_LARCH_SOP_PUSH_DUP] = apply_r_larch_sop_push_dup,
+ [R_LARCH_SOP_PUSH_PLT_PCREL] = apply_r_larch_sop_push_plt_pcrel,
+ [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop,
+ [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
+ [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub,
+};
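The handler table above relies on GCC's range designated initializers ([A ... B] =) to first mark every relocation type as unsupported and then override the implemented ones, so an unknown type fails loudly instead of silently corrupting code. The same default-then-override pattern in miniature (hypothetical opcode names, GNU C extension required):

#include <stdio.h>

enum op { OP_NOP, OP_ADD, OP_SUB, OP_MAX };

typedef int (*op_handler)(int, int);

static int op_error(int a, int b) { (void)a; (void)b; return -1; }
static int op_add(int a, int b) { return a + b; }

/* Everything defaults to the error handler; later entries override it. */
static op_handler handlers[] = {
	[OP_NOP ... OP_MAX - 1] = op_error,

	[OP_ADD] = op_add,
};

int main(void)
{
	printf("add: %d\n", handlers[OP_ADD](2, 3));	/* 5 */
	printf("sub: %d\n", handlers[OP_SUB](2, 3));	/* -1: unimplemented */
	return 0;
}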
+
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *mod)
+{
+ int i, err;
+ unsigned int type;
+ s64 rela_stack[RELA_STACK_DEPTH];
+ size_t rela_stack_top = 0;
+ reloc_rela_handler handler;
+ void *location;
+ Elf_Addr v;
+ Elf_Sym *sym;
+ Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr;
+
+ pr_debug("%s: Applying relocate section %u to %u\n", __func__, relsec,
+ sechdrs[relsec].sh_info);
+
+ rela_stack_top = 0;
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+ /* This is where to make the change */
+ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset;
+ /* This is the symbol it is referring to */
+ sym = (Elf_Sym *)sechdrs[symindex].sh_addr + ELF_R_SYM(rel[i].r_info);
+ if (IS_ERR_VALUE(sym->st_value)) {
+ /* Ignore unresolved weak symbol */
+ if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
+ continue;
+ pr_warn("%s: Unknown symbol %s\n", mod->name, strtab + sym->st_name);
+ return -ENOENT;
+ }
+
+ type = ELF_R_TYPE(rel[i].r_info);
+
+ if (type < ARRAY_SIZE(reloc_rela_handlers))
+ handler = reloc_rela_handlers[type];
+ else
+ handler = NULL;
+
+ if (!handler) {
+ pr_err("%s: Unknown relocation type %u\n", mod->name, type);
+ return -EINVAL;
+ }
+
+ pr_debug("type %d st_value %llx r_addend %llx loc %llx\n",
+ (int)ELF_R_TYPE(rel[i].r_info),
+ sym->st_value, rel[i].r_addend, (u64)location);
+
+ v = sym->st_value + rel[i].r_addend;
+ err = handler(mod, location, v, rela_stack, &rela_stack_top, type);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void *module_alloc(unsigned long size)
+{
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0));
+}
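apply_relocate_add() above walks an SHT_RELA section entry by entry, resolving the symbol value plus addend and dispatching on ELF_R_TYPE. Outside the kernel, the same record layout comes from <elf.h>; a minimal sketch of such a walk (the single entry is fabricated purely for demonstration):

#include <elf.h>
#include <stdio.h>

/* Walk a RELA section and print what each entry asks the loader to do. */
static void dump_relas(const Elf64_Rela *rela, size_t nr)
{
	for (size_t i = 0; i < nr; i++)
		printf("offset 0x%llx: type %u, symbol %u, addend %lld\n",
		       (unsigned long long)rela[i].r_offset,
		       (unsigned)ELF64_R_TYPE(rela[i].r_info),
		       (unsigned)ELF64_R_SYM(rela[i].r_info),
		       (long long)rela[i].r_addend);
}

int main(void)
{
	Elf64_Rela demo = {
		.r_offset = 0x1000,
		.r_info = ELF64_R_INFO(3, 1),	/* symbol index 3, type 1 */
		.r_addend = 8,
	};

	dump_relas(&demo, 1);
	return 0;
}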
diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c
new file mode 100644
index 000000000000..be461efeb7a6
--- /dev/null
+++ b/arch/loongarch/kernel/proc.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * No lock; only written during early bootup by CPU 0.
+ */
+static RAW_NOTIFIER_HEAD(proc_cpuinfo_chain);
+
+int __ref register_proc_cpuinfo_notifier(struct notifier_block *nb)
+{
+ return raw_notifier_chain_register(&proc_cpuinfo_chain, nb);
+}
+
+int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v)
+{
+ return raw_notifier_call_chain(&proc_cpuinfo_chain, val, v);
+}
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ unsigned long n = (unsigned long) v - 1;
+ unsigned int version = cpu_data[n].processor_id & 0xff;
+ unsigned int fp_version = cpu_data[n].fpu_vers;
+ struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args;
+
+ /*
+ * For the first processor also print the system type
+ */
+ if (n == 0)
+ seq_printf(m, "system type\t\t: %s\n\n", get_system_type());
+
+ seq_printf(m, "processor\t\t: %ld\n", n);
+ seq_printf(m, "package\t\t\t: %d\n", cpu_data[n].package);
+ seq_printf(m, "core\t\t\t: %d\n", cpu_data[n].core);
+ seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]);
+ seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]);
+ seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version);
+ seq_printf(m, "FPU Revision\t\t: 0x%02x\n", fp_version);
+ seq_printf(m, "CPU MHz\t\t\t: %llu.%02llu\n",
+ cpu_clock_freq / 1000000, (cpu_clock_freq / 10000) % 100);
+ seq_printf(m, "BogoMIPS\t\t: %llu.%02llu\n",
+ (lpj_fine * cpu_clock_freq / const_clock_freq) / (500000/HZ),
+ ((lpj_fine * cpu_clock_freq / const_clock_freq) / (5000/HZ)) % 100);
+ seq_printf(m, "TLB Entries\t\t: %d\n", cpu_data[n].tlbsize);
+ seq_printf(m, "Address Sizes\t\t: %d bits physical, %d bits virtual\n",
+ cpu_pabits + 1, cpu_vabits + 1);
+
+ seq_printf(m, "ISA\t\t\t:");
+ if (cpu_has_loongarch32)
+ seq_printf(m, " loongarch32");
+ if (cpu_has_loongarch64)
+ seq_printf(m, " loongarch64");
+ seq_printf(m, "\n");
+
+ seq_printf(m, "Features\t\t:");
+ if (cpu_has_cpucfg) seq_printf(m, " cpucfg");
+ if (cpu_has_lam) seq_printf(m, " lam");
+ if (cpu_has_ual) seq_printf(m, " ual");
+ if (cpu_has_fpu) seq_printf(m, " fpu");
+ if (cpu_has_lsx) seq_printf(m, " lsx");
+ if (cpu_has_lasx) seq_printf(m, " lasx");
+ if (cpu_has_complex) seq_printf(m, " complex");
+ if (cpu_has_crypto) seq_printf(m, " crypto");
+ if (cpu_has_lvz) seq_printf(m, " lvz");
+ if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86");
+ if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm");
+ if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips");
+ seq_printf(m, "\n");
+
+ seq_printf(m, "Hardware Watchpoint\t: %s",
+ cpu_has_watch ? "yes, " : "no\n");
+ if (cpu_has_watch) {
+ seq_printf(m, "iwatch count: %d, dwatch count: %d\n",
+ cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count);
+ }
+
+ proc_cpuinfo_notifier_args.m = m;
+ proc_cpuinfo_notifier_args.n = n;
+
+ raw_notifier_call_chain(&proc_cpuinfo_chain, 0,
+ &proc_cpuinfo_notifier_args);
+
+ seq_printf(m, "\n");
+
+ return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ unsigned long i = *pos;
+
+ return i < NR_CPUS ?
(void *) (i + 1) : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c new file mode 100644 index 000000000000..def9edad44fc --- /dev/null +++ b/arch/loongarch/kernel/process.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Idle related variables and functions + */ + +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(boot_option_idle_override); + +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + play_dead(); +} +#endif + +asmlinkage void ret_from_fork(void); +asmlinkage void ret_from_kernel_thread(void); + +void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) +{ + unsigned long crmd; + unsigned long prmd; + unsigned long euen; + + /* New thread loses kernel privileges. */ + crmd = regs->csr_crmd & ~(PLV_MASK); + crmd |= PLV_USER; + regs->csr_crmd = crmd; + + prmd = regs->csr_prmd & ~(PLV_MASK); + prmd |= PLV_USER; + regs->csr_prmd = prmd; + + euen = regs->csr_euen & ~(CSR_EUEN_FPEN); + regs->csr_euen = euen; + lose_fpu(0); + + clear_thread_flag(TIF_LSX_CTX_LIVE); + clear_thread_flag(TIF_LASX_CTX_LIVE); + clear_used_math(); + regs->csr_era = pc; + regs->regs[3] = sp; +} + +void exit_thread(struct task_struct *tsk) +{ +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + /* + * Save any process state which is live in hardware registers to the + * parent context prior to duplication. This prevents the new child + * state becoming stale if the parent is preempted before copy_thread() + * gets a chance to save the parent's live hardware registers to the + * child context. + */ + preempt_disable(); + + if (is_fpu_owner()) + save_fp(current); + + preempt_enable(); + + if (used_math()) + memcpy(dst, src, sizeof(struct task_struct)); + else + memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr)); + + return 0; +} + +/* + * Copy architecture-specific thread state + */ +int copy_thread(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) +{ + unsigned long childksp; + struct pt_regs *childregs, *regs = current_pt_regs(); + + childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; + + /* set up new TSS. */ + childregs = (struct pt_regs *) childksp - 1; + /* Put the stack after the struct pt_regs. 
*/
+ childksp = (unsigned long) childregs;
+ p->thread.csr_euen = 0;
+ p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD);
+ p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD);
+ p->thread.csr_ecfg = csr_read32(LOONGARCH_CSR_ECFG);
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ /* kernel thread */
+ p->thread.reg23 = usp; /* fn */
+ p->thread.reg24 = kthread_arg;
+ p->thread.reg03 = childksp;
+ p->thread.reg01 = (unsigned long) ret_from_kernel_thread;
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->csr_euen = p->thread.csr_euen;
+ childregs->csr_crmd = p->thread.csr_crmd;
+ childregs->csr_prmd = p->thread.csr_prmd;
+ childregs->csr_ecfg = p->thread.csr_ecfg;
+ return 0;
+ }
+
+ /* user thread */
+ *childregs = *regs;
+ childregs->regs[4] = 0; /* Child gets zero as return value */
+ if (usp)
+ childregs->regs[3] = usp;
+
+ p->thread.reg03 = (unsigned long) childregs;
+ p->thread.reg01 = (unsigned long) ret_from_fork;
+
+ /*
+ * New tasks lose permission to use the fpu. This accelerates context
+ * switching for most programs since they don't use the fpu.
+ */
+ childregs->csr_euen = 0;
+
+ clear_tsk_thread_flag(p, TIF_USEDFPU);
+ clear_tsk_thread_flag(p, TIF_USEDSIMD);
+ clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
+ clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+
+ if (clone_flags & CLONE_SETTLS)
+ childregs->regs[2] = tls;
+
+ return 0;
+}
+
+unsigned long get_wchan(struct task_struct *task)
+{
+ return 0;
+}
+
+unsigned long stack_top(void)
+{
+ unsigned long top = TASK_SIZE & PAGE_MASK;
+
+ /* Space for the VDSO & data page */
+ top -= PAGE_ALIGN(current->thread.vdso->size);
+ top -= PAGE_SIZE;
+
+ /* Space to randomize the VDSO base */
+ if (current->flags & PF_RANDOMIZE)
+ top -= VDSO_RANDOMIZE_SIZE;
+
+ return top;
+}
+
+/*
+ * Don't forget that the stack pointer must be aligned on an 8-byte
+ * boundary for the 32-bit ABI and on a 16-byte boundary for the 64-bit ABI.
+ */
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() & ~PAGE_MASK;
+
+ return sp & STACK_ALIGN;
+}
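arch_align_stack() above subtracts a sub-page random amount and then rounds down so the result still meets the ABI's stack alignment. A user-space sketch of the same arithmetic; PAGE_SIZE and the 16-byte mask are written out here because PAGE_MASK and STACK_ALIGN come from kernel headers, and the mask value is an assumption for the example:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))
#define STACK_ALIGN_MASK (~15UL)	/* assumed 16-byte alignment */

static unsigned long align_stack(unsigned long sp, int randomize)
{
	if (randomize)
		sp -= rand() & ~PAGE_MASK;	/* shift down 0..PAGE_SIZE-1 bytes */

	return sp & STACK_ALIGN_MASK;		/* round down to 16 bytes */
}

int main(void)
{
	unsigned long sp = 0x7ffffffff000UL;

	printf("randomized sp: 0x%lx\n", align_stack(sp, 1));
	printf("aligned only:  0x%lx\n", align_stack(sp, 0));
	return 0;
}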
+static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
+static struct cpumask backtrace_csd_busy;
+
+static void handle_backtrace(void *info)
+{
+ nmi_cpu_backtrace(get_irq_regs());
+ cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
+}
+
+static void raise_backtrace(cpumask_t *mask)
+{
+ call_single_data_t *csd;
+ int cpu;
+
+ for_each_cpu(cpu, mask) {
+ /*
+ * If we previously sent an IPI to the target CPU & it hasn't
+ * cleared its bit in the busy cpumask then it didn't handle
+ * our previous IPI & it's not safe for us to reuse the
+ * call_single_data_t.
+ */
+ if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
+ pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
+ cpu);
+ continue;
+ }
+
+ csd = &per_cpu(backtrace_csd, cpu);
+ csd->func = handle_backtrace;
+ smp_call_function_single_async(cpu, csd);
+ }
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+ nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
+}
+
+#ifdef CONFIG_64BIT
+void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs)
+{
+ unsigned int i;
+
+ for (i = LOONGARCH_EF_R1; i <= LOONGARCH_EF_R31; i++) {
+ uregs[i] = regs->regs[i - LOONGARCH_EF_R0];
+ }
+
+ uregs[LOONGARCH_EF_ORIG_A0] = regs->orig_a0;
+ uregs[LOONGARCH_EF_CSR_ERA] = regs->csr_era;
+ uregs[LOONGARCH_EF_CSR_BADV] = regs->csr_badvaddr;
+ uregs[LOONGARCH_EF_CSR_CRMD] = regs->csr_crmd;
+ uregs[LOONGARCH_EF_CSR_PRMD] = regs->csr_prmd;
+ uregs[LOONGARCH_EF_CSR_EUEN] = regs->csr_euen;
+ uregs[LOONGARCH_EF_CSR_ECFG] = regs->csr_ecfg;
+ uregs[LOONGARCH_EF_CSR_ESTAT] = regs->csr_estat;
+}
+#endif /* CONFIG_64BIT */
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
new file mode 100644
index 000000000000..0abbf43b7311
--- /dev/null
+++ b/arch/loongarch/kernel/ptrace.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ * Author: Huacai Chen
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void init_fp_ctx(struct task_struct *target)
+{
+ /* The target already has context */
+ if (tsk_used_math(target))
+ return;
+
+ /* Begin with data registers set to all 1s... */
+ memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+ set_stopped_child_used_math(target);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* Don't load the watchpoint registers for the ex-child.
*/ + clear_tsk_thread_flag(child, TIF_LOAD_WATCH); + clear_tsk_thread_flag(child, TIF_SINGLESTEP); +} + +/* regset get/set implementations */ + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int r; + struct pt_regs *regs = task_pt_regs(target); + + r = membuf_write(&to, ®s->regs, sizeof(u64) * GPR_NUM); + r = membuf_write(&to, ®s->orig_a0, sizeof(u64)); + r = membuf_write(&to, ®s->csr_era, sizeof(u64)); + r = membuf_write(&to, ®s->csr_badvaddr, sizeof(u64)); + + return r; +} + +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int err; + int a0_start = sizeof(u64) * GPR_NUM; + int era_start = a0_start + sizeof(u64); + int badvaddr_start = era_start + sizeof(u64); + struct pt_regs *regs = task_pt_regs(target); + + err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->regs, + 0, a0_start); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->orig_a0, + a0_start, a0_start + sizeof(u64)); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->csr_era, + era_start, era_start + sizeof(u64)); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->csr_badvaddr, + badvaddr_start, badvaddr_start + sizeof(u64)); + + return err; +} + + +/* + * Get the general floating-point registers. + */ +static int gfpr_get(struct task_struct *target, struct membuf *to) +{ + return membuf_write(to, &target->thread.fpu.fpr, + sizeof(elf_fpreg_t) * NUM_FPU_REGS); +} + +static int gfpr_get_simd(struct task_struct *target, struct membuf *to) +{ + int i, r; + u64 fpr_val; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS; i++) { + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0); + r = membuf_write(to, &fpr_val, sizeof(elf_fpreg_t)); + } + + return r; +} + +/* + * Choose the appropriate helper for general registers, and then copy + * the FCC and FCSR registers separately. + */ +static int fpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int r; + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + r = gfpr_get(target, &to); + else + r = gfpr_get_simd(target, &to); + + r = membuf_write(&to, &target->thread.fpu.fcc, sizeof(target->thread.fpu.fcc)); + r = membuf_write(&to, &target->thread.fpu.fcsr, sizeof(target->thread.fpu.fcsr)); + + return r; +} + +static int gfpr_set(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + return user_regset_copyin(pos, count, kbuf, ubuf, + &target->thread.fpu.fpr, + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t)); +} + +static int gfpr_set_simd(struct task_struct *target, + unsigned int *pos, unsigned int *count, + const void **kbuf, const void __user **ubuf) +{ + int i, err; + u64 fpr_val; + + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) { + err = user_regset_copyin(pos, count, kbuf, ubuf, + &fpr_val, i * sizeof(elf_fpreg_t), + (i + 1) * sizeof(elf_fpreg_t)); + if (err) + return err; + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val); + } + + return 0; +} + +/* + * Choose the appropriate helper for general registers, and then copy + * the FCC register separately. 
+ */ +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fcc_end = fcc_start + sizeof(u64); + int err; + + BUG_ON(count % sizeof(elf_fpreg_t)); + if (pos + count > sizeof(elf_fpregset_t)) + return -EIO; + + init_fp_ctx(target); + + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) + err = gfpr_set(target, &pos, &count, &kbuf, &ubuf); + else + err = gfpr_set_simd(target, &pos, &count, &kbuf, &ubuf); + if (err) + return err; + + if (count > 0) + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcc, + fcc_start, fcc_end); + + return err; +} + +static int cfg_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + int i, r; + u32 cfg_val; + + i = 0; + while (to.left > 0) { + cfg_val = read_cpucfg(i++); + r = membuf_write(&to, &cfg_val, sizeof(u32)); + } + + return r; +} + +/* + * CFG registers are read-only. + */ +static int cfg_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(n, r) {.name = #n, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + REG_OFFSET_NAME(r0, regs[0]), + REG_OFFSET_NAME(r1, regs[1]), + REG_OFFSET_NAME(r2, regs[2]), + REG_OFFSET_NAME(r3, regs[3]), + REG_OFFSET_NAME(r4, regs[4]), + REG_OFFSET_NAME(r5, regs[5]), + REG_OFFSET_NAME(r6, regs[6]), + REG_OFFSET_NAME(r7, regs[7]), + REG_OFFSET_NAME(r8, regs[8]), + REG_OFFSET_NAME(r9, regs[9]), + REG_OFFSET_NAME(r10, regs[10]), + REG_OFFSET_NAME(r11, regs[11]), + REG_OFFSET_NAME(r12, regs[12]), + REG_OFFSET_NAME(r13, regs[13]), + REG_OFFSET_NAME(r14, regs[14]), + REG_OFFSET_NAME(r15, regs[15]), + REG_OFFSET_NAME(r16, regs[16]), + REG_OFFSET_NAME(r17, regs[17]), + REG_OFFSET_NAME(r18, regs[18]), + REG_OFFSET_NAME(r19, regs[19]), + REG_OFFSET_NAME(r20, regs[20]), + REG_OFFSET_NAME(r21, regs[21]), + REG_OFFSET_NAME(r22, regs[22]), + REG_OFFSET_NAME(r23, regs[23]), + REG_OFFSET_NAME(r24, regs[24]), + REG_OFFSET_NAME(r25, regs[25]), + REG_OFFSET_NAME(r26, regs[26]), + REG_OFFSET_NAME(r27, regs[27]), + REG_OFFSET_NAME(r28, regs[28]), + REG_OFFSET_NAME(r29, regs[29]), + REG_OFFSET_NAME(r30, regs[30]), + REG_OFFSET_NAME(r31, regs[31]), + REG_OFFSET_NAME(orig_a0, orig_a0), + REG_OFFSET_NAME(csr_era, csr_era), + REG_OFFSET_NAME(csr_badvaddr, csr_badvaddr), + REG_OFFSET_NAME(csr_crmd, csr_crmd), + REG_OFFSET_NAME(csr_prmd, csr_prmd), + REG_OFFSET_NAME(csr_euen, csr_euen), + REG_OFFSET_NAME(csr_ecfg, csr_ecfg), + REG_OFFSET_NAME(csr_estat, csr_estat), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. 
If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +enum loongarch_regset { + REGSET_GPR, + REGSET_FPR, + REGSET_CPUCFG, +}; + +static const struct user_regset loongarch64_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), + .align = sizeof(elf_greg_t), + .regset_get = gpr_get, + .set = gpr_set, + }, + [REGSET_FPR] = { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), + .align = sizeof(elf_fpreg_t), + .regset_get = fpr_get, + .set = fpr_set, + }, + [REGSET_CPUCFG] = { + .core_note_type = NT_LOONGARCH_CPUCFG, + .n = 64, + .size = sizeof(u32), + .align = sizeof(u32), + .regset_get = cfg_get, + .set = cfg_set, + }, +}; + +static const struct user_regset_view user_loongarch64_view = { + .name = "loongarch64", + .e_machine = ELF_ARCH, + .regsets = loongarch64_regsets, + .n = ARRAY_SIZE(loongarch64_regsets), +}; + + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_loongarch64_view; +} + +static inline int read_user(struct task_struct *target, unsigned long addr, + unsigned long __user *data) +{ + unsigned long tmp = 0; + + switch (addr) { + case 0 ... 31: + tmp = task_pt_regs(target)->regs[addr]; + break; + case ARG0: + tmp = task_pt_regs(target)->orig_a0; + break; + case PC: + tmp = task_pt_regs(target)->csr_era; + break; + case BADVADDR: + tmp = task_pt_regs(target)->csr_badvaddr; + break; + default: + return -EIO; + } + + return put_user(tmp, data); +} + +static inline int write_user(struct task_struct *target, unsigned long addr, + unsigned long data) +{ + switch (addr) { + case 0 ... 31: + task_pt_regs(target)->regs[addr] = data; + break; + case ARG0: + task_pt_regs(target)->orig_a0 = data; + break; + case PC: + task_pt_regs(target)->csr_era = data; + break; + case BADVADDR: + task_pt_regs(target)->csr_badvaddr = data; + break; + default: + return -EIO; + } + + return 0; +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + unsigned long __user *datap = (void __user *) data; + + switch (request) { + case PTRACE_PEEKUSR: + ret = read_user(child, addr, datap); + break; + + case PTRACE_POKEUSR: + ret = write_user(child, addr, data); + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c new file mode 100644 index 000000000000..f39d911254a2 --- /dev/null +++ b/arch/loongarch/kernel/reset.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static void machine_hang(void) +{ + local_irq_disable(); + clear_csr_ecfg(ECFG0_IM); + + pr_notice("\n\n** You can safely turn off the power now **\n\n"); + console_flush_on_panic(CONSOLE_FLUSH_PENDING); + + while (true) { + __arch_cpu_idle(); + local_irq_disable(); + } +} + +void (*pm_restart)(void) = machine_hang; +void (*pm_power_off)(void) = machine_hang; + +EXPORT_SYMBOL(pm_power_off); + +void machine_halt(void) +{ + machine_hang(); +} + +void machine_power_off(void) +{ + pm_power_off(); +} + +void machine_restart(char *command) +{ + do_kernel_restart(command); + pm_restart(); +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c new file mode 100644 index 000000000000..cc9a2d335715 --- /dev/null +++ b/arch/loongarch/kernel/setup.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +DEFINE_PER_CPU(unsigned long, kernelsp); +unsigned long fw_arg0, fw_arg1, fw_arg2; +struct cpuinfo_loongarch cpu_data[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(cpu_data); + +#ifdef CONFIG_VT +struct screen_info screen_info; +#endif + +/* + * Setup information + * + * These are initialized so they are in the .data section + */ + +static int num_standard_resources; +static struct resource *standard_resources; + +static struct resource code_resource = { .name = "Kernel code", }; +static struct resource data_resource = { .name = "Kernel data", }; +static struct resource bss_resource = { .name = "Kernel bss", }; + +unsigned long __kaslr_offset __ro_after_init; +EXPORT_SYMBOL(__kaslr_offset); + +static void *detect_magic __initdata = detect_memory_region; + +void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max) +{ + void *dm = &detect_magic; + phys_addr_t size; + + for (size = sz_min; size < sz_max; size <<= 1) { + if (!memcmp(dm, dm + size, sizeof(detect_magic))) + break; + } + + pr_debug("Memory: %lluMB of RAM detected at 0x%llx (min: %lluMB, max: %lluMB)\n", + ((unsigned long long) size) / SZ_1M, + (unsigned long long) start, + ((unsigned long long) sz_min) / SZ_1M, + ((unsigned long long) sz_max) / SZ_1M); + + memblock_add(start, size); +} + +static int usermem __initdata; + +static int __init early_parse_mem(char *p) +{ + phys_addr_t start, size; + + if (!p) { + pr_err("mem parameter is empty, do nothing\n"); + return -EINVAL; + } + + /* + * If a user specifies memory size, we + * blow away any automatically generated + * size. 
+ */
+ if (usermem == 0) {
+ usermem = 1;
+ memblock_remove(memblock_start_of_DRAM(),
+ memblock_end_of_DRAM() - memblock_start_of_DRAM());
+ }
+ start = 0;
+ size = memparse(p, &p);
+ if (*p == '@')
+ start = memparse(p + 1, &p);
+ else {
+ pr_err("Invalid format!\n");
+ return -EINVAL;
+ }
+
+ memblock_add(start, size);
+
+ return 0;
+}
+early_param("mem", early_parse_mem);
+
+static void __init check_kernel_sections_mem(void)
+{
+ phys_addr_t start = __pa_symbol(&_text);
+ phys_addr_t size = __pa_symbol(&_end) - start;
+
+ if (!memblock_is_region_memory(start, size)) {
+ pr_info("Kernel sections are not in the memory maps\n");
+ memblock_add(start, size);
+ }
+}
+
+/*
+ * arch_mem_init - initialize memory management subsystem
+ */
+static void __init arch_mem_init(char **cmdline_p)
+{
+ /* call board setup routine */
+ plat_mem_setup();
+ memblock_set_bottom_up(true);
+
+ if (usermem)
+ pr_info("User-defined physical RAM map overwrite\n");
+
+ check_kernel_sections_mem();
+
+ /*
+ * To reduce the possibility of a kernel panic when we fail to get
+ * IO TLB memory under CONFIG_SWIOTLB, it is better to allocate as
+ * little low memory as possible before plat_swiotlb_setup(), so
+ * make sparse_init() use top-down allocation.
+ */
+ memblock_set_bottom_up(false);
+ sparse_init();
+ memblock_set_bottom_up(true);
+
+ swiotlb_init(1);
+
+ dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
+
+ memblock_dump_all();
+
+ early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn));
+}
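The bottom-up/top-down dance in arch_mem_init() above exists because early allocations that land in low memory can starve the IO TLB pool that swiotlb_init() later needs. A toy bump allocator, carving from either end of one region, illustrates why switching sparse_init() to top-down preserves low memory (the region bounds are arbitrary example values, not memblock behavior):

#include <stdio.h>

/* One free region [bottom, top), carved from either end. */
static unsigned long bottom = 0x01000000;	/* 16MB */
static unsigned long top = 0x10000000;		/* 256MB */

static unsigned long alloc(unsigned long size, int bottom_up)
{
	unsigned long addr;

	if (top - bottom < size)
		return 0;

	if (bottom_up) {
		addr = bottom;
		bottom += size;		/* eats low memory first */
	} else {
		top -= size;
		addr = top;		/* leaves low memory for swiotlb */
	}
	return addr;
}

int main(void)
{
	printf("top-down:  0x%lx\n", alloc(0x100000, 0));
	printf("bottom-up: 0x%lx\n", alloc(0x100000, 1));
	return 0;
}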
+
+static void __init resource_init(void)
+{
+ long i = 0;
+ size_t res_size;
+ struct resource *res;
+ struct memblock_region *region;
+
+ code_resource.start = __pa_symbol(&_text);
+ code_resource.end = __pa_symbol(&_etext) - 1;
+ data_resource.start = __pa_symbol(&_etext);
+ data_resource.end = __pa_symbol(&_edata) - 1;
+ bss_resource.start = __pa_symbol(&__bss_start);
+ bss_resource.end = __pa_symbol(&__bss_stop) - 1;
+
+ num_standard_resources = memblock.memory.cnt;
+ res_size = num_standard_resources * sizeof(*standard_resources);
+ standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES);
+
+ for_each_mem_region(region) {
+ res = &standard_resources[i++];
+ if (!memblock_is_nomap(region)) {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+ } else {
+ res->name = "Reserved";
+ res->flags = IORESOURCE_MEM;
+ res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
+ }
+
+ request_resource(&iomem_resource, res);
+
+ /*
+ * We don't know which RAM region contains kernel data,
+ * so we try it repeatedly and let the resource manager
+ * test it.
+ */
+ request_resource(res, &code_resource);
+ request_resource(res, &data_resource);
+ request_resource(res, &bss_resource);
+ }
+}
+
+static int __init reserve_memblock_reserved_regions(void)
+{
+ u64 i, j;
+
+ for (i = 0; i < num_standard_resources; ++i) {
+ struct resource *mem = &standard_resources[i];
+ phys_addr_t r_start, r_end, mem_size = resource_size(mem);
+
+ if (!memblock_is_region_reserved(mem->start, mem_size))
+ continue;
+
+ for_each_reserved_mem_range(j, &r_start, &r_end) {
+ resource_size_t start, end;
+
+ start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
+ end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end);
+
+ if (start > mem->end || end < mem->start)
+ continue;
+
+ reserve_region_with_split(mem, start, end, "Reserved");
+ }
+ }
+
+ return 0;
+}
+arch_initcall(reserve_memblock_reserved_regions);
+
+void __init setup_arch(char **cmdline_p)
+{
+ cpu_probe();
+ *cmdline_p = boot_command_line;
+
+ early_init();
+ parse_early_param();
+
+ platform_init();
+ pagetable_init();
+ arch_mem_init(cmdline_p);
+
+ resource_init();
+
+ paging_init();
+}
diff --git a/arch/loongarch/kernel/signal-common.h b/arch/loongarch/kernel/signal-common.h
new file mode 100644
index 000000000000..004b540bd0b9
--- /dev/null
+++ b/arch/loongarch/kernel/signal-common.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ *
+ * Author: Hanlu Li
+ */
+
+#ifndef __SIGNAL_COMMON_H
+#define __SIGNAL_COMMON_H
+
+/* #define DEBUG_SIG */
+
+#ifdef DEBUG_SIG
+# define DEBUGP(fmt, args...) printk("%s: " fmt, __func__, ##args)
+#else
+# define DEBUGP(fmt, args...)
+#endif
+
+struct _ctx_layout {
+ struct sctx_info *addr;
+ unsigned int size; /* used at save context */
+};
+
+struct extctx_layout {
+ unsigned long size;
+ unsigned int flags;
+ struct _ctx_layout fpu;
+ struct _ctx_layout lsx;
+ struct _ctx_layout lasx;
+ struct _ctx_layout end;
+};
+
+/*
+ * Determine which stack to use.
+ */ +extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + struct extctx_layout *extctx); +/* Check and clear pending FPU exceptions in saved CSR */ +extern int fcsr_pending(unsigned int __user *fcsr); + +/* Make sure we will not lose FPU ownership */ +#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) +#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) + +/* Assembly functions to move context to/from the FPU */ +extern asmlinkage int +_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +extern asmlinkage int +_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +#endif /* __SIGNAL_COMMON_H */ diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c new file mode 100644 index 000000000000..5bf0c01c8312 --- /dev/null +++ b/arch/loongarch/kernel/signal.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* +* Author: Hanlu Li +* Author: Huacai Chen +* Copyright (C) 2020 Loongson Technology Corporation Limited +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "signal-common.h" + +struct rt_sigframe { + struct siginfo rs_info; + struct ucontext rs_uctx; +}; + +static void __user *get_ctx_through_ctxinfo(struct sctx_info *info) +{ + return (void __user *)((char *)info + sizeof(struct sctx_info)); +} + +/* + * Thread saved context copy to/from a signal context presumed to be on the + * user stack, and therefore accessed with appropriate macros from uaccess.h. + */ +static int copy_fpu_to_sigcontext(struct fpu_context __user *ctx) +{ + int i; + int err = 0; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= + __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 0), + ®s[i]); + } + err |= __put_user(current->thread.fpu.fcc, fcc); + err |= __put_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx) +{ + int i; + int err = 0; + u64 fpr_val; + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + for (i = 0; i < NUM_FPU_REGS; i++) { + err |= __get_user(fpr_val, ®s[i]); + set_fpr64(¤t->thread.fpu.fpr[i], 0, fpr_val); + } + err |= __get_user(current->thread.fpu.fcc, fcc); + err |= __get_user(current->thread.fpu.fcsr, fcsr); + + return err; +} + +/* + * Wrappers for the assembly _{save,restore}_fp_context functions. 
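+ * (These are only used while the current task owns the live FPU, so
+ * the registers move directly between the hardware and the user
+ * sigframe instead of going through the saved thread context.)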
+ */ +static int save_hw_fpu_context(struct fpu_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _save_fp_context(regs, fcc, fcsr); +} + +static int restore_hw_fpu_context(struct fpu_context __user *ctx) +{ + uint64_t __user *regs = (uint64_t *)&ctx->regs; + uint64_t __user *fcc = &ctx->fcc; + uint32_t __user *fcsr = &ctx->fcsr; + + return _restore_fp_context(regs, fcc, fcsr); +} + +/* + * Helper routines + */ +static int protected_save_fpu_context(struct extctx_layout *extctx) +{ + int err = 0; + struct sctx_info __user *info = extctx->fpu.addr; + struct fpu_context __user *fpu_ctx = (struct fpu_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&fpu_ctx->regs; + uint64_t __user *fcc = &fpu_ctx->fcc; + uint32_t __user *fcsr = &fpu_ctx->fcsr; + + while (1) { + lock_fpu_owner(); + if (is_fpu_owner()) + err = save_hw_fpu_context(fpu_ctx); + else + err = copy_fpu_to_sigcontext(fpu_ctx); + unlock_fpu_owner(); + + err |= __put_user(FPU_CTX_MAGIC, &info->magic); + err |= __put_user(extctx->fpu.size, &info->size); + + if (likely(!err)) + break; + /* Touch the FPU context and try again */ + err = __put_user(0, ®s[0]) | + __put_user(0, ®s[31]) | + __put_user(0, fcc) | + __put_user(0, fcsr); + if (err) + return err; /* really bad sigcontext */ + } + + return err; +} + +static int protected_restore_fpu_context(struct extctx_layout *extctx) +{ + int err = 0, sig = 0, tmp __maybe_unused; + struct sctx_info __user *info = extctx->fpu.addr; + struct fpu_context __user *fpu_ctx = (struct fpu_context *)get_ctx_through_ctxinfo(info); + uint64_t __user *regs = (uint64_t *)&fpu_ctx->regs; + uint64_t __user *fcc = &fpu_ctx->fcc; + uint32_t __user *fcsr = &fpu_ctx->fcsr; + + err = sig = fcsr_pending(fcsr); + if (err < 0) + return err; + + while (1) { + lock_fpu_owner(); + if (is_fpu_owner()) + err = restore_hw_fpu_context(fpu_ctx); + else + err = copy_fpu_from_sigcontext(fpu_ctx); + unlock_fpu_owner(); + + if (likely(!err)) + break; + /* Touch the FPU context and try again */ + err = __get_user(tmp, ®s[0]) | + __get_user(tmp, ®s[31]) | + __get_user(tmp, fcc) | + __get_user(tmp, fcsr); + if (err) + break; /* really bad sigcontext */ + } + + return err ?: sig; +} + +static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, + struct extctx_layout *extctx) +{ + int i, err = 0; + struct sctx_info __user *info; + + err |= __put_user(regs->csr_era, &sc->sc_pc); + err |= __put_user(extctx->flags, &sc->sc_flags); + + err |= __put_user(0, &sc->sc_regs[0]); + for (i = 1; i < 32; i++) + err |= __put_user(regs->regs[i], &sc->sc_regs[i]); + + if (extctx->fpu.addr) + err |= protected_save_fpu_context(extctx); + + /* Set the "end" magic */ + info = (struct sctx_info *)extctx->end.addr; + err |= __put_user(0, &info->magic); + err |= __put_user(0, &info->size); + + return err; +} + +int fcsr_pending(unsigned int __user *fcsr) +{ + int err, sig = 0; + unsigned int csr, enabled; + + err = __get_user(csr, fcsr); + enabled = ((csr & FPU_CSR_ALL_E) << 24); + /* + * If the signal handler set some FPU exceptions, clear it and + * send SIGFPE. 
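+ * (The Enable bits are shifted up by 24 so that they line up with
+ * the Cause field of the FCSR; "csr & enabled" is thus the set of
+ * pending exceptions that are actually enabled.)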
+ */ + if (csr & enabled) { + csr &= ~enabled; + err |= __put_user(csr, fcsr); + sig = SIGFPE; + } + return err ?: sig; +} + +static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *extctx) +{ + int err = 0; + unsigned int magic, size; + struct sctx_info __user *info = (struct sctx_info __user *)&sc->sc_extcontext; + + while(1) { + err |= __get_user(magic, &info->magic); + err |= __get_user(size, &info->size); + if (err) + return err; + + switch (magic) { + case 0: /* END */ + goto done; + + case FPU_CTX_MAGIC: + if (size < (sizeof(struct sctx_info) + + sizeof(struct fpu_context))) + goto invalid; + extctx->fpu.addr = info; + break; + + default: + goto invalid; + } + + info = (struct sctx_info *)((char *)info + size); + } + +done: + return 0; + +invalid: + return -EINVAL; +} + +static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +{ + int i, err = 0; + struct extctx_layout extctx; + + memset(&extctx, 0, sizeof(struct extctx_layout)); + + err = __get_user(extctx.flags, &sc->sc_flags); + if (err) + goto bad; + + err = parse_extcontext(sc, &extctx); + if (err) + goto bad; + + conditional_used_math(extctx.flags & SC_USED_FP); + + /* + * The signal handler may have used FPU; give it up if the program + * doesn't want it following sigreturn. + */ + if (!(extctx.flags & SC_USED_FP)) + lose_fpu(0); + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + err |= __get_user(regs->csr_era, &sc->sc_pc); + for (i = 1; i < 32; i++) + err |= __get_user(regs->regs[i], &sc->sc_regs[i]); + + if (extctx.fpu.addr) + err |= protected_restore_fpu_context(&extctx); + +bad: + return err; +} + +static unsigned int handle_flags(void) +{ + unsigned int flags = 0; + + flags = used_math() ? SC_USED_FP : 0; + + switch (current->thread.error_code) { + case 1: + flags |= SC_ADDRERR_RD; + break; + case 2: + flags |= SC_ADDRERR_WR; + break; + } + + return flags; +} + +static unsigned long extframe_alloc(struct extctx_layout *extctx, + struct _ctx_layout *layout, + size_t size, unsigned int align, unsigned long base) +{ + unsigned long new_base = base - size; + + new_base = round_down(new_base, (align < 16 ? 16 : align)); + new_base -= sizeof(struct sctx_info); + + layout->addr = (void *)new_base; + layout->size = (unsigned int)(base - new_base); + extctx->size += layout->size; + + return new_base; +} + +static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned long sp) +{ + unsigned long new_sp = sp; + + memset(extctx, 0, sizeof(struct extctx_layout)); + + extctx->flags = handle_flags(); + + /* Grow down, alloc "end" context info first. */ + new_sp -= sizeof(struct sctx_info); + extctx->end.addr = (void *)new_sp; + extctx->end.size = (unsigned int)sizeof(struct sctx_info); + extctx->size += extctx->end.size; + + if (extctx->flags & SC_USED_FP) { + if (cpu_has_fpu) + new_sp = extframe_alloc(extctx, &extctx->fpu, + sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); + } + + return new_sp; +} + +void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, + struct extctx_layout *extctx) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->regs[3]; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. 
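+ * (The access_ok() check in setup_rt_frame() then fails on that
+ * address, setup returns -EFAULT, and signal_setup_done() turns the
+ * failure into a SIGSEGV for the task.)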
+ */
+ if (on_sig_stack(sp) &&
+ !likely(on_sig_stack(sp - sizeof(struct rt_sigframe))))
+ return (void __user __force *)(-1UL);
+
+ sp = sigsp(sp, ksig);
+ sp = round_down(sp, 16);
+ sp = setup_extcontext(extctx, sp);
+ sp -= sizeof(struct rt_sigframe);
+
+ if (!IS_ALIGNED(sp, 16))
+ BUG();
+
+ return (void __user *)sp;
+}
+
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ */
+
+asmlinkage long sys_rt_sigreturn(void)
+{
+ int sig;
+ sigset_t set;
+ struct pt_regs *regs;
+ struct rt_sigframe __user *frame;
+
+ regs = current_pt_regs();
+ frame = (struct rt_sigframe __user *)regs->regs[3];
+ if (!access_ok(frame, sizeof(*frame)))
+ goto badframe;
+ if (__copy_from_user(&set, &frame->rs_uctx.uc_sigmask, sizeof(set)))
+ goto badframe;
+
+ set_current_blocked(&set);
+
+ sig = restore_sigcontext(regs, &frame->rs_uctx.uc_mcontext);
+ if (sig < 0)
+ goto badframe;
+ else if (sig)
+ force_sig(sig);
+
+ regs->regs[0] = 0; /* No syscall restarting */
+ if (restore_altstack(&frame->rs_uctx.uc_stack))
+ goto badframe;
+
+ return regs->regs[4];
+
+badframe:
+ force_sig(SIGSEGV);
+ return 0;
+}
+
+static int setup_rt_frame(void *sig_return, struct ksignal *ksig,
+ struct pt_regs *regs, sigset_t *set)
+{
+ int err = 0;
+ struct extctx_layout extctx;
+ struct rt_sigframe __user *frame;
+
+ frame = get_sigframe(ksig, regs, &extctx);
+ if (!access_ok(frame, sizeof(*frame) + extctx.size))
+ return -EFAULT;
+
+ /* Create siginfo. */
+ err |= copy_siginfo_to_user(&frame->rs_info, &ksig->info);
+
+ /* Create the ucontext. */
+ err |= __put_user(0, &frame->rs_uctx.uc_flags);
+ err |= __put_user(NULL, &frame->rs_uctx.uc_link);
+ err |= __save_altstack(&frame->rs_uctx.uc_stack, regs->regs[3]);
+ err |= setup_sigcontext(regs, &frame->rs_uctx.uc_mcontext, &extctx);
+ err |= __copy_to_user(&frame->rs_uctx.uc_sigmask, set, sizeof(*set));
+
+ if (err)
+ return -EFAULT;
+
+ /*
+ * Arguments to signal handler:
+ *
+ * a0 = signal number
+ * a1 = pointer to siginfo
+ * a2 = pointer to ucontext
+ *
+ * csr_era points to the signal handler, $r3 (sp) points to
+ * the struct rt_sigframe.
+ */
+ regs->regs[4] = ksig->sig;
+ regs->regs[5] = (unsigned long) &frame->rs_info;
+ regs->regs[6] = (unsigned long) &frame->rs_uctx;
+ regs->regs[3] = (unsigned long) frame;
+ regs->regs[1] = (unsigned long) sig_return;
+ regs->csr_era = (unsigned long) ksig->ka.sa.sa_handler;
+
+ DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n",
+ current->comm, current->pid,
+ frame, regs->csr_era, regs->regs[1]);
+
+ return 0;
+}
+
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+{
+ int ret;
+ sigset_t *oldset = sigmask_to_save();
+ void *vdso = current->mm->context.vdso;
+
+ /* Are we from a system call? */
+ if (regs->regs[0]) {
+ switch (regs->regs[4]) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->regs[4] = -EINTR;
+ break;
+ case -ERESTARTSYS:
+ if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
+ regs->regs[4] = -EINTR;
+ break;
+ }
+ fallthrough;
+ case -ERESTARTNOINTR:
+ regs->regs[4] = regs->orig_a0;
+ regs->csr_era -= 4;
+ }
+
+ regs->regs[0] = 0; /* Don't deal with this again. */
+ }
+
+ rseq_signal_deliver(ksig, regs);
+
+ ret = setup_rt_frame(vdso + current->thread.vdso->offset_sigreturn, ksig, regs, oldset);
+
+ signal_setup_done(ret, ksig, 0);
+}
+
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
+{
+ struct ksignal ksig;
+
+ if (has_signal && get_signal(&ksig)) {
+ /* Whee! Actually deliver the signal. 
*/ + handle_signal(&ksig, regs); + return; + } + + /* Are we from a system call? */ + if (regs->regs[0]) { + switch (regs->regs[4]) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->regs[4] = regs->orig_a0; + regs->csr_era -= 4; + break; + + case -ERESTART_RESTARTBLOCK: + regs->regs[4] = regs->orig_a0; + regs->regs[11] = __NR_restart_syscall; + regs->csr_era -= 4; + break; + } + regs->regs[0] = 0; /* Don't deal with this again. */ + } + + /* + * If there's no signal to deliver, we just put the saved sigmask + * back + */ + restore_saved_sigmask(); +} diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S new file mode 100644 index 000000000000..bf7002209600 --- /dev/null +++ b/arch/loongarch/kernel/switch.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +#include + +/* + * task_struct *__switch_to(task_struct *prev, task_struct *next, + * struct thread_info *next_ti) + */ + .align 5 +SYM_FUNC_START(__switch_to) + csrrd t1, LOONGARCH_CSR_PRMD + stptr.d t1, a0, THREAD_CSRPRMD + + cpu_save_nonscratch a0 + stptr.d ra, a0, THREAD_REG01 + move tp, a2 + cpu_restore_nonscratch a1 + + li.w t0, _THREAD_SIZE - 32 + PTR_ADD t0, t0, tp + set_saved_sp t0, t1, t2 + + ldptr.d t1, a1, THREAD_CSRPRMD + csrwr t1, LOONGARCH_CSR_PRMD + + jr ra +SYM_FUNC_END(__switch_to) diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c new file mode 100644 index 000000000000..105c370a8c22 --- /dev/null +++ b/arch/loongarch/kernel/syscall.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* +* Copyright (C) 2020 Loongson Technology Corporation Limited +* +* Author: Hanlu Li +* Author: Huacai Chen +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, + prot, unsigned long, flags, unsigned long, fd, off_t, offset) +{ + if (offset & ~PAGE_MASK) + return -EINVAL; + + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); +} + +void *sys_call_table[__NR_syscalls] = { + [0 ... 
__NR_syscalls - 1] = sys_ni_syscall, +#include +}; + +typedef long (*sys_call_fn)(unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long, unsigned long); + +void noinstr do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + sys_call_fn syscall_fn; + + nr = regs->regs[11]; + /* Set for syscall restarting */ + if (nr < NR_syscalls) + regs->regs[0] = nr + 1; + + regs->csr_era += 4; + regs->orig_a0 = regs->regs[4]; + regs->regs[4] = -ENOSYS; + + nr = syscall_enter_from_user_mode(regs, nr); + + if (nr < NR_syscalls) { + syscall_fn = sys_call_table[nr]; + regs->regs[4] = syscall_fn(regs->orig_a0, regs->regs[5], regs->regs[6], + regs->regs[7], regs->regs[8], regs->regs[9]); + } + + syscall_exit_to_user_mode(regs); +} diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c new file mode 100644 index 000000000000..06ad7fe7e6e1 --- /dev/null +++ b/arch/loongarch/kernel/time.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Common time service routines for LoongArch machines. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +u64 cpu_clock_freq; +EXPORT_SYMBOL(cpu_clock_freq); +u64 const_clock_freq; +EXPORT_SYMBOL(const_clock_freq); + +static DEFINE_RAW_SPINLOCK(state_lock); +static DEFINE_PER_CPU(struct clock_event_device, constant_clockevent_device); + +static void constant_event_handler(struct clock_event_device *dev) +{ +} + +irqreturn_t constant_timer_interrupt(int irq, void *data) +{ + int cpu = smp_processor_id(); + struct clock_event_device *cd; + + /* Clear Timer Interrupt */ + write_csr_tintclear(CSR_TINTCLR_TI); + cd = &per_cpu(constant_clockevent_device, cpu); + cd->event_handler(cd); + + return IRQ_HANDLED; +} + +static int constant_set_state_oneshot(struct clock_event_device *evt) +{ + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + timer_config = csr_read64(LOONGARCH_CSR_TCFG); + timer_config |= CSR_TCFG_EN; + timer_config &= ~CSR_TCFG_PERIOD; + csr_write64(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + + return 0; +} + +static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) +{ + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + timer_config = csr_read64(LOONGARCH_CSR_TCFG); + timer_config &= ~CSR_TCFG_EN; + csr_write64(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + + return 0; +} + +static int constant_set_state_periodic(struct clock_event_device *evt) +{ + unsigned long period; + unsigned long timer_config; + + raw_spin_lock(&state_lock); + + period = const_clock_freq / HZ; + timer_config = period & CSR_TCFG_VAL; + timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); + csr_write64(timer_config, LOONGARCH_CSR_TCFG); + + raw_spin_unlock(&state_lock); + + return 0; +} + +static int constant_set_state_shutdown(struct clock_event_device *evt) +{ + return 0; +} + +static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt) +{ + unsigned long timer_config; + + delta &= CSR_TCFG_VAL; + timer_config = delta | CSR_TCFG_EN; + csr_write64(timer_config, LOONGARCH_CSR_TCFG); + + return 0; +} + +static unsigned long __init 
get_loops_per_jiffy(void) +{ + unsigned long lpj = (unsigned long)const_clock_freq; + + do_div(lpj, HZ); + + return lpj; +} + +static long init_timeval; + +void sync_counter(void) +{ + /* Ensure counter begin at 0 */ + csr_write64(-init_timeval, LOONGARCH_CSR_CNTC); +} + +int constant_clockevent_init(void) +{ + unsigned int irq; + unsigned int cpu = smp_processor_id(); + unsigned long min_delta = 0x600; + unsigned long max_delta = (1UL << 48) - 1; + struct clock_event_device *cd; + static int timer_irq_installed = 0; + + irq = get_timer_irq(); + + cd = &per_cpu(constant_clockevent_device, cpu); + + cd->name = "Constant"; + cd->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_PERCPU; + + cd->irq = irq; + cd->rating = 320; + cd->cpumask = cpumask_of(cpu); + cd->set_state_oneshot = constant_set_state_oneshot; + cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped; + cd->set_state_periodic = constant_set_state_periodic; + cd->set_state_shutdown = constant_set_state_shutdown; + cd->set_next_event = constant_timer_next_event; + cd->event_handler = constant_event_handler; + + clockevents_config_and_register(cd, const_clock_freq, min_delta, max_delta); + + if (timer_irq_installed) + return 0; + + timer_irq_installed = 1; + + sync_counter(); + + if (request_irq(irq, constant_timer_interrupt, IRQF_PERCPU | IRQF_TIMER, "timer", NULL)) + pr_err("Failed to request irq %d (timer)\n", irq); + + lpj_fine = get_loops_per_jiffy(); + pr_info("Constant clock event device register\n"); + + return 0; +} + +static u64 read_const_counter(struct clocksource *clk) +{ + return drdtime(); +} + +static u64 native_sched_clock(void) +{ + return read_const_counter(NULL); +} + +static struct clocksource clocksource_const = { + .name = "Constant", + .rating = 400, + .read = read_const_counter, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .vdso_clock_mode = VDSO_CLOCKMODE_CPU, +}; + +int __init constant_clocksource_init(void) +{ + int res; + unsigned long freq = const_clock_freq; + + res = clocksource_register_hz(&clocksource_const, freq); + + sched_clock_register(native_sched_clock, 64, freq); + + pr_info("Constant clock source device register\n"); + + return res; +} + +void __init time_init(void) +{ + if (!cpu_has_cpucfg) + const_clock_freq = cpu_clock_freq; + else + const_clock_freq = calc_const_freq(); + + init_timeval = drdtime() - csr_read64(LOONGARCH_CSR_CNTC); + + constant_clockevent_init(); + constant_clocksource_init(); +} diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c new file mode 100644 index 000000000000..3b2cbb95875b --- /dev/null +++ b/arch/loongarch/kernel/topology.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +static struct cpu cpu_device; + +static int __init topology_init(void) +{ + return register_cpu(&cpu_device, 0); +} + +subsys_initcall(topology_init); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c new file mode 100644 index 000000000000..ca6535e49bc6 --- /dev/null +++ b/arch/loongarch/kernel/traps.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "access-helper.h" + +extern asmlinkage void handle_ade(void); +extern asmlinkage void handle_ale(void); +extern asmlinkage void handle_sys(void); +extern asmlinkage void handle_bp(void); +extern asmlinkage void handle_ri(void); +extern asmlinkage void handle_fpu(void); +extern asmlinkage void handle_fpe(void); +extern asmlinkage void handle_lbt(void); +extern asmlinkage void handle_lsx(void); +extern asmlinkage void handle_lasx(void); +extern asmlinkage void handle_reserved(void); +extern asmlinkage void handle_watch(void); +extern asmlinkage void handle_vint(void); + +static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, + const char *loglvl, bool user) +{ + unsigned long addr; + unsigned long *sp = (unsigned long *)(regs->regs[3] & ~3); + + printk("%sCall Trace:", loglvl); +#ifdef CONFIG_KALLSYMS + printk("%s\n", loglvl); +#endif + while (!kstack_end(sp)) { + if (__get_addr(&addr, sp++, user)) { + printk("%s (Bad stack address)", loglvl); + break; + } + if (__kernel_text_address(addr)) + print_ip_sym(loglvl, addr); + } + printk("%s\n", loglvl); +} + +static void show_stacktrace(struct task_struct *task, + const struct pt_regs *regs, const char *loglvl, bool user) +{ + int i; + const int field = 2 * sizeof(unsigned long); + unsigned long stackdata; + unsigned long *sp = (unsigned long *)regs->regs[3]; + + printk("%sStack :", loglvl); + i = 0; + while ((unsigned long) sp & (PAGE_SIZE - 1)) { + if (i && ((i % (64 / field)) == 0)) { + pr_cont("\n"); + printk("%s ", loglvl); + } + if (i > 39) { + pr_cont(" ..."); + break; + } + + if (__get_addr(&stackdata, sp++, user)) { + pr_cont(" (Bad stack address)"); + break; + } + + pr_cont(" %0*lx", field, stackdata); + i++; + } + pr_cont("\n"); + show_backtrace(task, regs, loglvl, user); +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + struct pt_regs regs; + + regs.csr_crmd = 0; + if (sp) { + regs.csr_era = 0; + regs.regs[1] = 0; + regs.regs[3] = (unsigned long)sp; + } else { + if (!task || task == current) + prepare_frametrace(®s); + else { + regs.csr_era = task->thread.reg01; + regs.regs[1] = 0; + regs.regs[3] = task->thread.reg03; + regs.regs[22] = task->thread.reg22; + } + } + + show_stacktrace(task, ®s, loglvl, false); +} + +static void show_code(unsigned int *pc, bool user) +{ + long i; + unsigned int insn; + + printk("Code:"); + + for(i = -3 ; i < 6 ; i++) { + if (__get_inst(&insn, pc + i, user)) { + pr_cont(" (Bad address in era)\n"); + break; + } + pr_cont("%c%08x%c", (i?' ':'<'), insn, (i?' 
':'>'));
+ }
+ pr_cont("\n");
+}
+
+static void __show_regs(const struct pt_regs *regs)
+{
+ const int field = 2 * sizeof(unsigned long);
+ unsigned int excsubcode;
+ unsigned int exccode;
+ int i;
+
+ show_regs_print_info(KERN_DEFAULT);
+
+ /*
+ * Saved main processor registers
+ */
+ for (i = 0; i < 32; ) {
+ if ((i % 4) == 0)
+ printk("$%2d :", i);
+ pr_cont(" %0*lx", field, regs->regs[i]);
+
+ i++;
+ if ((i % 4) == 0)
+ pr_cont("\n");
+ }
+
+ /*
+ * Saved csr registers
+ */
+ printk("era : %0*lx %pS\n", field, regs->csr_era,
+ (void *) regs->csr_era);
+ printk("ra : %0*lx %pS\n", field, regs->regs[1],
+ (void *) regs->regs[1]);
+
+ printk("CSR crmd: %08lx ", regs->csr_crmd);
+ printk("CSR prmd: %08lx ", regs->csr_prmd);
+ printk("CSR euen: %08lx ", regs->csr_euen);
+ printk("CSR ecfg: %08lx ", regs->csr_ecfg);
+ printk("CSR estat: %08lx ", regs->csr_estat);
+
+ pr_cont("\n");
+
+ exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT;
+ printk("ExcCode : %x (SubCode %x)\n", exccode, excsubcode);
+
+ if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE)
+ printk("BadVA : %0*lx\n", field, regs->csr_badvaddr);
+
+ printk("PrId : %08x (%s)\n", read_cpucfg(LOONGARCH_CPUCFG0),
+ cpu_family_string());
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ __show_regs((struct pt_regs *)regs);
+ dump_stack();
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ __show_regs(regs);
+ print_modules();
+ printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
+ current->comm, current->pid, current_thread_info(), current);
+
+ show_stacktrace(current, regs, KERN_DEFAULT, user_mode(regs));
+ show_code((void *)regs->csr_era, user_mode(regs));
+ printk("\n");
+}
+
+static DEFINE_RAW_SPINLOCK(die_lock);
+
+void __noreturn die(const char *str, struct pt_regs *regs)
+{
+ static int die_counter;
+ int sig = SIGSEGV;
+
+ oops_enter();
+
+ if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr,
+ SIGSEGV) == NOTIFY_STOP)
+ sig = 0;
+
+ console_verbose();
+ raw_spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
+
+ printk("%s[#%d]:\n", str, ++die_counter);
+ show_registers(regs);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ raw_spin_unlock_irq(&die_lock);
+
+ oops_exit();
+
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+
+ if (panic_on_oops)
+ panic("Fatal exception");
+
+ do_exit(sig);
+}
+
+static inline void setup_vint_size(unsigned int size)
+{
+ unsigned int vs;
+
+ vs = ilog2(size/4);
+
+ if (vs == 0 || vs > 7)
+ panic("vint_size %d not supported yet", vs);
+
+ csr_xchg32(vs << CSR_ECFG_VS_SHIFT, CSR_ECFG_VS, LOONGARCH_CSR_ECFG);
+}
+
+/*
+ * Send SIGFPE according to FCSR Cause bits, which must have already
+ * been masked against Enable bits.
+ */
+static void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr,
+ struct task_struct *tsk)
+{
+ int si_code = FPE_FLTUNK;
+
+ if (fcsr & FPU_CSR_INV_X)
+ si_code = FPE_FLTINV;
+ else if (fcsr & FPU_CSR_DIV_X)
+ si_code = FPE_FLTDIV;
+ else if (fcsr & FPU_CSR_OVF_X)
+ si_code = FPE_FLTOVF;
+ else if (fcsr & FPU_CSR_UDF_X)
+ si_code = FPE_FLTUND;
+ else if (fcsr & FPU_CSR_INE_X)
+ si_code = FPE_FLTRES;
+
+ force_sig_fault(SIGFPE, si_code, fault_addr);
+}
+
+static int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr)
+{
+ int si_code;
+ struct vm_area_struct *vma;
+
+ switch (sig) {
+ case 0:
+ return 0;
+
+ case SIGFPE:
+ force_fcsr_sig(fcsr, fault_addr, current);
+ return 1;
+
+ case SIGBUS:
+ force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr);
+ return 1;
+
+ case SIGSEGV:
+ mmap_read_lock(current->mm);
+ vma = find_vma(current->mm, (unsigned long)fault_addr);
+ if (vma && (vma->vm_start <= (unsigned long)fault_addr))
+ si_code = SEGV_ACCERR;
+ else
+ si_code = SEGV_MAPERR;
+ mmap_read_unlock(current->mm);
+ force_sig_fault(SIGSEGV, si_code, fault_addr);
+ return 1;
+
+ default:
+ force_sig(sig);
+ return 1;
+ }
+}
+
+/*
+ * Delayed fp exceptions when doing a lazy ctx switch
+ */
+asmlinkage void noinstr do_fpe(struct pt_regs *regs, unsigned long fcsr)
+{
+ int sig;
+ void __user *fault_addr;
+ irqentry_state_t state = irqentry_enter(regs);
+
+ if (notify_die(DIE_FP, "FP exception", regs, 0, current->thread.trap_nr,
+ SIGFPE) == NOTIFY_STOP)
+ goto out;
+
+ /* Clear FCSR.Cause before enabling interrupts */
+ write_fcsr(LOONGARCH_FCSR0, fcsr & ~mask_fcsr_x(fcsr));
+ local_irq_enable();
+
+ die_if_kernel("FP exception in kernel code", regs);
+
+ sig = SIGFPE;
+ fault_addr = (void __user *) regs->csr_era;
+
+ /* 
Send a signal if required. */ + process_fpemu_return(sig, fault_addr, fcsr); + +out: + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_ade(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + die_if_kernel("Kernel ade access", regs); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)regs->csr_badvaddr); + + irqentry_exit(regs, state); +} + +/* sysctl hooks */ +int unaligned_enabled __read_mostly = 1; /* Enabled by default */ +int no_unaligned_warning __read_mostly = 1; /* Only 1 warning by default */ + +asmlinkage void noinstr do_ale(struct pt_regs *regs) +{ + unsigned int *pc; + irqentry_state_t state = irqentry_enter(regs); + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr); + + /* + * Did we catch a fault trying to load an instruction? + */ + if (regs->csr_badvaddr == regs->csr_era) + goto sigbus; + if (user_mode(regs) && !test_thread_flag(TIF_FIXADE)) + goto sigbus; + if (!unaligned_enabled) + goto sigbus; + if (!no_unaligned_warning) + show_registers(regs); + + pc = (unsigned int *)exception_era(regs); + + emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc); + + goto out; + +sigbus: + die_if_kernel("Kernel ale access", regs); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr); + +out: + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_bp(struct pt_regs *regs) +{ + bool user = user_mode(regs); + unsigned int opcode, bcode; + unsigned long era = exception_era(regs); + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + current->thread.trap_nr = read_csr_excode(); + if (__get_inst(&opcode, (u32 *)era, user)) + goto out_sigsegv; + + bcode = (opcode & 0x7fff); + + /* + * notify the kprobe handlers, if instruction is likely to + * pertain to them. 
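+ * (Each notify_die() below returns NOTIFY_STOP when a registered
+ * debug handler consumes the break, in which case no signal is
+ * raised here.)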
+ */ + switch (bcode) { + case BRK_KPROBE_BP: + if (notify_die(DIE_BREAK, "Kprobe", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + case BRK_KPROBE_SSTEPBP: + if (notify_die(DIE_SSTEPBP, "Kprobe_SingleStep", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + case BRK_UPROBE_BP: + if (notify_die(DIE_UPROBE, "Uprobe", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + case BRK_UPROBE_XOLBP: + if (notify_die(DIE_UPROBE_XOL, "Uprobe_XOL", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + default: + if (notify_die(DIE_TRAP, "Break", regs, bcode, + current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) + goto out; + else + break; + } + + switch (bcode) { + case BRK_BUG: + die_if_kernel("Kernel bug detected", regs); + force_sig(SIGTRAP); + break; + case BRK_DIVZERO: + die_if_kernel("Break instruction in kernel code", regs); + force_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)regs->csr_era); + break; + case BRK_OVERFLOW: + die_if_kernel("Break instruction in kernel code", regs); + force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->csr_era); + break; + default: + die_if_kernel("Break instruction in kernel code", regs); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->csr_era); + break; + } + +out: + local_irq_disable(); + irqentry_exit(regs, state); + return; + +out_sigsegv: + force_sig(SIGSEGV); + goto out; +} + +asmlinkage void noinstr do_watch(struct pt_regs *regs) +{ + pr_warn("Hardware watch point handler not implemented!\n"); +} + +asmlinkage void noinstr do_ri(struct pt_regs *regs) +{ + int status = -1; + unsigned int opcode = 0; + unsigned int __user *era = (unsigned int __user *)exception_era(regs); + unsigned long old_era = regs->csr_era; + unsigned long old_ra = regs->regs[1]; + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + current->thread.trap_nr = read_csr_excode(); + + if (notify_die(DIE_RI, "RI Fault", regs, 0, current->thread.trap_nr, + SIGILL) == NOTIFY_STOP) + goto out; + + die_if_kernel("Reserved instruction in kernel code", regs); + + if (unlikely(compute_return_era(regs) < 0)) + goto out; + + if (unlikely(get_user(opcode, era) < 0)) { + status = SIGSEGV; + current->thread.error_code = 1; + } + + if (status < 0) + status = SIGILL; + + if (unlikely(status > 0)) { + regs->csr_era = old_era; /* Undo skip-over. */ + regs->regs[1] = old_ra; + force_sig(status); + } + +out: + local_irq_disable(); + irqentry_exit(regs, state); +} + +static void init_restore_fp(void) +{ + if (!used_math()) { + /* First time FP context user. 
*/ + init_fpu(); + } else { + /* This task has formerly used the FP context */ + if (!is_fpu_owner()) + own_fpu_inatomic(1); + } + + BUG_ON(!is_fp_enabled()); +} + +asmlinkage void noinstr do_fpu(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + die_if_kernel("do_fpu invoked from kernel context!", regs); + + preempt_disable(); + init_restore_fp(); + preempt_enable(); + + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_lsx(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); + force_sig(SIGILL); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_lasx(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); + force_sig(SIGILL); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_lbt(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); + force_sig(SIGILL); + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void noinstr do_reserved(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + local_irq_enable(); + /* + * Game over - no way to handle this if it ever occurs. Most probably + * caused by a fatal error after another hardware/software error. + */ + pr_err("Caught reserved exception %u on pid:%d [%s] - should not happen\n", + read_csr_excode(), current->pid, current->comm); + die_if_kernel("do_reserved exception", regs); + force_sig(SIGUNUSED); + + local_irq_disable(); + irqentry_exit(regs, state); +} + +asmlinkage void cache_parity_error(void) +{ + /* For the moment, report the problem and hang. */ + printk("Cache error exception:\n"); + printk("csr_merrctl == %08x\n", csr_read32(LOONGARCH_CSR_MERRCTL)); + printk("csr_merrera == %016llx\n", csr_read64(LOONGARCH_CSR_MERRERA)); + panic("Can't handle the cache error!"); +} + +asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp) +{ + register int cpu; + register unsigned long stack; + irqentry_state_t state = irqentry_enter(regs); + + cpu = smp_processor_id(); + + if (on_irq_stack(cpu, sp)) + handle_arch_irq(regs); + else { + stack = per_cpu(irq_stack, cpu) + IRQ_STACK_START; + + /* Save task's sp on IRQ stack for unwinding */ + *(unsigned long *)stack = sp; + + __asm__ __volatile__( + "move $s0, $sp \n" /* Preserve sp */ + "move $sp, %[stk] \n" /* Switch stack */ + "move $a0, %[regs] \n" + "la $t0, handle_arch_irq \n" + "ld.d $t1, $t0, 0 \n" + "jirl $ra, $t1, 0 \n" + "move $sp, $s0 \n" /* Restore sp */ + : /* No outputs */ + : [stk] "r" (stack), [regs] "r" (regs) + : "$a0", "$a1", "$a2", "$a3", "$a4", "$a5", "$a6", "$a7", "$s0", + "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", + "memory"); + } + + irqentry_exit(regs, state); +} + +extern void tlb_init(void); +extern void cache_error_setup(void); + +unsigned long eentry; +EXPORT_SYMBOL_GPL(eentry); +unsigned long tlbrentry; +EXPORT_SYMBOL_GPL(tlbrentry); + +long exception_handlers[VECSIZE * 128 / sizeof(long)] __aligned(SZ_64K); + +static void configure_exception_vector(void) +{ + eentry = (unsigned long)exception_handlers; + tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; + + csr_write64(eentry, LOONGARCH_CSR_EENTRY); + csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); + csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); +} + +void per_cpu_trap_init(int cpu) +{ + unsigned int i; + + setup_vint_size(VECSIZE); + + 
configure_exception_vector(); + + if (!cpu_data[cpu].asid_cache) + cpu_data[cpu].asid_cache = asid_first_version(cpu); + + mmgrab(&init_mm); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); + + /* Initialise exception handlers */ + if (cpu == 0) + for (i = 0; i < 64; i++) + set_handler(i * VECSIZE, handle_reserved, VECSIZE); + + tlb_init(); + cpu_cache_init(); +} + +/* Install CPU exception handler */ +void set_handler(unsigned long offset, void *addr, unsigned long size) +{ + memcpy((void *)(eentry + offset), addr, size); + local_flush_icache_range(eentry + offset, eentry + offset + size); +} + +static const char panic_null_cerr[] = + "Trying to set NULL cache error exception handler\n"; + +/* + * Install uncached CPU exception handler. + * This is suitable only for the cache error exception which is the only + * exception handler that is being run uncached. + */ +void set_merr_handler(unsigned long offset, void *addr, unsigned long size) +{ + unsigned long uncached_eentry = TO_UNCACHE(__pa(eentry)); + + if (!addr) + panic(panic_null_cerr); + + memcpy((void *)(uncached_eentry + offset), addr, size); +} + +void __init trap_init(void) +{ + long i; + + /* Set interrupt vector handler */ + for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++) + set_handler(i * VECSIZE, handle_vint, VECSIZE); + + set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); + set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); + set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); + set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); + set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); + set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE); + set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE); + set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE); + set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE); + set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE); + set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE); + set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE); + + cache_error_setup(); + + local_flush_icache_range(eentry, eentry + 0x400); +} diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c new file mode 100644 index 000000000000..bdff825d29ef --- /dev/null +++ b/arch/loongarch/kernel/unaligned.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle unaligned accesses by emulation. + * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1996, 1998, 1999, 2002 by Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) 2014 Imagination Technologies Ltd. 
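+ *
+ * (Faulting accesses are decoded in emulate_load_store_insn() and
+ * replayed through the byte-wise unaligned_read()/unaligned_write()
+ * helpers implemented in unaligned.S.)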
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "access-helper.h" + +#ifdef CONFIG_DEBUG_FS +static u32 unaligned_instructions_user; +static u32 unaligned_instructions_kernel; +#endif + +static inline unsigned long read_fpr(unsigned int idx) +{ +#define READ_FPR(idx, __value) \ + __asm__ __volatile__("movfr2gr.d %0, $f"#idx"\n\t" : "=r"(__value)); + + unsigned long __value; + + switch (idx) { + case 0: + READ_FPR(0, __value); + break; + case 1: + READ_FPR(1, __value); + break; + case 2: + READ_FPR(2, __value); + break; + case 3: + READ_FPR(3, __value); + break; + case 4: + READ_FPR(4, __value); + break; + case 5: + READ_FPR(5, __value); + break; + case 6: + READ_FPR(6, __value); + break; + case 7: + READ_FPR(7, __value); + break; + case 8: + READ_FPR(8, __value); + break; + case 9: + READ_FPR(9, __value); + break; + case 10: + READ_FPR(10, __value); + break; + case 11: + READ_FPR(11, __value); + break; + case 12: + READ_FPR(12, __value); + break; + case 13: + READ_FPR(13, __value); + break; + case 14: + READ_FPR(14, __value); + break; + case 15: + READ_FPR(15, __value); + break; + case 16: + READ_FPR(16, __value); + break; + case 17: + READ_FPR(17, __value); + break; + case 18: + READ_FPR(18, __value); + break; + case 19: + READ_FPR(19, __value); + break; + case 20: + READ_FPR(20, __value); + break; + case 21: + READ_FPR(21, __value); + break; + case 22: + READ_FPR(22, __value); + break; + case 23: + READ_FPR(23, __value); + break; + case 24: + READ_FPR(24, __value); + break; + case 25: + READ_FPR(25, __value); + break; + case 26: + READ_FPR(26, __value); + break; + case 27: + READ_FPR(27, __value); + break; + case 28: + READ_FPR(28, __value); + break; + case 29: + READ_FPR(29, __value); + break; + case 30: + READ_FPR(30, __value); + break; + case 31: + READ_FPR(31, __value); + break; + default: + panic("unexpected idx '%d'", idx); + } +#undef READ_FPR + return __value; +} + +static inline void write_fpr(unsigned int idx, unsigned long value) +{ +#define WRITE_FPR(idx, value) \ + __asm__ __volatile__("movgr2fr.d $f"#idx", %0\n\t" :: "r"(value)); + + switch (idx) { + case 0: + WRITE_FPR(0, value); + break; + case 1: + WRITE_FPR(1, value); + break; + case 2: + WRITE_FPR(2, value); + break; + case 3: + WRITE_FPR(3, value); + break; + case 4: + WRITE_FPR(4, value); + break; + case 5: + WRITE_FPR(5, value); + break; + case 6: + WRITE_FPR(6, value); + break; + case 7: + WRITE_FPR(7, value); + break; + case 8: + WRITE_FPR(8, value); + break; + case 9: + WRITE_FPR(9, value); + break; + case 10: + WRITE_FPR(10, value); + break; + case 11: + WRITE_FPR(11, value); + break; + case 12: + WRITE_FPR(12, value); + break; + case 13: + WRITE_FPR(13, value); + break; + case 14: + WRITE_FPR(14, value); + break; + case 15: + WRITE_FPR(15, value); + break; + case 16: + WRITE_FPR(16, value); + break; + case 17: + WRITE_FPR(17, value); + break; + case 18: + WRITE_FPR(18, value); + break; + case 19: + WRITE_FPR(19, value); + break; + case 20: + WRITE_FPR(20, value); + break; + case 21: + WRITE_FPR(21, value); + break; + case 22: + WRITE_FPR(22, value); + break; + case 23: + WRITE_FPR(23, value); + break; + case 24: + WRITE_FPR(24, value); + break; + case 25: + WRITE_FPR(25, value); + break; + case 26: + WRITE_FPR(26, value); + break; + case 27: + WRITE_FPR(27, value); + break; + case 28: + WRITE_FPR(28, value); + break; + case 29: + WRITE_FPR(29, value); + break; + case 30: + WRITE_FPR(30, value); + break; + case 31: + WRITE_FPR(31, value); + break; + 
default: + panic("unexpected idx '%d'", idx); + } +#undef WRITE_FPR +} + +void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc) +{ + bool fp = false; + bool sign, write; + bool user = user_mode(regs); + unsigned int res, size = 0; + unsigned long value = 0; + union loongarch_instruction insn; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + + __get_inst(&insn.word, pc, user); + + switch (insn.reg2i12_format.opcode) { + case ldh_op: + size = 2; + sign = true; + write = false; + break; + case ldhu_op: + size = 2; + sign = false; + write = false; + break; + case sth_op: + size = 2; + sign = true; + write = true; + break; + case ldw_op: + size = 4; + sign = true; + write = false; + break; + case ldwu_op: + size = 4; + sign = false; + write = false; + break; + case stw_op: + size = 4; + sign = true; + write = true; + break; + case ldd_op: + size = 8; + sign = true; + write = false; + break; + case std_op: + size = 8; + sign = true; + write = true; + break; + case flds_op: + size = 4; + fp = true; + sign = true; + write = false; + break; + case fsts_op: + size = 4; + fp = true; + sign = true; + write = true; + break; + case fldd_op: + size = 8; + fp = true; + sign = true; + write = false; + break; + case fstd_op: + size = 8; + fp = true; + sign = true; + write = true; + break; + } + + switch (insn.reg2i14_format.opcode) { + case ldptrw_op: + size = 4; + sign = true; + write = false; + break; + case stptrw_op: + size = 4; + sign = true; + write = true; + break; + case ldptrd_op: + size = 8; + sign = true; + write = false; + break; + case stptrd_op: + size = 8; + sign = true; + write = true; + break; + } + + switch (insn.reg3_format.opcode) { + case ldxh_op: + size = 2; + sign = true; + write = false; + break; + case ldxhu_op: + size = 2; + sign = false; + write = false; + break; + case stxh_op: + size = 2; + sign = true; + write = true; + break; + case ldxw_op: + size = 4; + sign = true; + write = false; + break; + case ldxwu_op: + size = 4; + sign = false; + write = false; + break; + case stxw_op: + size = 4; + sign = true; + write = true; + break; + case ldxd_op: + size = 8; + sign = true; + write = false; + break; + case stxd_op: + size = 8; + sign = true; + write = true; + break; + case fldxs_op: + size = 4; + fp = true; + sign = true; + write = false; + break; + case fstxs_op: + size = 4; + fp = true; + sign = true; + write = true; + break; + case fldxd_op: + size = 8; + fp = true; + sign = true; + write = false; + break; + case fstxd_op: + size = 8; + fp = true; + sign = true; + write = true; + break; + } + + if (!size) + goto sigbus; + if (user && !access_ok(addr, size)) + goto sigbus; + + if (!write) { + res = unaligned_read(addr, &value, size, sign); + if (res) + goto fault; + + /* Rd is the same field in any formats */ + if (!fp) + regs->regs[insn.reg3_format.rd] = value; + else { + if (is_fpu_owner()) + write_fpr(insn.reg3_format.rd, value); + else + set_fpr64(¤t->thread.fpu.fpr[insn.reg3_format.rd], 0, value); + } + } else { + /* Rd is the same field in any formats */ + if (!fp) + value = regs->regs[insn.reg3_format.rd]; + else { + if (is_fpu_owner()) + value = read_fpr(insn.reg3_format.rd); + else + value = get_fpr64(¤t->thread.fpu.fpr[insn.reg3_format.rd], 0); + } + + res = unaligned_write(addr, value, size); + if (res) + goto fault; + } + +#ifdef CONFIG_DEBUG_FS + if (user) + unaligned_instructions_user++; + else + unaligned_instructions_kernel++; +#endif + + compute_return_era(regs); + + return; + +fault: + /* Did we have an 
exception handler installed? */ + if (fixup_exception(regs)) + return; + + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGSEGV); + + return; + +sigbus: + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGBUS); + + return; +} + +#ifdef CONFIG_DEBUG_FS +static int __init debugfs_unaligned(void) +{ + struct dentry *d; + + d = debugfs_create_dir("loongarch", NULL); + if (!d) + return -ENOMEM; + + debugfs_create_u32("unaligned_instructions_user", + S_IRUGO, d, &unaligned_instructions_user); + debugfs_create_u32("unaligned_instructions_kernel", + S_IRUGO, d, &unaligned_instructions_kernel); + + return 0; +} +arch_initcall(debugfs_unaligned); +#endif diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c new file mode 100644 index 000000000000..e47c3175bef5 --- /dev/null +++ b/arch/loongarch/kernel/vdso.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern char vdso_start[], vdso_end[]; + +/* Kernel-provided data used by the VDSO. */ +static union loongarch_vdso_data { + u8 page[PAGE_SIZE]; + struct vdso_data data[CS_BASES]; +} loongarch_vdso_data __page_aligned_data; +struct vdso_data *vdso_data = loongarch_vdso_data.data; +static struct page *vdso_pages[] = { NULL }; + +static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) +{ + current->mm->context.vdso = (void *)(new_vma->vm_start); + + return 0; +} + +struct loongarch_vdso_info vdso_info = { + .vdso = vdso_start, + .size = PAGE_SIZE, + .code_mapping = { + .name = "[vdso]", + .pages = vdso_pages, + .mremap = vdso_mremap, + }, + .data_mapping = { + .name = "[vvar]", + }, + .offset_sigreturn = vdso_offset_sigreturn, +}; + +static int __init init_vdso(void) +{ + unsigned long i, pfn; + + BUG_ON(!PAGE_ALIGNED(vdso_info.vdso)); + BUG_ON(!PAGE_ALIGNED(vdso_info.size)); + + pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso)); + for (i = 0; i < vdso_info.size / PAGE_SIZE; i++) + vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i); + + return 0; +} +subsys_initcall(init_vdso); + +static unsigned long vdso_base(void) +{ + unsigned long base = STACK_TOP; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + int ret; + unsigned long vvar_size, size, data_addr, vdso_addr; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct loongarch_vdso_info *info = current->thread.vdso; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + /* + * Determine total area size. This includes the VDSO data itself + * and the data page. + */ + vvar_size = PAGE_SIZE; + size = vvar_size + info->size; + + data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0); + if (IS_ERR_VALUE(data_addr)) { + ret = data_addr; + goto out; + } + vdso_addr = data_addr + PAGE_SIZE; + + vma = _install_special_mapping(mm, data_addr, vvar_size, + VM_READ | VM_MAYREAD, + &info->data_mapping); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; + } + + /* Map VDSO data page. 
*/ + ret = remap_pfn_range(vma, data_addr, + virt_to_phys(vdso_data) >> PAGE_SHIFT, + PAGE_SIZE, PAGE_READONLY); + if (ret) + goto out; + + /* Map VDSO code page. */ + vma = _install_special_mapping(mm, vdso_addr, info->size, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &info->code_mapping); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; + } + + mm->context.vdso = (void *)vdso_addr; + ret = 0; + +out: + mmap_write_unlock(mm); + return ret; +} diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..1c88fe808286 --- /dev/null +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + +#define PAGE_SIZE _PAGE_SIZE +#define RUNTIME_DISCARD_EXIT + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * ensure that it has .bss alignment (64K). + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) + +#include + +OUTPUT_ARCH(loongarch) +ENTRY(kernel_entry) +PHDRS { + text PT_LOAD FLAGS(7); /* RWX */ + note PT_NOTE FLAGS(4); /* R__ */ +} + +jiffies = jiffies_64; + +SECTIONS +{ + . = VMLINUX_LOAD_ADDRESS; + /* Read-only */ + _text = .; /* Text and read-only data */ + .text : { + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + } :text = 0 + _etext = .; /* End of text section */ + + EXCEPTION_TABLE(16) + + _sdata = .; /* Start of data section */ + RO_DATA(4096) + RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) + + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : { + *(.sdata) + } + _edata = .; /* End of data section */ + + /* Will be freed after init */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ + __init_begin = .; + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + + . = ALIGN(4); + + /* .exit.text is discarded at runtime, not link time, to deal with + * references from .rodata + */ + .exit.text : { + EXIT_TEXT + } + .exit.data : { + EXIT_DATA + } + + /* + * Align to 64K in attempt to eliminate holes before the + * .bss..swapper_pg_dir section at the start of .bss. This + * also satisfies PAGE_SIZE alignment as the largest page size + * allowed is 64K. + */ + . = ALIGN(0x10000); + __init_end = .; + /* freed after init ends here */ + + /* + * Force .bss to 64K alignment so that .bss..swapper_pg_dir + * gets that alignment. .sbss should be empty, so there will be + * no holes after __init_end. */ + BSS_SECTION(0, 0x10000, 8) + + _end = . ; + + STABS_DEBUG + DWARF_DEBUG + + /* These must appear regardless of . */ + .gptab.sdata : { + *(.gptab.data) + *(.gptab.sdata) + } + .gptab.sbss : { + *(.gptab.bss) + *(.gptab.sbss) + } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + *(.gnu.attributes) + *(.options) + *(.eh_frame) + } +} diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile new file mode 100644 index 000000000000..269f16f6d169 --- /dev/null +++ b/arch/loongarch/lib/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for LoongArch-specific library files.. 
+# + +lib-y += delay.o memset.o memcpy.o memmove.o \ + clear_user.o copy_user.o unaligned.o dump_tlb.o diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S new file mode 100644 index 000000000000..875a7e634220 --- /dev/null +++ b/arch/loongarch/lib/clear_user.S @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +.macro fixup_ex from, to, offset, fix +.if \fix + .section .fixup, "ax" +\to: addi.d a0, a1, \offset + jr ra + .previous +.endif + .section __ex_table, "a" + PTR \from\()b, \to\()b + .previous +.endm + +/* + * unsigned long __clear_user(void *addr, size_t size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user) + beqz a1, 2f + +1: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 1b + +2: move a0, a1 + jr ra + + fixup_ex 1, 3, 0, 1 +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S new file mode 100644 index 000000000000..b8fddc2d6384 --- /dev/null +++ b/arch/loongarch/lib/copy_user.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +.macro fixup_ex from, to, offset, fix +.if \fix + .section .fixup, "ax" +\to: addi.d a0, a2, \offset + jr ra + .previous +.endif + .section __ex_table, "a" + PTR \from\()b, \to\()b + .previous +.endm + +/* + * unsigned long __copy_user(void *to, const void *from, size_t n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user) + beqz a2, 3f + +1: ld.b t0, a1, 0 +2: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +3: move a0, a2 + jr ra + + fixup_ex 1, 4, 0, 1 + fixup_ex 2, 4, 0, 0 +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c new file mode 100644 index 000000000000..4a3c209bb335 --- /dev/null +++ b/arch/loongarch/lib/delay.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +#include +#include + +void __delay(unsigned long cycles) +{ + u64 t0 = get_cycles(); + + while ((unsigned long)(get_cycles() - t0) < cycles) + cpu_relax(); +} +EXPORT_SYMBOL(__delay); + +/* + * Division by multiplication: you don't have to worry about + * loss of precision. + * + * Use only for very small delays ( < 1 msec). Should probably use a + * lookup table, really, as the multiplications take much too long with + * short delays. 
This is a "reasonable" implementation, though (and the + * first constant multiplications gets optimized away if the delay is + * a constant) + */ + +void __udelay(unsigned long us) +{ + __delay((us * 0x000010c7ull * HZ * lpj_fine) >> 32); +} +EXPORT_SYMBOL(__udelay); + +void __ndelay(unsigned long ns) +{ + __delay((ns * 0x00000005ull * HZ * lpj_fine) >> 32); +} +EXPORT_SYMBOL(__ndelay); diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c new file mode 100644 index 000000000000..d6ef7d3c6709 --- /dev/null +++ b/arch/loongarch/lib/dump_tlb.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include + +#include +#include +#include +#include + +void dump_tlb_regs(void) +{ + const int field = 2 * sizeof(unsigned long); + + pr_info("Index : %0x\n", read_csr_tlbidx()); + pr_info("PageSize : %0x\n", read_csr_pagesize()); + pr_info("EntryHi : %0*llx\n", field, read_csr_entryhi()); + pr_info("EntryLo0 : %0*llx\n", field, read_csr_entrylo0()); + pr_info("EntryLo1 : %0*llx\n", field, read_csr_entrylo1()); +} + +static void dump_tlb(int first, int last) +{ + unsigned long s_entryhi, entryhi, asid; + unsigned long long entrylo0, entrylo1, pa; + unsigned int index; + unsigned int s_index, s_asid; + unsigned int pagesize, c0, c1, i; + unsigned long asidmask = cpu_asid_mask(¤t_cpu_data); + int pwidth = 11; + int vwidth = 11; + int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4); + + s_entryhi = read_csr_entryhi(); + s_index = read_csr_tlbidx(); + s_asid = read_csr_asid(); + + for (i = first; i <= last; i++) { + write_csr_index(i); + tlb_read(); + pagesize = read_csr_pagesize(); + entryhi = read_csr_entryhi(); + entrylo0 = read_csr_entrylo0(); + entrylo1 = read_csr_entrylo1(); + index = read_csr_tlbidx(); + asid = read_csr_asid(); + + /* EHINV bit marks entire entry as invalid */ + if (index & CSR_TLBIDX_EHINV) + continue; + /* + * ASID takes effect in absence of G (global) bit. + */ + if (!((entrylo0 | entrylo1) & ENTRYLO_G) && + asid != s_asid) + continue; + + /* + * Only print entries in use + */ + printk("Index: %2d pgsize=%x ", i, (1 << pagesize)); + + c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; + c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; + + pr_cont("va=%0*lx asid=%0*lx", + vwidth, (entryhi & ~0x1fffUL), asidwidth, asid & asidmask); + + /* NR/NX are in awkward places, so mask them off separately */ + pa = entrylo0 & ~(ENTRYLO_NR | ENTRYLO_NX); + pa = pa & PAGE_MASK; + pr_cont("\n\t["); + pr_cont("ri=%d xi=%d ", + (entrylo0 & ENTRYLO_NR) ? 1 : 0, + (entrylo0 & ENTRYLO_NX) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld] [", + pwidth, pa, c0, + (entrylo0 & ENTRYLO_D) ? 1 : 0, + (entrylo0 & ENTRYLO_V) ? 1 : 0, + (entrylo0 & ENTRYLO_G) ? 1 : 0, + (entrylo0 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT); + /* NR/NX are in awkward places, so mask them off separately */ + pa = entrylo1 & ~(ENTRYLO_NR | ENTRYLO_NX); + pa = pa & PAGE_MASK; + pr_cont("ri=%d xi=%d ", + (entrylo1 & ENTRYLO_NR) ? 1 : 0, + (entrylo1 & ENTRYLO_NX) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n", + pwidth, pa, c1, + (entrylo1 & ENTRYLO_D) ? 1 : 0, + (entrylo1 & ENTRYLO_V) ? 1 : 0, + (entrylo1 & ENTRYLO_G) ? 
1 : 0, + (entrylo1 & ENTRYLO_PLV) >> ENTRYLO_PLV_SHIFT); + } + printk("\n"); + + write_csr_entryhi(s_entryhi); + write_csr_tlbidx(s_index); + write_csr_asid(s_asid); +} + +void dump_tlb_all(void) +{ + dump_tlb(0, current_cpu_data.tlbsize - 1); +} diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S new file mode 100644 index 000000000000..ddb3f710012b --- /dev/null +++ b/arch/loongarch/lib/memcpy.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +/* + * void *memcpy(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(memcpy) + move a3, a0 + beqz a2, 2f + +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S new file mode 100644 index 000000000000..bdc2e877aca9 --- /dev/null +++ b/arch/loongarch/lib/memmove.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +/* + * void *rmemcpy(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(rmemcpy) + move a3, a0 + beqz a2, 2f + + add.d a0, a0, a2 + add.d a1, a1, a2 + +1: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(rmemcpy) + +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 2f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + +1: b memcpy + +2: b rmemcpy +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S new file mode 100644 index 000000000000..4efef40815d1 --- /dev/null +++ b/arch/loongarch/lib/memset.S @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Loongson Technology Co., Ltd. 
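+ *
+ * A simple byte-at-a-time fill; functionally it matches the C sketch
+ *
+ *	void *memset(void *s, int c, size_t n)
+ *	{
+ *		unsigned char *p = s;
+ *
+ *		while (n--)
+ *			*p++ = (unsigned char)c;
+ *		return s;
+ *	}
+ *
+ * so a wider, alignment-aware loop can replace it later without
+ * changing the interface.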
+ */ + +#include +#include +#include + +/* + * void *memset(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(memset) + move a3, a0 + beqz a2, 2f + +1: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 1b + +2: move a0, a3 + jr ra +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S new file mode 100644 index 000000000000..b42c4a2edfad --- /dev/null +++ b/arch/loongarch/lib/unaligned.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include + +#include +#include +#include +#include +#include + +.macro fixup_ex from, to, fix +.if \fix + .section .fixup, "ax" +\to: li.w a0, -EFAULT + jr ra + .previous +.endif + .section __ex_table, "a" + PTR \from\()b, \to\()b + .previous +.endm + +/* + * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign) + * + * a0: addr + * a1: value + * a2: n + * a3: sign + */ +SYM_FUNC_START(unaligned_read) + beqz a2, 5f + + li.w t2, 0 + addi.d t0, a2, -1 + slli.d t1, t0, 3 + add.d a0, a0, t0 + + beqz a3, 2f +1: ld.b t3, a0, 0 + b 3f + +2: ld.bu t3, a0, 0 +3: sll.d t3, t3, t1 + or t2, t2, t3 + addi.d t1, t1, -8 + addi.d a0, a0, -1 + addi.d a2, a2, -1 + bgtz a2, 2b +4: st.d t2, a1, 0 + + move a0, a2 + jr ra + +5: li.w a0, -EFAULT + jr ra + + fixup_ex 1, 6, 1 + fixup_ex 2, 6, 0 + fixup_ex 4, 6, 0 +SYM_FUNC_END(unaligned_read) + +/* + * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n) + * + * a0: addr + * a1: value + * a2: n + */ +SYM_FUNC_START(unaligned_write) + beqz a2, 3f + + li.w t0, 0 +1: srl.d t1, a1, t0 +2: st.b t1, a0, 0 + addi.d t0, t0, 8 + addi.d a2, a2, -1 + addi.d a0, a0, 1 + bgtz a2, 1b + + move a0, a2 + jr ra + +3: li.w a0, -EFAULT + jr ra + + fixup_ex 2, 4, 1 +SYM_FUNC_END(unaligned_write) diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile new file mode 100644 index 000000000000..d89d3c8fe382 --- /dev/null +++ b/arch/loongarch/loongson64/Makefile @@ -0,0 +1,5 @@ +# +# All Loongson based systems +# + +obj-y += setup.o init.o env.o reset.o irq.o mem.o rtc.o diff --git a/arch/loongarch/loongson64/Platform b/arch/loongarch/loongson64/Platform new file mode 100644 index 000000000000..15a033be4699 --- /dev/null +++ b/arch/loongarch/loongson64/Platform @@ -0,0 +1,13 @@ +# +# Loongson Processors' Support +# + +cflags-$(CONFIG_CPU_LOONGSON64) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + +# +# Loongson Machines Support +# + +platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ +cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/loongarch/include/asm/mach-loongson64 +load-$(CONFIG_MACH_LOONGSON64) += 0x9000000000200000 diff --git a/arch/loongarch/loongson64/env.c b/arch/loongarch/loongson64/env.c new file mode 100644 index 000000000000..cd1629c6ed4e --- /dev/null +++ b/arch/loongarch/loongson64/env.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include + +u64 efi_system_table; +struct loongson_system_configuration loongson_sysconf; +EXPORT_SYMBOL(loongson_sysconf); + +u64 loongson_chipcfg[MAX_PACKAGES]; +u64 loongson_chiptemp[MAX_PACKAGES]; +u64 loongson_freqctrl[MAX_PACKAGES]; +unsigned long long smp_group[MAX_PACKAGES]; + +static void __init register_addrs_set(u64 
*registers, const u64 addr, int num) +{ + u64 i; + + for (i = 0; i < num; i++) { + *registers = (i << 44) | addr; + registers++; + } +} + +void __init init_environ(void) +{ + int efi_boot = fw_arg0; + struct efi_memory_map_data data; + void *fdt_ptr = early_memremap_ro(fw_arg1, SZ_64K); + + if (efi_boot) + set_bit(EFI_BOOT, &efi.flags); + else + clear_bit(EFI_BOOT, &efi.flags); + + early_init_dt_scan(fdt_ptr); + early_init_fdt_reserve_self(); + efi_system_table = efi_get_fdt_params(&data); + + efi_memmap_init_early(&data); + memblock_reserve(data.phys_map & PAGE_MASK, + PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK))); + + register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16); + register_addrs_set(loongson_chipcfg, TO_UNCACHE(0x1fe00180), 16); + register_addrs_set(loongson_chiptemp, TO_UNCACHE(0x1fe0019c), 16); + register_addrs_set(loongson_freqctrl, TO_UNCACHE(0x1fe001d0), 16); +} + +static int __init init_cpu_fullname(void) +{ + int cpu; + + if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) { + for (cpu = 0; cpu < NR_CPUS; cpu++) + __cpu_full_name[cpu] = loongson_sysconf.cpuname; + } + return 0; +} +arch_initcall(init_cpu_fullname); + +static ssize_t boardinfo_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, + "BIOS Information\n" + "Vendor\t\t\t: %s\n" + "Version\t\t\t: %s\n" + "ROM Size\t\t: %d KB\n" + "Release Date\t\t: %s\n\n" + "Board Information\n" + "Manufacturer\t\t: %s\n" + "Board Name\t\t: %s\n" + "Family\t\t\t: LOONGSON64\n\n", + b_info.bios_vendor, b_info.bios_version, + b_info.bios_size, b_info.bios_release_date, + b_info.board_vendor, b_info.board_name); +} + +static struct kobj_attribute boardinfo_attr = __ATTR(boardinfo, 0444, + boardinfo_show, NULL); + +static int __init boardinfo_init(void) +{ + struct kobject *loongson_kobj; + + loongson_kobj = kobject_create_and_add("loongson", firmware_kobj); + + return sysfs_create_file(loongson_kobj, &boardinfo_attr.attr); +} +late_initcall(boardinfo_init); diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c new file mode 100644 index 000000000000..42f0988a1e24 --- /dev/null +++ b/arch/loongarch/loongson64/init.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define SMBIOS_BIOSSIZE_OFFSET 0x09 +#define SMBIOS_BIOSEXTERN_OFFSET 0x13 +#define SMBIOS_FREQLOW_OFFSET 0x16 +#define SMBIOS_FREQHIGH_OFFSET 0x17 +#define SMBIOS_FREQLOW_MASK 0xFF +#define SMBIOS_CORE_PACKAGE_OFFSET 0x23 +#define LOONGSON_EFI_ENABLE (1 << 3) + +struct loongson_board_info b_info; +static const char dmi_empty_string[] = " "; + +static const char *dmi_string_parse(const struct dmi_header *dm, u8 s) +{ + const u8 *bp = ((u8 *) dm) + dm->length; + + if (s) { + s--; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; + s--; + } + + if (*bp != 0) { + size_t len = strlen(bp)+1; + size_t cmp_len = len > 8 ? 
8 : len;
+
+			if (!memcmp(bp, dmi_empty_string, cmp_len))
+				return dmi_empty_string;
+
+			return bp;
+		}
+	}
+
+	return "";
+}
+
+static void __init parse_cpu_table(const struct dmi_header *dm)
+{
+	long freq_temp = 0;
+	char *dmi_data = (char *)dm;
+
+	freq_temp = ((*(dmi_data + SMBIOS_FREQHIGH_OFFSET) << 8) +
+		     ((*(dmi_data + SMBIOS_FREQLOW_OFFSET)) & SMBIOS_FREQLOW_MASK));
+	cpu_clock_freq = freq_temp * 1000000;
+
+	loongson_sysconf.cpuname = (void *)dmi_string_parse(dm, dmi_data[16]);
+	loongson_sysconf.cores_per_package = *(dmi_data + SMBIOS_CORE_PACKAGE_OFFSET);
+
+	pr_info("CpuClock = %llu\n", cpu_clock_freq);
+}
+
+static void __init parse_bios_table(const struct dmi_header *dm)
+{
+	int bios_extern;
+	char *dmi_data = (char *)dm;
+
+	bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET);
+	b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET);
+
+	if (bios_extern & LOONGSON_EFI_ENABLE)
+		set_bit(EFI_BOOT, &efi.flags);
+	else
+		clear_bit(EFI_BOOT, &efi.flags);
+}
+
+static void __init find_tokens(const struct dmi_header *dm, void *dummy)
+{
+	switch (dm->type) {
+	case 0x0: /* Extern BIOS */
+		parse_bios_table(dm);
+		break;
+	case 0x4: /* Calling interface */
+		parse_cpu_table(dm);
+		break;
+	}
+}
+
+static void __init smbios_parse(void)
+{
+	b_info.bios_vendor = (void *)dmi_get_system_info(DMI_BIOS_VENDOR);
+	b_info.bios_version = (void *)dmi_get_system_info(DMI_BIOS_VERSION);
+	b_info.bios_release_date = (void *)dmi_get_system_info(DMI_BIOS_DATE);
+	b_info.board_vendor = (void *)dmi_get_system_info(DMI_BOARD_VENDOR);
+	b_info.board_name = (void *)dmi_get_system_info(DMI_BOARD_NAME);
+	dmi_walk(find_tokens, NULL);
+}
+
+void __init early_init(void)
+{
+	init_environ();
+	memblock_init();
+}
+
+void __init platform_init(void)
+{
+	efi_init();
+#ifdef CONFIG_ACPI_TABLE_UPGRADE
+	acpi_table_upgrade();
+#endif
+#ifdef CONFIG_ACPI
+	acpi_gbl_use_default_register_widths = false;
+	acpi_boot_table_init();
+	acpi_boot_init();
+#endif
+
+	dmi_setup();
+	smbios_parse();
+	pr_info("The BIOS Version: %s\n", b_info.bios_version);
+
+	efi_runtime_init();
+}
diff --git a/arch/loongarch/loongson64/irq.c b/arch/loongarch/loongson64/irq.c
new file mode 100644
index 000000000000..981f6447a7f9
--- /dev/null
+++ b/arch/loongarch/loongson64/irq.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct acpi_madt_lio_pic *acpi_liointc;
+struct acpi_madt_eio_pic *acpi_eiointc[MAX_IO_PICS];
+
+struct acpi_madt_ht_pic *acpi_htintc;
+struct acpi_madt_lpc_pic *acpi_pchlpc;
+struct acpi_madt_msi_pic *acpi_pchmsi[MAX_IO_PICS];
+struct acpi_madt_bio_pic *acpi_pchpic[MAX_IO_PICS];
+
+struct irq_domain *cpu_domain;
+struct irq_domain *liointc_domain;
+struct irq_domain *pch_lpc_domain;
+struct irq_domain *pch_msi_domain[MAX_IO_PICS];
+struct irq_domain *pch_pic_domain[MAX_IO_PICS];
+
+int find_pch_pic(u32 gsi)
+{
+	int i, start, end;
+
+	/* Find the PCH_PIC that manages this GSI.
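+	 * Each PCH_PIC covers the half-open GSI range
+	 * [gsi_base, gsi_base + size); e.g. with hypothetical firmware
+	 * values gsi_base = 64 and size = 64, GSIs 64..127 map to entry i.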
*/ + for(i = 0; i < loongson_sysconf.nr_io_pics; i++) { + struct acpi_madt_bio_pic *irq_cfg = acpi_pchpic[i]; + + start = irq_cfg->gsi_base; + end = irq_cfg->gsi_base + irq_cfg->size; + if (gsi >= start && gsi < end) + return i; + } + + printk(KERN_ERR "ERROR: Unable to locate PCH_PIC for GSI %d\n", gsi); + return -1; +} + +void __init setup_IRQ(void) +{ + int i; + struct irq_domain *parent_domain; + + if (!acpi_eiointc[0]) + cpu_data[0].options &= ~LOONGARCH_CPU_EXTIOI; + + cpu_domain = loongarch_cpu_irq_init(); + liointc_domain = liointc_acpi_init(cpu_domain, acpi_liointc); + + if (cpu_has_extioi) { + pr_info("Using EIOINTC interrupt mode\n"); + for(i = 0; i < loongson_sysconf.nr_io_pics; i++) { + parent_domain = eiointc_acpi_init(cpu_domain, acpi_eiointc[i]); + pch_pic_domain[i] = pch_pic_acpi_init(parent_domain, acpi_pchpic[i]); + pch_msi_domain[i] = pch_msi_acpi_init(parent_domain, acpi_pchmsi[i]); + } + } else { + pr_info("Using HTVECINTC interrupt mode\n"); + parent_domain = htvec_acpi_init(liointc_domain, acpi_htintc); + pch_pic_domain[0] = pch_pic_acpi_init(parent_domain, acpi_pchpic[0]); + pch_msi_domain[0] = pch_msi_acpi_init(parent_domain, acpi_pchmsi[0]); + } + + irq_set_default_host(pch_pic_domain[0]); + pch_lpc_domain = pch_lpc_acpi_init(pch_pic_domain[0], acpi_pchlpc); +} + +void __init arch_init_irq(void) +{ + clear_csr_ecfg(ECFG0_IM); + clear_csr_estat(ESTATF_IP); + + setup_IRQ(); + + set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); +} diff --git a/arch/loongarch/loongson64/mem.c b/arch/loongarch/loongson64/mem.c new file mode 100644 index 000000000000..c3be7db59000 --- /dev/null +++ b/arch/loongarch/loongson64/mem.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include +#include + +#include +#include + +#include + +void __init memblock_init(void) +{ + u32 mem_type; + u64 mem_start, mem_end, mem_size; + efi_memory_desc_t *md; + + /* Parse memory information */ + for_each_efi_memory_desc(md) { + mem_type = md->type; + mem_start = md->phys_addr; + mem_size = md->num_pages << EFI_PAGE_SHIFT; + mem_end = mem_start + mem_size; + + switch (mem_type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_PERSISTENT_MEMORY: + case EFI_CONVENTIONAL_MEMORY: + memblock_add(mem_start, mem_size); + if (max_low_pfn < (mem_end >> PAGE_SHIFT)) + max_low_pfn = mem_end >> PAGE_SHIFT; + break; + case EFI_PAL_CODE: + case EFI_UNUSABLE_MEMORY: + case EFI_ACPI_RECLAIM_MEMORY: + memblock_add(mem_start, mem_size); + fallthrough; + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + memblock_reserve(mem_start, mem_size); + break; + } + } + + memblock_set_current_limit(PFN_PHYS(max_low_pfn)); + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0); + + /* Reserve the first 2MB */ + memblock_reserve(PHYS_OFFSET, 0x200000); + + /* Reserve the kernel text/data/bss */ + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&_end) - __pa_symbol(&_text)); + + /* Reserve the initrd */ + reserve_initrd_mem(); +} diff --git a/arch/loongarch/loongson64/reset.c b/arch/loongarch/loongson64/reset.c new file mode 100644 index 000000000000..8c4085f81618 --- /dev/null +++ b/arch/loongarch/loongson64/reset.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen, 
chenhuacai@loongson.cn + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void loongson_restart(void) +{ +#ifdef CONFIG_EFI + if (efi_capsule_pending(NULL)) + efi_reboot(REBOOT_WARM, NULL); + else + efi_reboot(REBOOT_COLD, NULL); +#endif + if (!acpi_disabled) + acpi_reboot(); + + while (1) { + __arch_cpu_idle(); + } +} + +static void loongson_poweroff(void) +{ +#ifdef CONFIG_EFI + efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL); +#endif + while (1) { + __arch_cpu_idle(); + } +} + +static int __init loongarch_reboot_setup(void) +{ + pm_restart = loongson_restart; + pm_power_off = loongson_poweroff; + + return 0; +} + +arch_initcall(loongarch_reboot_setup); diff --git a/arch/loongarch/loongson64/rtc.c b/arch/loongarch/loongson64/rtc.c new file mode 100644 index 000000000000..2472f3a70f52 --- /dev/null +++ b/arch/loongarch/loongson64/rtc.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include + +#define RTC_TOYREAD0 0x2C +#define RTC_YEAR 0x30 + +unsigned long loongson_get_rtc_time(void) +{ + unsigned int year, mon, day, hour, min, sec; + unsigned int value; + + value = ls7a_readl(LS7A_RTC_REG_BASE + RTC_TOYREAD0); + sec = (value >> 4) & 0x3f; + min = (value >> 10) & 0x3f; + hour = (value >> 16) & 0x1f; + day = (value >> 21) & 0x1f; + mon = (value >> 26) & 0x3f; + year = ls7a_readl(LS7A_RTC_REG_BASE + RTC_YEAR); + + year = 1900 + year; + + return mktime64(year, mon, day, hour, min, sec); +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + ts->tv_sec = loongson_get_rtc_time(); + ts->tv_nsec = 0; +} diff --git a/arch/loongarch/loongson64/setup.c b/arch/loongarch/loongson64/setup.c new file mode 100644 index 000000000000..701708769bc0 --- /dev/null +++ b/arch/loongarch/loongson64/setup.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include + +#ifdef CONFIG_VT +#include +#include +#include +#endif + +#include + +const char *get_system_type(void) +{ + return "generic-loongson-machine"; +} + +void __init plat_mem_setup(void) +{ +} + +static int __init register_gop_device(void) +{ + void *pd; + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + return 0; + pd = platform_device_register_data(NULL, "efi-framebuffer", 0, + &screen_info, sizeof(screen_info)); + return PTR_ERR_OR_ZERO(pd); +} +subsys_initcall(register_gop_device); diff --git a/arch/loongarch/mm/Makefile b/arch/loongarch/mm/Makefile new file mode 100644 index 000000000000..a3a137709650 --- /dev/null +++ b/arch/loongarch/mm/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux/LoongArch-specific parts of the memory manager. 
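+#
+# The obj-$(CONFIG_...) entries below are built only when the matching
+# option is enabled, e.g. hugetlbpage.o only on CONFIG_HUGETLB_PAGE=y
+# kernels.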
+# + +obj-y += init.o cache.o tlb.o tlbex.o extable.o \ + fault.o ioremap.o maccess.o mmap.o pgtable.o page.o + +obj-$(CONFIG_64BIT) += pgtable-64.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c new file mode 100644 index 000000000000..9ba646c7a3f2 --- /dev/null +++ b/arch/loongarch/mm/cache.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * LoongArch maintains ICache/DCache coherency by hardware, + * we just need "ibar" to avoid instruction hazard here. + */ +void local_flush_icache_range(unsigned long start, unsigned long end) +{ + asm volatile ("\tibar 0\n"::); +} +EXPORT_SYMBOL(local_flush_icache_range); + +void cache_error_setup(void) +{ + extern char __weak except_vec_cex; + set_merr_handler(0x0, &except_vec_cex, 0x80); +} + +static unsigned long icache_size __read_mostly; +static unsigned long dcache_size __read_mostly; +static unsigned long vcache_size __read_mostly; +static unsigned long scache_size __read_mostly; + +static char *way_string[] = { NULL, "direct mapped", "2-way", + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", +}; + +static void probe_pcache(void) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + unsigned int lsize, sets, ways; + unsigned int config; + + config = read_cpucfg(LOONGARCH_CPUCFG17); + lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); + sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); + ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; + + c->icache.linesz = lsize; + c->icache.sets = sets; + c->icache.ways = ways; + icache_size = sets * ways * lsize; + c->icache.waysize = icache_size / c->icache.ways; + + config = read_cpucfg(LOONGARCH_CPUCFG18); + lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); + sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); + ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; + + c->dcache.linesz = lsize; + c->dcache.sets = sets; + c->dcache.ways = ways; + dcache_size = sets * ways * lsize; + c->dcache.waysize = dcache_size / c->dcache.ways; + + c->options |= LOONGARCH_CPU_PREFETCH; + + pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", + icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz); + + pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", + dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz); +} + +static void probe_vcache(void) +{ + struct cpuinfo_loongarch *c = ¤t_cpu_data; + unsigned int lsize, sets, ways; + unsigned int config; + + config = read_cpucfg(LOONGARCH_CPUCFG19); + lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); + sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); + ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; + + c->vcache.linesz = lsize; + c->vcache.sets = sets; + c->vcache.ways = ways; + vcache_size = lsize * sets * ways; + c->vcache.waysize = vcache_size / c->vcache.ways; + + pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", + vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); +} + +static void probe_scache(void) +{ 
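+	/*
+	 * Same decoding as the L1/L2 probes above, but from CPUCFG20:
+	 * line size, set count and way count multiply out to the total
+	 * L3 ("secondary") cache capacity.
+	 */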
+ struct cpuinfo_loongarch *c = ¤t_cpu_data; + unsigned int lsize, sets, ways; + unsigned int config; + + config = read_cpucfg(LOONGARCH_CPUCFG20); + lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); + sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); + ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; + + c->scache.linesz = lsize; + c->scache.sets = sets; + c->scache.ways = ways; + /* 4 cores. scaches are shared */ + scache_size = lsize * sets * ways; + c->scache.waysize = scache_size / c->scache.ways; + + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); +} + +void cpu_cache_init(void) +{ + probe_pcache(); + probe_vcache(); + probe_scache(); + + shm_align_mask = PAGE_SIZE - 1; +} diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c new file mode 100644 index 000000000000..f31aabb4b9c6 --- /dev/null +++ b/arch/loongarch/mm/extable.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(exception_era(regs)); + if (fixup) { + regs->csr_era = fixup->fixup; + + return 1; + } + + return 0; +} diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c new file mode 100644 index 000000000000..605579b19a00 --- /dev/null +++ b/arch/loongarch/mm/fault.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + * + * Derived from MIPS: + * Copyright (C) 1995 - 2000 by Ralf Baechle + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +int show_unhandled_signals = 1; + +static void __kprobes no_context(struct pt_regs *regs, unsigned long address) +{ + const int field = sizeof(unsigned long) * 2; + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + + pr_alert("CPU %d Unable to handle kernel paging request at " + "virtual address %0*lx, era == %0*lx, ra == %0*lx\n", + raw_smp_processor_id(), field, address, field, regs->csr_era, + field, regs->regs[1]); + die("Oops", regs); +} + +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address) +{ + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + if (!user_mode(regs)) { + no_context(regs, address); + return; + } + pagefault_out_of_memory(); +} + +static void __kprobes do_sigbus(struct pt_regs *regs, + unsigned long write, unsigned long address, int si_code) +{ + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, address); + return; + } + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. 
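+	 * BUS_ADRERR (rather than a SIGSEGV si_code) tells userspace the
+	 * address was mapped but could not be backed, e.g. a store past
+	 * the end of a file-backed mapping.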
+ */ + current->thread.csr_badvaddr = address; + current->thread.trap_nr = read_csr_excode(); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); +} + +static void __kprobes do_sigsegv(struct pt_regs *regs, + unsigned long write, unsigned long address, int si_code) +{ + const int field = sizeof(unsigned long) * 2; + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) { + no_context(regs, address); + return; + } + + /* User mode accesses just cause a SIGSEGV */ + current->thread.csr_badvaddr = address; + if (!write) + current->thread.error_code = 1; + else + current->thread.error_code = 2; + current->thread.trap_nr = read_csr_excode(); + + if (show_unhandled_signals && + unhandled_signal(current, SIGSEGV) && __ratelimit(&ratelimit_state)) { + pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n", + current->comm, + write ? "write access to" : "read access from", + field, address); + pr_info("era = %0*lx in", field, + (unsigned long) regs->csr_era); + print_vma_addr(KERN_CONT " ", regs->csr_era); + pr_cont("\n"); + pr_info("ra = %0*lx in", field, + (unsigned long) regs->regs[1]); + print_vma_addr(KERN_CONT " ", regs->regs[1]); + pr_cont("\n"); + } + force_sig_fault(SIGSEGV, si_code, (void __user *)address); +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +static void __kprobes __do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address) +{ + int si_code = SEGV_MAPERR; + unsigned int flags = FAULT_FLAG_DEFAULT; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + struct vm_area_struct *vma = NULL; + vm_fault_t fault; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (address & __UA_LIMIT) { + if (!user_mode(regs)) + no_context(regs, address); + else + do_sigsegv(regs, write, address, si_code); + return; + } + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) { + do_sigsegv(regs, write, address, si_code); + return; + } + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: + mmap_read_lock(mm); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (!expand_stack(vma, address)) + goto good_area; +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + mmap_read_unlock(mm); + do_sigsegv(regs, write, address, si_code); + return; + +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. 
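+	 * From here on si_code is SEGV_ACCERR: the address is mapped, so
+	 * any remaining failure is a permission mismatch (e.g. a write to
+	 * a read-only VMA, or an instruction fetch from a VMA without
+	 * VM_EXEC).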
+ */ +good_area: + si_code = SEGV_ACCERR; + + if (write) { + flags |= FAULT_FLAG_WRITE; + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + if (!(vma->vm_flags & VM_READ) && address != exception_era(regs)) + goto bad_area; + if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs)) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(vma, address, flags, regs); + + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + no_context(regs, address); + return; + } + + if (unlikely(fault & VM_FAULT_RETRY)) { + flags |= FAULT_FLAG_TRIED; + + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; + } + if (unlikely(fault & VM_FAULT_ERROR)) { + mmap_read_unlock(mm); + if (fault & VM_FAULT_OOM) { + do_out_of_memory(regs, address); + return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_sigsegv(regs, write, address, si_code); + return; + } else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + do_sigbus(regs, write, address, si_code); + return; + } + BUG(); + } + + mmap_read_unlock(mm); +} + +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address) +{ + irqentry_state_t state = irqentry_enter(regs); + + /* Enable interrupt if enabled in parent context */ + if (likely(regs->csr_prmd & CSR_PRMD_PIE)) + local_irq_enable(); + + __do_page_fault(regs, write, address); + + local_irq_disable(); + + irqentry_exit(regs, state); +} diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c new file mode 100644 index 000000000000..bfc6f4fd2f98 --- /dev/null +++ b/arch/loongarch/mm/hugetlbpage.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +int pmd_huge(pmd_t pmd) +{ + return (pmd_val(pmd) & _PAGE_HUGE) != 0; +} + +int pud_huge(pud_t pud) +{ + return (pud_val(pud) & _PAGE_HUGE) != 0; +} + +uint64_t pmd_to_entrylo(unsigned long pmd_val) +{ + uint64_t val; + /* PMD as PTE. 
Must be huge page */ + if (!pmd_huge(__pmd(pmd_val))) + panic("%s", __func__); + + val = pmd_val ^ _PAGE_HUGE; + val |= ((val & _PAGE_HGLOBAL) >> + (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)); + + return val; +} diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c new file mode 100644 index 000000000000..438aba59ea78 --- /dev/null +++ b/arch/loongarch/mm/init.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * We have up to 8 empty zeroed pages so we can map one of the right colour + * when needed. Since page is never written to after the initialization we + * don't have to care about aliases on other CPUs. + */ +unsigned long empty_zero_page, zero_page_mask; +EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); + +void setup_zero_pages(void) +{ + unsigned int order, i; + struct page *page; + + order = 0; + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Oh boy, that early out of memory?"); + + page = virt_to_page((void *)empty_zero_page); + split_page(page, order); + for (i = 0; i < (1 << order); i++, page++) + mark_page_reserved(page); + + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *vfrom, *vto; + + vto = kmap_atomic(to); + vfrom = kmap_atomic(from); + copy_page(vto, vfrom); + kunmap_atomic(vfrom); + kunmap_atomic(vto); + /* Make sure this page is cleared on other CPU's too before using it */ + smp_wmb(); +} + +int __ref page_is_ram(unsigned long pfn) +{ + unsigned long addr = PFN_PHYS(pfn); + + return memblock_is_memory(addr) && !memblock_is_reserved(addr); +} + +void __init paging_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; +#endif +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + + free_area_init(max_zone_pfns); +} + +void __init mem_init(void) +{ + max_mapnr = max_low_pfn; + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + + memblock_free_all(); + setup_zero_pages(); /* Setup zeroed pages. 
 */
+	mem_init_print_info(NULL);
+}
+
+void __ref free_initmem(void)
+{
+	free_initmem_default(POISON_FREE_INITMEM);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	int ret;
+
+	ret = __add_pages(nid, start_pfn, nr_pages, params);
+
+	if (ret)
+		pr_err("%s: Problem encountered in __add_pages() as ret=%d\n",
+		       __func__, ret);
+
+	return ret;
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+void arch_remove_memory(int nid, u64 start,
+			u64 size, struct vmem_altmap *altmap)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct page *page = pfn_to_page(start_pfn);
+
+	/* With altmap the first mapped page is offset from @start */
+	if (altmap)
+		page += vmem_altmap_offset(altmap);
+	__remove_pages(start_pfn, nr_pages, altmap);
+}
+#endif
+#endif
+
+/*
+ * Align swapper_pg_dir into 64K, allowing its address to be loaded
+ * with a single instruction in the TLB handlers. If we used
+ * __aligned(64K), its size would get rounded up to the alignment
+ * size, and waste space. So we place it in its own section and align
+ * it in the linker script.
+ */
+pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
+
+pgd_t invalid_pg_dir[_PTRS_PER_PGD] __page_aligned_bss;
+#ifndef __PAGETABLE_PUD_FOLDED
+pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+#ifndef __PAGETABLE_PMD_FOLDED
+pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
+EXPORT_SYMBOL_GPL(invalid_pmd_table);
+#endif
+pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
+EXPORT_SYMBOL(invalid_pte_table);
diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c
new file mode 100644
index 000000000000..ed732b96d0a5
--- /dev/null
+++ b/arch/loongarch/mm/ioremap.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */ + +#include + +void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) +{ + return ((void __iomem *)TO_CACHE(phys_addr)); +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ + +} + +void *early_memremap_ro(resource_size_t phys_addr, unsigned long size) +{ + return early_memremap(phys_addr, size); +} + +void *early_memremap_prot(resource_size_t phys_addr, unsigned long size, + unsigned long prot_val) +{ + return early_memremap(phys_addr, size); +} diff --git a/arch/loongarch/mm/maccess.c b/arch/loongarch/mm/maccess.c new file mode 100644 index 000000000000..58173842c6be --- /dev/null +++ b/arch/loongarch/mm/maccess.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + /* highest bit set means kernel space */ + return (unsigned long)unsafe_src >> (BITS_PER_LONG - 1); +} diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c new file mode 100644 index 000000000000..744f06902daa --- /dev/null +++ b/arch/loongarch/mm/mmap.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ +EXPORT_SYMBOL(shm_align_mask); + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + shm_align_mask) & ~shm_align_mask) + \ + (((pgoff) << PAGE_SHIFT) & shm_align_mask)) + +enum mmap_allocation_direction {UP, DOWN}; + +static unsigned long arch_get_unmapped_area_common(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags, enum mmap_allocation_direction dir) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long addr = addr0; + int do_color_align; + struct vm_unmapped_area_info info; + + if (unlikely(len > TASK_SIZE)) + return -ENOMEM; + + if (flags & MAP_FIXED) { + /* Even MAP_FIXED mappings must reside within TASK_SIZE */ + if (TASK_SIZE - len < addr) + return -EINVAL; + + /* + * We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + return -EINVAL; + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.length = len; + info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + + if (dir == DOWN) { + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + addr = vm_unmapped_area(&info); + + if (!(addr & ~PAGE_MASK)) + return addr; + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. 
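+	 * Illustration (hypothetical numbers): with a multi-GB stack
+	 * rlimit, mm->mmap_base ends up far below TASK_SIZE, so a very
+	 * large request may not fit in [PAGE_SIZE, mmap_base) even though
+	 * plenty of room remains in [mmap_base, TASK_SIZE); the bottom-up
+	 * pass below finds it.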
+ */ + } + + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + return vm_unmapped_area(&info); +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, UP); +} + +/* + * There is no need to export this but sched.h declares the function as + * extern so making it static here results in an error. + */ +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, DOWN); +} + +int __virt_addr_valid(volatile void *kaddr) +{ + unsigned long vaddr = (unsigned long)kaddr; + + if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) + return 0; + + return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); +} +EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S new file mode 100644 index 000000000000..bf77d2815449 --- /dev/null +++ b/arch/loongarch/mm/page.S @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include + + .align 5 +SYM_FUNC_START(clear_page) + lu12i.w t0, 1 << (PAGE_SHIFT - 12) + add.d t0, t0, a0 +1: + st.d zero, a0, 0 + st.d zero, a0, 8 + st.d zero, a0, 16 + st.d zero, a0, 24 + st.d zero, a0, 32 + st.d zero, a0, 40 + st.d zero, a0, 48 + st.d zero, a0, 56 + addi.d a0, a0, 128 + st.d zero, a0, -64 + st.d zero, a0, -56 + st.d zero, a0, -48 + st.d zero, a0, -40 + st.d zero, a0, -32 + st.d zero, a0, -24 + st.d zero, a0, -16 + st.d zero, a0, -8 + bne t0, a0, 1b + + jirl $r0, ra, 0 +SYM_FUNC_END(clear_page) +EXPORT_SYMBOL(clear_page) + +.align 5 +SYM_FUNC_START(copy_page) + lu12i.w t8, 1 << (PAGE_SHIFT - 12) + add.d t8, t8, a0 +1: + ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + + st.d t0, a0, 0 + st.d t1, a0, 8 + ld.d t0, a1, 64 + ld.d t1, a1, 72 + st.d t2, a0, 16 + st.d t3, a0, 24 + ld.d t2, a1, 80 + ld.d t3, a1, 88 + st.d t4, a0, 32 + st.d t5, a0, 40 + ld.d t4, a1, 96 + ld.d t5, a1, 104 + st.d t6, a0, 48 + st.d t7, a0, 56 + ld.d t6, a1, 112 + ld.d t7, a1, 120 + addi.d a0, a0, 128 + addi.d a1, a1, 128 + + st.d t0, a0, -64 + st.d t1, a0, -56 + st.d t2, a0, -48 + st.d t3, a0, -40 + st.d t4, a0, -32 + st.d t5, a0, -24 + st.d t6, a0, -16 + st.d t7, a0, -8 + + bne t8, a0, 1b + jirl $r0, ra, 0 +SYM_FUNC_END(copy_page) +EXPORT_SYMBOL(copy_page) diff --git a/arch/loongarch/mm/pgtable-64.c b/arch/loongarch/mm/pgtable-64.c new file mode 100644 index 000000000000..ac112f2bf072 --- /dev/null +++ b/arch/loongarch/mm/pgtable-64.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +void pgd_init(unsigned long page) +{ + unsigned long *p, *end; + unsigned long entry; + +#if !defined(__PAGETABLE_PUD_FOLDED) + entry = (unsigned long)invalid_pud_table; +#elif !defined(__PAGETABLE_PMD_FOLDED) + entry = (unsigned long)invalid_pmd_table; +#else + entry = (unsigned long)invalid_pte_table; +#endif + + p = (unsigned long *) page; + end = p + PTRS_PER_PGD; + + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; + p += 8; + p[-3] = entry; + 
p[-2] = entry; + p[-1] = entry; + } while (p != end); +} +EXPORT_SYMBOL_GPL(pgd_init); + +#ifndef __PAGETABLE_PMD_FOLDED +void pmd_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *) addr; + end = p + PTRS_PER_PMD; + + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +EXPORT_SYMBOL_GPL(pmd_init); +#endif + +#ifndef __PAGETABLE_PUD_FOLDED +void pud_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *)addr; + end = p + PTRS_PER_PUD; + + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +#endif + +pmd_t mk_pmd(struct page *page, pgprot_t prot) +{ + pmd_t pmd; + + pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + + return pmd; +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; + flush_tlb_all(); +} + +void __init pagetable_init(void) +{ + /* Initialize the entire pgd. */ + pgd_init((unsigned long)swapper_pg_dir); + pgd_init((unsigned long)invalid_pg_dir); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table); +#endif +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); +#endif +} diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c new file mode 100644 index 000000000000..648d8d59cea6 --- /dev/null +++ b/arch/loongarch/mm/pgtable.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret, *init; + + ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER); + if (ret) { + init = pgd_offset(&init_mm, 0UL); + pgd_init((unsigned long)ret); + memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + + return ret; +} +EXPORT_SYMBOL_GPL(pgd_alloc); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c new file mode 100644 index 000000000000..12268fb4b55a --- /dev/null +++ b/arch/loongarch/mm/tlb.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +void local_flush_tlb_all(void) +{ + invtlb_all(INVTLB_CURRENT_ALL, 0, 0); +} +EXPORT_SYMBOL(local_flush_tlb_all); + +void local_flush_tlb_user(void) +{ + invtlb_all(INVTLB_CURRENT_GFALSE, 0, 0); +} +EXPORT_SYMBOL(local_flush_tlb_user); + +void local_flush_tlb_kernel(void) +{ + invtlb_all(INVTLB_CURRENT_GTRUE, 0, 0); +} +EXPORT_SYMBOL(local_flush_tlb_kernel); + +/* All entries common to a mm share an asid. To effectively flush + these entries, we just bump the asid. 
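+   (the bump happens in drop_mmu_context() below: the mm is handed a
+   fresh ASID, so TLB entries tagged with the stale one can never
+   match again)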
*/ +void local_flush_tlb_mm(struct mm_struct *mm) +{ + int cpu; + + preempt_disable(); + + cpu = smp_processor_id(); + + if (asid_valid(mm, cpu)) { + drop_mmu_context(mm, cpu); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } + + preempt_enable(); +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (asid_valid(mm, cpu)) { + unsigned long size, flags; + + local_irq_save(flags); + start = round_down(start, PAGE_SIZE << 1); + end = round_up(end, PAGE_SIZE << 1); + size = (end - start) >> (PAGE_SHIFT + 1); + if (size <= (current_cpu_data.tlbsizestlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + + int asid = cpu_asid(cpu, mm); + while (start < end) { + invtlb(INVTLB_ADDR_GFALSE_AND_ASID, asid, start); + start += (PAGE_SIZE << 1); + } + } else { + drop_mmu_context(mm, cpu); + } + local_irq_restore(flags); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long size, flags; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= (current_cpu_data.tlbsizestlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + + while (start < end) { + invtlb_addr(INVTLB_ADDR_GTRUE_OR_ASID, 0, start); + start += (PAGE_SIZE << 1); + } + } else { + local_flush_tlb_kernel(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + int cpu = smp_processor_id(); + + if (asid_valid(vma->vm_mm, cpu)) { + int newpid; + + newpid = cpu_asid(cpu, vma->vm_mm); + page &= (PAGE_MASK << 1); + invtlb(INVTLB_ADDR_GFALSE_AND_ASID, newpid, page); + } else { + cpumask_clear_cpu(cpu, mm_cpumask(vma->vm_mm)); + } +} + +/* + * This one is only used for pages with the global bit set so we don't care + * much about the ASID. + */ +void local_flush_tlb_one(unsigned long page) +{ + page &= (PAGE_MASK << 1); + invtlb_addr(INVTLB_ADDR_GTRUE_OR_ASID, 0, page); +} + +static void __update_hugetlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ +#ifdef CONFIG_HUGETLB_PAGE + int idx; + unsigned long lo; + unsigned long flags; + + local_irq_save(flags); + + address &= (PAGE_MASK << 1); + write_csr_entryhi(address); + tlb_probe(); + idx = read_csr_tlbidx(); + write_csr_pagesize(PS_HUGE_SIZE); + lo = pmd_to_entrylo(pte_val(*ptep)); + write_csr_entrylo0(lo); + write_csr_entrylo1(lo + (HPAGE_SIZE >> 1)); + + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + write_csr_pagesize(PS_DEFAULT_SIZE); + + local_irq_restore(flags); +#endif +} + +void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + int idx; + unsigned long flags; + + /* + * Handle debugger faulting in for debugee. 
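+	 * (e.g. a ptrace() access via access_process_vm(): the tracer's
+	 * active_mm is not the tracee's vm_mm, so there is no entry of
+	 * ours to update and we simply return)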
+ */ + if (current->active_mm != vma->vm_mm) + return; + + if (pte_val(*ptep) & _PAGE_HUGE) { + __update_hugetlb(vma, address, ptep); + return; + } + + local_irq_save(flags); + + if ((unsigned long)ptep & sizeof(pte_t)) + ptep--; + + address &= (PAGE_MASK << 1); + write_csr_entryhi(address); + tlb_probe(); + idx = read_csr_tlbidx(); + write_csr_pagesize(PS_DEFAULT_SIZE); + write_csr_entrylo0(pte_val(*ptep++)); + write_csr_entrylo1(pte_val(*ptep)); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + + local_irq_restore(flags); +} + +static void setup_ptwalker(void) +{ + unsigned long pwctl0, pwctl1; + unsigned long pgd_i = 0, pgd_w = 0; + unsigned long pud_i = 0, pud_w = 0; + unsigned long pmd_i = 0, pmd_w = 0; + unsigned long pte_i = 0, pte_w = 0; + + pgd_i = PGDIR_SHIFT; + pgd_w = PAGE_SHIFT - 3; +#if CONFIG_PGTABLE_LEVELS > 3 + pud_i = PUD_SHIFT; + pud_w = PAGE_SHIFT - 3; +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + pmd_i = PMD_SHIFT; + pmd_w = PAGE_SHIFT - 3; +#endif + pte_i = PAGE_SHIFT; + pte_w = PAGE_SHIFT - 3; + + pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25; + pwctl1 = pgd_i | pgd_w << 6; + + csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0); + csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1); + csr_write64((long)swapper_pg_dir, LOONGARCH_CSR_PGDH); + csr_write64((long)invalid_pg_dir, LOONGARCH_CSR_PGDL); + csr_write64((long)smp_processor_id(), LOONGARCH_CSR_TMID); +} + +static void output_pgtable_bits_defines(void) +{ +#define pr_define(fmt, ...) \ + pr_debug("#define " fmt, ##__VA_ARGS__) + + pr_debug("#include \n"); + pr_debug("#include \n"); + pr_debug("\n"); + + pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); + pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); + pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); + pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); + pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); + pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); + pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); + pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); + pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); + pr_debug("\n"); +} + +void setup_tlb_handler(void) +{ + static int run_once = 0; + + setup_ptwalker(); + output_pgtable_bits_defines(); + + /* The tlb handlers are generated only once */ + if (!run_once) { + memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); + local_flush_icache_range(tlbrentry, tlbrentry + 0x80); + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); + set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); + set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); + run_once++; + } +} + +void tlb_init(void) +{ + write_csr_pagesize(PS_DEFAULT_SIZE); + write_csr_stlbpgsize(PS_DEFAULT_SIZE); + write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE); + setup_tlb_handler(); + local_flush_tlb_all(); +} diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S new file mode 100644 index 000000000000..4ca1f1d54190 --- /dev/null +++ b/arch/loongarch/mm/tlbex.S @@ -0,0 +1,481 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include + +#ifdef 
CONFIG_64BIT +#include +#endif + + .macro tlb_do_page_fault, write + SYM_FUNC_START(tlb_do_page_fault_\write) + SAVE_ALL + csrrd a2, LOONGARCH_CSR_BADV + move a0, sp + REG_S a2, sp, PT_BVADDR + li.w a1, \write + la.abs t0, do_page_fault + jirl ra, t0, 0 + RESTORE_ALL_AND_RET + SYM_FUNC_END(tlb_do_page_fault_\write) + .endm + + tlb_do_page_fault 0 + tlb_do_page_fault 1 + +SYM_FUNC_START(handle_tlb_protect) + BACKUP_T0T1 + SAVE_ALL + move a0, sp + move a1, zero + csrrd a2, LOONGARCH_CSR_BADV + REG_S a2, sp, PT_BVADDR + la.abs t0, do_page_fault + jirl ra, t0, 0 + RESTORE_ALL_AND_RET +SYM_FUNC_END(handle_tlb_protect) + +SYM_FUNC_START(handle_tlb_load) + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 + + /* + * The vmalloc handling is not in the hotpath. + */ + csrrd t0, LOONGARCH_CSR_BADV + blt t0, $r0, vmalloc_load + csrrd t1, LOONGARCH_CSR_PGDL + +vmalloc_done_load: + /* Get PGD offset in bytes */ + srli.d t0, t0, PGDIR_SHIFT + andi t0, t0, (PTRS_PER_PGD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#if CONFIG_PGTABLE_LEVELS > 3 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PUD_SHIFT + andi t0, t0, (PTRS_PER_PUD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PMD_SHIFT + andi t0, t0, (PTRS_PER_PMD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif + ld.d ra, t1, 0 + + /* + * For huge tlb entries, pmde doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + andi t0, ra, _PAGE_HUGE + bne t0, $r0, tlb_huge_update_load + + csrrd t0, LOONGARCH_CSR_BADV + srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) + andi t0, t0, (PTRS_PER_PTE - 1) + slli.d t0, t0, _PTE_T_LOG2 + add.d t1, ra, t0 + + ld.d t0, t1, 0 + tlbsrch + + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_load + + ori t0, t0, _PAGE_VALID + st.d t0, t1, 0 + ori t1, t1, 8 + xori t1, t1, 8 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbwr +leave_load: + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn +#ifdef CONFIG_64BIT +vmalloc_load: + la.abs t1, swapper_pg_dir + b vmalloc_done_load +#endif + + /* + * This is the entry point when build_tlbchange_handler_head + * spots a huge page. + */ +tlb_huge_update_load: + ld.d t0, t1, 0 + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_load + tlbsrch + + ori t0, t0, _PAGE_VALID + st.d t0, t1, 0 + addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) + addi.d ra, t1, 0 + csrxchg ra, t1, LOONGARCH_CSR_TLBIDX + tlbwr + + csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX + + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. 
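+	 * Worked example, assuming 16KB base pages: HPAGE_SIZE is 32MB,
+	 * each of entrylo0/entrylo1 then covers a 16MB half, and entrylo1
+	 * is derived below by adding 1 << (HPAGE_SHIFT - 1) to entrylo0.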
+ */ + /* Huge page: Move Global bit */ + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 + + addi.d ra, t0, 0 + csrwr t0, LOONGARCH_CSR_TLBELO0 + addi.d t0, ra, 0 + + /* Convert to entrylo1 */ + addi.d t1, $r0, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 + + /* Set huge page tlb entry size */ + addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + tlbfill + + addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + +nopage_tlb_load: + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_0 + jirl $r0, t0, 0 +SYM_FUNC_END(handle_tlb_load) + +SYM_FUNC_START(handle_tlb_store) + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 + + /* + * The vmalloc handling is not in the hotpath. + */ + csrrd t0, LOONGARCH_CSR_BADV + blt t0, $r0, vmalloc_store + csrrd t1, LOONGARCH_CSR_PGDL + +vmalloc_done_store: + /* Get PGD offset in bytes */ + srli.d t0, t0, PGDIR_SHIFT + andi t0, t0, (PTRS_PER_PGD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 + +#if CONFIG_PGTABLE_LEVELS > 3 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PUD_SHIFT + andi t0, t0, (PTRS_PER_PUD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PMD_SHIFT + andi t0, t0, (PTRS_PER_PMD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif + ld.d ra, t1, 0 + + /* + * For huge tlb entries, pmde doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + andi t0, ra, _PAGE_HUGE + bne t0, $r0, tlb_huge_update_store + + csrrd t0, LOONGARCH_CSR_BADV + srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) + andi t0, t0, (PTRS_PER_PTE - 1) + slli.d t0, t0, _PTE_T_LOG2 + add.d t1, ra, t0 + + ld.d t0, t1, 0 + tlbsrch + + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + bne ra, $r0, nopage_tlb_store + + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 + + ori t1, t1, 8 + xori t1, t1, 8 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbwr +leave_store: + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn +#ifdef CONFIG_64BIT +vmalloc_store: + la.abs t1, swapper_pg_dir + b vmalloc_done_store +#endif + + /* + * This is the entry point when build_tlbchange_handler_head + * spots a huge page. + */ +tlb_huge_update_store: + ld.d t0, t1, 0 + srli.d ra, t0, _PAGE_PRESENT_SHIFT + andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) + bne ra, $r0, nopage_tlb_store + + tlbsrch + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + + st.d t0, t1, 0 + addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) + addi.d ra, t1, 0 + csrxchg ra, t1, LOONGARCH_CSR_TLBIDX + tlbwr + + csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. 
+ * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. + */ + /* Huge page: Move Global bit */ + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 + + addi.d ra, t0, 0 + csrwr t0, LOONGARCH_CSR_TLBELO0 + addi.d t0, ra, 0 + + /* Convert to entrylo1 */ + addi.d t1, $r0, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 + + /* Set huge page tlb entry size */ + addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + tlbfill + + /* Reset default page size */ + addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + +nopage_tlb_store: + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jirl $r0, t0, 0 +SYM_FUNC_END(handle_tlb_store) + +SYM_FUNC_START(handle_tlb_modify) + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 + + /* + * The vmalloc handling is not in the hotpath. + */ + csrrd t0, LOONGARCH_CSR_BADV + blt t0, $r0, vmalloc_modify + csrrd t1, LOONGARCH_CSR_PGDL + +vmalloc_done_modify: + /* Get PGD offset in bytes */ + srli.d t0, t0, PGDIR_SHIFT + andi t0, t0, (PTRS_PER_PGD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#if CONFIG_PGTABLE_LEVELS > 3 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PUD_SHIFT + andi t0, t0, (PTRS_PER_PUD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + csrrd t0, LOONGARCH_CSR_BADV + ld.d t1, t1, 0 + srli.d t0, t0, PMD_SHIFT + andi t0, t0, (PTRS_PER_PMD - 1) + slli.d t0, t0, 3 + add.d t1, t1, t0 +#endif + ld.d ra, t1, 0 + + /* + * For huge tlb entries, pmde doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + andi t0, ra, _PAGE_HUGE + bne t0, $r0, tlb_huge_update_modify + + csrrd t0, LOONGARCH_CSR_BADV + srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) + andi t0, t0, (PTRS_PER_PTE - 1) + slli.d t0, t0, _PTE_T_LOG2 + add.d t1, ra, t0 + + ld.d t0, t1, 0 + tlbsrch + + srli.d ra, t0, _PAGE_WRITE_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_modify + + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 + ori t1, t1, 8 + xori t1, t1, 8 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbwr +leave_modify: + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn +#ifdef CONFIG_64BIT +vmalloc_modify: + la.abs t1, swapper_pg_dir + b vmalloc_done_modify +#endif + + /* + * This is the entry point when + * build_tlbchange_handler_head spots a huge page. + */ +tlb_huge_update_modify: + ld.d t0, t1, 0 + + srli.d ra, t0, _PAGE_WRITE_SHIFT + andi ra, ra, 1 + beq ra, $r0, nopage_tlb_modify + + tlbsrch + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + + st.d t0, t1, 0 + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. 
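+	 *
+	 * (Unlike the load/store paths there is no EHINV + tlbfill
+	 * here: a modify fault means a matching entry is already
+	 * present, so it is rewritten in place with tlbwr.)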
+ */ + /* Huge page: Move Global bit */ + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 + + addi.d ra, t0, 0 + csrwr t0, LOONGARCH_CSR_TLBELO0 + addi.d t0, ra, 0 + + /* Convert to entrylo1 */ + addi.d t1, $r0, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 + + /* Set huge page tlb entry size */ + addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + tlbwr + + /* Reset default page size */ + addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + +nopage_tlb_modify: + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jirl $r0, t0, 0 +SYM_FUNC_END(handle_tlb_modify) + +SYM_FUNC_START(handle_tlb_refill) + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 +#if CONFIG_PGTABLE_LEVELS > 3 + lddir t0, t0, 2 +#endif +#if CONFIG_PGTABLE_LEVELS > 2 + lddir t0, t0, 1 +#endif + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn +SYM_FUNC_END(handle_tlb_refill) diff --git a/arch/loongarch/pci/Makefile b/arch/loongarch/pci/Makefile new file mode 100644 index 000000000000..8101ef3df71c --- /dev/null +++ b/arch/loongarch/pci/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the PCI specific kernel interface routines under Linux. +# + +obj-y += pci.o +obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c new file mode 100644 index 000000000000..f692017fec46 --- /dev/null +++ b/arch/loongarch/pci/acpi.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct pci_root_info { + struct acpi_pci_root_info common; + struct pci_config_window *cfg; +}; + +void pcibios_add_bus(struct pci_bus *bus) +{ + acpi_pci_add_bus(bus); +} + +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_config_window *cfg = bridge->bus->sysdata; + struct acpi_device *adev = to_acpi_device(cfg->parent); + + ACPI_COMPANION_SET(&bridge->dev, adev); + + return 0; +} + +int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + struct acpi_device *adev = to_acpi_device(cfg->parent); + struct acpi_pci_root *root = acpi_driver_data(adev); + + return root->segment; +} + +static void acpi_release_root_info(struct acpi_pci_root_info *ci) +{ + struct pci_root_info *info; + + info = container_of(ci, struct pci_root_info, common); + pci_ecam_free(info->cfg); + kfree(ci->ops); + kfree(info); +} + +static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) +{ + struct acpi_device *device = ci->bridge; + struct resource_entry *entry, *tmp; + int status; + + status = acpi_pci_probe_root_resources(ci); + if (status > 0) { + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + if (entry->res->flags & IORESOURCE_MEM) { + entry->offset = ci->root->mcfg_addr & GENMASK_ULL(63, 40); + entry->res->start |= entry->offset; + entry->res->end |= entry->offset; + } + } + return status; + } + + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + dev_printk(KERN_DEBUG, &device->dev, + "host bridge window %pR 
(ignored)\n", entry->res); + resource_list_destroy_entry(entry); + } + + return 0; +} + +/* + * Lookup the bus range for the domain in MCFG, and set up config space + * mapping. + */ +static struct pci_config_window * +pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) +{ + int ret, bus_shift; + u16 seg = root->segment; + struct device *dev = &root->device->dev; + struct resource cfgres; + struct resource *bus_res = &root->secondary; + struct pci_config_window *cfg; + const struct pci_ecam_ops *ecam_ops; + + ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops); + if (ret < 0) { + dev_err(dev, "%04x:%pR ECAM region not found, use default value\n", seg, bus_res); + ecam_ops = &loongson_pci_ecam_ops; + root->mcfg_addr = mcfg_addr_init(0); + } + + bus_shift = ecam_ops->bus_shift ? : 20; + + cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); + cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; + cfgres.flags = IORESOURCE_MEM; + + cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + if (IS_ERR(cfg)) { + dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg)); + return NULL; + } + + return cfg; +} + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + struct pci_bus *bus; + struct pci_root_info *info; + struct acpi_pci_root_ops *root_ops; + int domain = root->segment; + int busnum = root->secondary.start; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); + return NULL; + } + + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); + if (!root_ops) { + kfree(info); + return NULL; + } + + info->cfg = pci_acpi_setup_ecam_mapping(root); + if (!info->cfg) { + kfree(info); + kfree(root_ops); + return NULL; + } + + root_ops->release_info = acpi_release_root_info; + root_ops->prepare_resources = acpi_prepare_root_resources; + root_ops->pci_ops = (struct pci_ops *)&info->cfg->ops->pci_ops; + + bus = pci_find_bus(domain, busnum); + if (bus) { + memcpy(bus->sysdata, info->cfg, sizeof(struct pci_config_window)); + kfree(info); + } else { + struct pci_bus *child; + + bus = acpi_pci_root_create(root, root_ops, + &info->common, info->cfg); + if (!bus) { + kfree(info); + kfree(root_ops); + return NULL; + } + + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + } + + return bus; +} diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c new file mode 100644 index 000000000000..f843b351e471 --- /dev/null +++ b/arch/loongarch/pci/pci.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
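+ *
+ * Generic LoongArch PCI support: raw config space accessors,
+ * PCI cacheline size setup, and boot-time MSI/IRQ/VGA fixups
+ * shared by the DT and ACPI boot paths.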
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define PCI_DEVICE_ID_LOONGSON_HOSTBRIDGE 0x7a00 + +int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,int reg, int len, u32 *val) +{ + struct pci_bus * bus_tmp = pci_find_bus(domain, bus); + if (bus_tmp) + return bus_tmp->ops->read(bus_tmp, devfn, reg, len, val); + return -EINVAL; +} + +int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val) +{ + struct pci_bus * bus_tmp = pci_find_bus(domain, bus); + if (bus_tmp) + return bus_tmp->ops->write(bus_tmp, devfn, reg, len, val); + return -EINVAL; +} + +phys_addr_t mcfg_addr_init(int node) +{ + return (((u64)node << 44) | MCFG_EXT_PCICFG_BASE); +} + +static int __init pcibios_init(void) +{ + unsigned int lsize; + + /* + * Set PCI cacheline size to that of the highest level in the + * cache hierarchy. + */ + lsize = cpu_dcache_line_size(); + lsize = cpu_vcache_line_size() ? : lsize; + lsize = cpu_scache_line_size() ? : lsize; + + BUG_ON(!lsize); + + pci_dfl_cache_line_size = lsize >> 2; + + pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize); + + return 0; +} + +subsys_initcall(pcibios_init); + +int pcibios_add_device(struct pci_dev *dev) +{ + int id = pci_domain_nr(dev->bus); + + dev_set_msi_domain(&dev->dev, pch_msi_domain[id]); + + return 0; +} + +int pcibios_alloc_irq(struct pci_dev *dev) +{ + if (acpi_disabled) + return 0; + if (pci_dev_msi_enabled(dev)) + return 0; + return acpi_pci_irq_enable(dev); +} + +static void pci_fixup_vgadev(struct pci_dev *pdev) +{ + struct pci_dev *devp = NULL; + + while ((devp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, devp))) { + if (devp->vendor != PCI_VENDOR_ID_LOONGSON) { + vga_set_default_device(devp); + dev_info(&pdev->dev, + "Overriding boot device as %X:%X\n", + devp->vendor, devp->device); + } + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev); diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile new file mode 100644 index 000000000000..6b6e16732c60 --- /dev/null +++ b/arch/loongarch/vdso/Makefile @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0 +# Objects to go into the VDSO. + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT +include $(srctree)/lib/vdso/Makefile + +obj-vdso-y := elf.o vgettimeofday.o sigreturn.o + +# Common compiler flags between ABIs. +ccflags-vdso := \ + $(filter -I%,$(KBUILD_CFLAGS)) \ + $(filter -E%,$(KBUILD_CFLAGS)) \ + $(filter -march=%,$(KBUILD_CFLAGS)) \ + $(filter -m%-float,$(KBUILD_CFLAGS)) \ + -D__VDSO__ + +ifeq ($(cc-name),clang) +ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) +endif + +cflags-vdso := $(ccflags-vdso) \ + $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ + -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ + -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ + $(call cc-option, -fno-asynchronous-unwind-tables) \ + $(call cc-option, -fno-stack-protector) +aflags-vdso := $(ccflags-vdso) \ + -D__ASSEMBLY__ -Wa,-gdwarf-2 + +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) +endif + +# VDSO linker flags. 
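+# -Bsymbolic resolves the vDSO's internal references at link time,
+# and --hash-style=sysv keeps a classic .hash section, which some
+# C libraries still rely on when looking up vDSO symbols.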
+ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ + $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ + --hash-style=sysv --build-id -T + +GCOV_PROFILE := n + +# +# Shared build commands. +# + +quiet_cmd_vdsold_and_vdso_check = LD $@ + cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check) + +quiet_cmd_vdsoas_o_S = AS $@ + cmd_vdsoas_o_S = $(CC) $(a_flags) -c -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# +# Build native VDSO. +# + +native-abi := $(filter -mabi=%,$(KBUILD_CFLAGS)) + +targets += $(obj-vdso-y) +targets += vdso.lds vdso.so.dbg vdso.so + +obj-vdso := $(obj-vdso-y:%.o=$(obj)/%.o) + +$(obj-vdso): KBUILD_CFLAGS := $(cflags-vdso) $(native-abi) +$(obj-vdso): KBUILD_AFLAGS := $(aflags-vdso) $(native-abi) + +$(obj)/vdso.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) $(native-abi) + +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold_and_vdso_check) + +$(obj)/vdso.so: OBJCOPYFLAGS := -S +$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE + $(call if_changed,objcopy) + +obj-y += vdso.o + +$(obj)/vdso.o : $(obj)/vdso.so + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/loongarch/vdso/elf.S b/arch/loongarch/vdso/elf.S new file mode 100644 index 000000000000..d5f16a6e3c72 --- /dev/null +++ b/arch/loongarch/vdso/elf.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include + +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/loongarch/vdso/gen_vdso_offsets.sh b/arch/loongarch/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000000..1bb4e12642ff --- /dev/null +++ b/arch/loongarch/vdso/gen_vdso_offsets.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Derived from RISC-V and ARM64: +# Author: Will Deacon +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# + +LC_ALL=C sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S new file mode 100644 index 000000000000..98f765b20ba9 --- /dev/null +++ b/arch/loongarch/vdso/sigreturn.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015 Imagination Technologies + * Author: Alex Smith + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
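+ *
+ * __vdso_rt_sigreturn below is the signal trampoline: the kernel
+ * points a signal handler's return address at this symbol, so
+ * returning from the handler issues the rt_sigreturn syscall
+ * from the vDSO page.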
+ */ + +#include + +#include +#include + +#include +#include + + .section .text + .cfi_sections .debug_frame + +SYM_FUNC_START(__vdso_rt_sigreturn) + + li.w a7, __NR_rt_sigreturn + syscall 0 + +SYM_FUNC_END(__vdso_rt_sigreturn) diff --git a/arch/loongarch/vdso/vdso.S b/arch/loongarch/vdso/vdso.S new file mode 100644 index 000000000000..41951232ef45 --- /dev/null +++ b/arch/loongarch/vdso/vdso.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * Derived from RISC-V: + * Copyright (C) 2014 Regents of the University of California + */ + +#include +#include +#include + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin "arch/loongarch/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S new file mode 100644 index 000000000000..d3811bba0abf --- /dev/null +++ b/arch/loongarch/vdso/vdso.lds.S @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +OUTPUT_FORMAT("elf64-loongarch", "elf64-loongarch", "elf64-loongarch") + +OUTPUT_ARCH(loongarch) + +SECTIONS +{ + PROVIDE(_start = .); + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .text : { *(.text*) } :text + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + _end = .; + PROVIDE(end = .); + + /DISCARD/ : { + *(.gnu.attributes) + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +VERSION +{ + LINUX_4.8 { + global: + __vdso_clock_getres; + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_rt_sigreturn; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigreturn = __vdso_rt_sigreturn; diff --git a/arch/loongarch/vdso/vgettimeofday.c b/arch/loongarch/vdso/vgettimeofday.c new file mode 100644 index 000000000000..c54489a630cf --- /dev/null +++ b/arch/loongarch/vdso/vgettimeofday.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch userspace implementations of gettimeofday() and similar. 
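+ *
+ * These are thin wrappers around the generic vDSO code in lib/vdso;
+ * a call such as clock_gettime(CLOCK_MONOTONIC, &ts) from userspace
+ * is normally satisfied here without entering the kernel.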
+ * + * Copyright (C) 2020 Loongson Technologies + * + */ +#include +#include + +int __vdso_clock_gettime(clockid_t clock, + struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 60dfe63301d0..937464e329f5 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -994,6 +994,9 @@ static int __init acpi_bus_init_irq(void) case ACPI_IRQ_MODEL_PLATFORM: message = "platform specific model"; break; + case ACPI_IRQ_MODEL_LPIC: + message = "LPIC"; + break; default: printk(KERN_WARNING PREFIX "Unknown interrupt routing model\n"); return -ENODEV; diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 14ee631cb7cf..b3e4680003f5 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -405,7 +405,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev) * controller and must therefore be considered active high * as default. */ - int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? + int polarity = (acpi_irq_model == ACPI_IRQ_MODEL_GIC || + acpi_irq_model == ACPI_IRQ_MODEL_LPIC) ? ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; char *link = NULL; char link_desc[16]; diff --git a/drivers/acpi/pci_mcfg.c b/drivers/acpi/pci_mcfg.c index 2709ef2b0351..09c3b24938fe 100644 --- a/drivers/acpi/pci_mcfg.c +++ b/drivers/acpi/pci_mcfg.c @@ -165,6 +165,16 @@ static struct mcfg_fixup mcfg_quirks[] = { ALTRA_ECAM_QUIRK(1, 14), ALTRA_ECAM_QUIRK(1, 15), #endif /* ARM64 */ + +#ifdef CONFIG_LOONGARCH +#define LOONGSON_ECAM_MCFG(table_id, seg) \ + { "LOONGS", table_id, 1, seg, MCFG_BUS_ANY, &loongson_pci_ecam_ops } + + LOONGSON_ECAM_MCFG("\0", 0), + LOONGSON_ECAM_MCFG("LOONGSON", 0), + LOONGSON_ECAM_MCFG("\0", 1), + LOONGSON_ECAM_MCFG("LOONGSON", 1), +#endif /* LOONGARCH */ }; static char mcfg_oem_id[ACPI_OEM_ID_SIZE]; diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 64e2f5e379aa..5c4e184ac9dc 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -291,7 +291,7 @@ config PCI_HYPERV_INTERFACE config PCI_LOONGSON bool "LOONGSON PCI Controller" depends on MACH_LOONGSON64 || COMPILE_TEST - depends on OF + depends on OF || ACPI depends on PCI_QUIRKS default MACH_LOONGSON64 help diff --git a/drivers/pci/controller/pci-loongson.c b/drivers/pci/controller/pci-loongson.c index 1273838487f4..330c2bcec962 100644 --- a/drivers/pci/controller/pci-loongson.c +++ b/drivers/pci/controller/pci-loongson.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "../pci.h" @@ -30,11 +32,16 @@ #define FLAG_CFG1 BIT(1) #define FLAG_DEV_FIX BIT(2) +struct pci_controller_data { + u32 flags; + struct pci_ops *ops; +}; + struct loongson_pci { void __iomem *cfg0_base; void __iomem *cfg1_base; struct platform_device *pdev; - u32 flags; + struct pci_controller_data *data; }; /* Fixup wrong class code in PCIe bridges */ @@ -116,55 +123,78 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GNET, loongson_pci_pin_quirk); -static void __iomem *cfg1_map(struct loongson_pci *priv, int bus, - unsigned int devfn, int where) +static struct loongson_pci *pci_bus_to_loongson_pci(struct pci_bus *bus) { - unsigned long addroff = 0x0; + struct pci_config_window *cfg; - if 
(bus != 0) - addroff |= BIT(28); /* Type 1 Access */ - addroff |= (where & 0xff) | ((where & 0xf00) << 16); - addroff |= (bus << 16) | (devfn << 8); - return priv->cfg1_base + addroff; + if (acpi_disabled) + return (struct loongson_pci *)(bus->sysdata); + + cfg = bus->sysdata; + return (struct loongson_pci *)(cfg->priv); } -static void __iomem *cfg0_map(struct loongson_pci *priv, int bus, - unsigned int devfn, int where) +static void __iomem *cfg0_map(struct loongson_pci *priv, + struct pci_bus *bus, unsigned int devfn, int where) { unsigned long addroff = 0x0; + unsigned char busnum = bus->number; - if (bus != 0) + if (!pci_is_root_bus(bus)) { addroff |= BIT(24); /* Type 1 Access */ - addroff |= (bus << 16) | (devfn << 8) | where; + addroff |= (busnum << 16); + } + addroff |= (devfn << 8) | where; return priv->cfg0_base + addroff; } +static void __iomem *cfg1_map(struct loongson_pci *priv, + struct pci_bus *bus, unsigned int devfn, int where) +{ + unsigned long addroff = 0x0; + unsigned char busnum = bus->number; + + if (!pci_is_root_bus(bus)) { + addroff |= BIT(28); /* Type 1 Access */ + addroff |= (busnum << 16); + } + addroff |= (devfn << 8) | (where & 0xff) | ((where & 0xf00) << 16); + return priv->cfg1_base + addroff; +} + static void __iomem *pci_loongson_map_bus(struct pci_bus *bus, unsigned int devfn, int where) { - unsigned char busnum = bus->number; - struct pci_host_bridge *bridge = pci_find_host_bridge(bus); - struct loongson_pci *priv = pci_host_bridge_priv(bridge); + unsigned int device = PCI_SLOT(devfn); + unsigned int function = PCI_FUNC(devfn); + struct loongson_pci *priv = pci_bus_to_loongson_pci(bus); /* * Do not read more than one device on the bus other than * the host bus. For our hardware the root bus is always bus 0. */ - if (priv->flags & FLAG_DEV_FIX && busnum != 0 && - PCI_SLOT(devfn) > 0) + if ((priv->data->flags & FLAG_DEV_FIX) && bus->self) { + if (!pci_is_root_bus(bus) && (device > 0)) + return NULL; + } + + /* Don't access unexisting devices */ + if (pci_is_root_bus(bus) && (device >= 9 && device <= 20 && function > 0)) return NULL; /* CFG0 can only access standard space */ if (where < PCI_CFG_SPACE_SIZE && priv->cfg0_base) - return cfg0_map(priv, busnum, devfn, where); + return cfg0_map(priv, bus, devfn, where); /* CFG1 can access extended space */ if (where < PCI_CFG_SPACE_EXP_SIZE && priv->cfg1_base) - return cfg1_map(priv, busnum, devfn, where); + return cfg1_map(priv, bus, devfn, where); return NULL; } +#ifdef CONFIG_OF + static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq; @@ -183,27 +213,42 @@ static int loongson_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) return val; } -/* LS2K/LS7A accept 8/16/32-bit PCI operations */ +/* LS2K/LS7A accept 8/16/32-bit PCI config operations */ static struct pci_ops loongson_pci_ops = { .map_bus = pci_loongson_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, }; -/* RS780/SR5690 only accept 32-bit PCI operations */ +/* RS780/SR5690 only accept 32-bit PCI config operations */ static struct pci_ops loongson_pci_ops32 = { .map_bus = pci_loongson_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write32, }; +static const struct pci_controller_data ls2k_pci_data = { + .flags = FLAG_CFG1 | FLAG_DEV_FIX, + .ops = &loongson_pci_ops, +}; + +static const struct pci_controller_data ls7a_pci_data = { + .flags = FLAG_CFG1 | FLAG_DEV_FIX, + .ops = &loongson_pci_ops, +}; + +static const struct pci_controller_data rs780e_pci_data = { + .flags = 
FLAG_CFG0, + .ops = &loongson_pci_ops32, +}; + static const struct of_device_id loongson_pci_of_match[] = { { .compatible = "loongson,ls2k-pci", - .data = (void *)(FLAG_CFG0 | FLAG_CFG1 | FLAG_DEV_FIX), }, + .data = (void *)&ls2k_pci_data, }, { .compatible = "loongson,ls7a-pci", - .data = (void *)(FLAG_CFG0 | FLAG_CFG1 | FLAG_DEV_FIX), }, + .data = (void *)&ls7a_pci_data, }, { .compatible = "loongson,rs780e-pci", - .data = (void *)(FLAG_CFG0), }, + .data = (void *)&rs780e_pci_data, }, {} }; @@ -224,20 +269,20 @@ static int loongson_pci_probe(struct platform_device *pdev) priv = pci_host_bridge_priv(bridge); priv->pdev = pdev; - priv->flags = (unsigned long)of_device_get_match_data(dev); + priv->data = (struct pci_controller_data *)of_device_get_match_data(dev); - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!regs) { - dev_err(dev, "missing mem resources for cfg0\n"); - return -EINVAL; + if (priv->data->flags & FLAG_CFG0) { + regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!regs) + dev_err(dev, "missing mem resources for cfg0\n"); + else { + priv->cfg0_base = devm_pci_remap_cfg_resource(dev, regs); + if (IS_ERR(priv->cfg0_base)) + return PTR_ERR(priv->cfg0_base); + } } - priv->cfg0_base = devm_pci_remap_cfg_resource(dev, regs); - if (IS_ERR(priv->cfg0_base)) - return PTR_ERR(priv->cfg0_base); - - /* CFG1 is optional */ - if (priv->flags & FLAG_CFG1) { + if (priv->data->flags & FLAG_CFG1) { regs = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!regs) dev_info(dev, "missing mem resource for cfg1\n"); @@ -249,11 +294,8 @@ static int loongson_pci_probe(struct platform_device *pdev) } bridge->sysdata = priv; + bridge->ops = priv->data->ops; bridge->map_irq = loongson_map_irq; - if (!of_device_is_compatible(node, "loongson,rs780e-pci")) - bridge->ops = &loongson_pci_ops; - else - bridge->ops = &loongson_pci_ops32; return pci_host_probe(bridge); } @@ -266,3 +308,41 @@ static struct platform_driver loongson_pci_driver = { .probe = loongson_pci_probe, }; builtin_platform_driver(loongson_pci_driver); + +#endif + +#ifdef CONFIG_ACPI + +static int loongson_pci_ecam_init(struct pci_config_window *cfg) +{ + struct device *dev = cfg->parent; + struct loongson_pci *priv; + struct pci_controller_data *data; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + cfg->priv = priv; + data->flags = FLAG_CFG1; + priv->data = data; + priv->cfg1_base = cfg->win - (cfg->busr.start << 16); + + return 0; +} + +const struct pci_ecam_ops loongson_pci_ecam_ops = { + .bus_shift = 16, + .init = loongson_pci_ecam_init, + .pci_ops = { + .map_bus = pci_loongson_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, + } +}; + +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fb86b13b54a6..a3afd20e523a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -105,6 +105,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_IOSAPIC, ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, + ACPI_IRQ_MODEL_LPIC, ACPI_IRQ_MODEL_COUNT }; diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 033ce74f02e8..3e28f275a889 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -58,6 +58,7 @@ extern const struct pci_ecam_ops pci_thunder_ecam_ops; /* Cavium ThunderX 1.x */ extern const struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern const struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM 
X-Gene PCIe v2.x */ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ +extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #endif #if IS_ENABLED(CONFIG_PCI_HOST_COMMON) diff --git a/scripts/subarch.include b/scripts/subarch.include index 650682821126..c79e0d0b1a19 100644 --- a/scripts/subarch.include +++ b/scripts/subarch.include @@ -10,4 +10,4 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ - -e s/riscv.*/riscv/) + -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/) -- Gitee From 64662d0058c26955c1d4555a9193a3295e58258f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 1 Jun 2021 08:50:39 +0800 Subject: [PATCH 036/241] LoongArch: Add basic SMP support Change-Id: Ic18db0590ae56df6c5e7938e3267a380023717a5 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 76 ++- arch/loongarch/include/asm/atomic.h | 4 + arch/loongarch/include/asm/barrier.h | 108 ++++ arch/loongarch/include/asm/cmpxchg.h | 1 + arch/loongarch/include/asm/futex.h | 1 + arch/loongarch/include/asm/hardirq.h | 2 + .../include/asm/mach-loongson64/irq.h | 4 + arch/loongarch/include/asm/percpu.h | 202 +++++++ arch/loongarch/include/asm/pgtable.h | 21 + arch/loongarch/include/asm/smp.h | 140 +++++ arch/loongarch/include/asm/stackframe.h | 17 +- arch/loongarch/include/asm/tlbflush.h | 24 +- arch/loongarch/include/asm/topology.h | 7 +- arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/acpi.c | 70 ++- arch/loongarch/kernel/asm-offsets.c | 10 + arch/loongarch/kernel/cmpxchg.c | 3 + arch/loongarch/kernel/head.S | 30 + arch/loongarch/kernel/irq.c | 3 + arch/loongarch/kernel/proc.c | 5 + arch/loongarch/kernel/reset.c | 12 + arch/loongarch/kernel/setup.c | 26 + arch/loongarch/kernel/smp.c | 419 ++++++++++++++ arch/loongarch/kernel/topology.c | 43 +- arch/loongarch/kernel/vmlinux.lds.S | 3 + arch/loongarch/loongson64/Makefile | 2 + arch/loongarch/loongson64/init.c | 3 + arch/loongarch/loongson64/irq.c | 10 + arch/loongarch/loongson64/smp.c | 513 ++++++++++++++++++ arch/loongarch/mm/tlbex.S | 69 +++ include/linux/cpuhotplug.h | 1 + 31 files changed, 1814 insertions(+), 17 deletions(-) create mode 100644 arch/loongarch/include/asm/smp.h create mode 100644 arch/loongarch/kernel/smp.c create mode 100644 arch/loongarch/loongson64/smp.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index de4ca3a51147..3d70ab225f5b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -59,6 +59,7 @@ config LOONGARCH select GENERIC_LIB_UCMPDI2 select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK + select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL @@ -91,7 +92,7 @@ config LOONGARCH select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_TIF_NOHZ - select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT + select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA if MODULES select OF @@ -122,6 +123,8 @@ config MACH_LOONGSON64 select NR_CPUS_DEFAULT_64 select SPARSE_IRQ select SYS_HAS_CPU_LOONGSON64 + select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_64BIT_KERNEL select ZONE_DMA32 help @@ -145,6 +148,9 @@ config SCHED_OMIT_FRAME_POINTER bool default y +config SYS_SUPPORTS_HOTPLUG_CPU + bool + config GENERIC_CSUM def_bool y @@ -398,6 +404,74 @@ config EFI This enables the kernel to use EFI 
runtime services that are available (such as the EFI variable services). +config SMP + bool "Multi-Processing support" + depends on SYS_SUPPORTS_SMP + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on uni- and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + uniprocessor machines. On a uniprocessor machine, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at . + + If you don't know what to do here, say N. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + select GENERIC_IRQ_MIGRATION + depends on SMP && SYS_SUPPORTS_HOTPLUG_CPU + help + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + (Note: power management support will enable this option + automatically on SMP systems. ) + Say N if you want to disable CPU hotplug. + +config SYS_SUPPORTS_SMP + bool + +config NR_CPUS_DEFAULT_4 + bool + +config NR_CPUS_DEFAULT_8 + bool + +config NR_CPUS_DEFAULT_16 + bool + +config NR_CPUS_DEFAULT_32 + bool + +config NR_CPUS_DEFAULT_64 + bool + +config NR_CPUS + int "Maximum number of CPUs (2-256)" + range 2 256 + depends on SMP + default "4" if NR_CPUS_DEFAULT_4 + default "8" if NR_CPUS_DEFAULT_8 + default "16" if NR_CPUS_DEFAULT_16 + default "32" if NR_CPUS_DEFAULT_32 + default "64" if NR_CPUS_DEFAULT_64 + help + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 32 for 32-bit + kernel and 64 for 64-bit kernels; the minimum value which makes + sense is 1 for Qemu (useful only for kernel debugging purposes) + and 2 for all others. + + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. For best + performance should round up your number of processors to the next + power of two. 
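+	  For example, NR_CPUS=64 on a 4-core board only wastes the
+	  bookkeeping for the 60 absent CPUs, i.e. roughly half a
+	  megabyte of kernel image.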
+ source "kernel/Kconfig.hz" config SECCOMP diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 81fa9f97eb51..8f510c90c1e1 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -171,6 +171,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " sc.w %1, %2 \n" " beq $zero, %1, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "I" (-i)); @@ -183,6 +184,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " sc.w %1, %2 \n" " beq $zero, %1, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "r" (i)); @@ -333,6 +335,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " sc.d %1, %2 \n" " beq %1, $zero, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "I" (-i)); @@ -345,6 +348,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " sc.d %1, %2 \n" " beq %1, $zero, 1b \n" "2: \n" + __WEAK_LLSC_MB : "=&r" (result), "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) : "r" (i)); diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h index 03b97918e750..d9e3e1e6b9c2 100644 --- a/arch/loongarch/include/asm/barrier.h +++ b/arch/loongarch/include/asm/barrier.h @@ -20,6 +20,19 @@ #define mb() fast_mb() #define iob() fast_iob() +#define __smp_mb() __asm__ __volatile__("dbar 0" : : :"memory") +#define __smp_rmb() __asm__ __volatile__("dbar 0" : : :"memory") +#define __smp_wmb() __asm__ __volatile__("dbar 0" : : :"memory") + +#ifdef CONFIG_SMP +#define __WEAK_LLSC_MB " dbar 0 \n" +#else +#define __WEAK_LLSC_MB " \n" +#endif + +#define __smp_mb__before_atomic() barrier() +#define __smp_mb__after_atomic() barrier() + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -48,6 +61,101 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, return mask; } +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + unsigned long __tmp = 0; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + *(__u8 *)__u.__c = *(volatile __u8 *)p; \ + __smp_mb(); \ + break; \ + case 2: \ + *(__u16 *)__u.__c = *(volatile __u16 *)p; \ + __smp_mb(); \ + break; \ + case 4: \ + __asm__ __volatile__( \ + "amor_db.w %[val], %[tmp], %[mem] \n" \ + : [val] "=&r" (*(__u32 *)__u.__c) \ + : [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp) \ + : "memory"); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "amor_db.d %[val], %[tmp], %[mem] \n" \ + : [val] "=&r" (*(__u64 *)__u.__c) \ + : [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp) \ + : "memory"); \ + break; \ + } \ + (typeof(*p))__u.__val; \ +}) + +#define __smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + unsigned long __tmp; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + __smp_mb(); \ + *(volatile __u8 *)p = *(__u8 *)__u.__c; \ + break; \ + case 2: \ + __smp_mb(); \ + *(volatile __u16 *)p = *(__u16 *)__u.__c; \ + break; \ + case 4: \ + __asm__ __volatile__( \ + "amswap_db.w %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u32 *)__u.__c) \ + : ); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "amswap_db.d %[tmp], %[val], %[mem] \n" \ + 
: [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u64 *)__u.__c) \ + : ); \ + break; \ + } \ +} while (0) + +#define __smp_store_mb(p, v) \ +do { \ + union { typeof(p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(p)) (v) }; \ + unsigned long __tmp; \ + switch (sizeof(p)) { \ + case 1: \ + *(volatile __u8 *)&p = *(__u8 *)__u.__c; \ + __smp_mb(); \ + break; \ + case 2: \ + *(volatile __u16 *)&p = *(__u16 *)__u.__c; \ + __smp_mb(); \ + break; \ + case 4: \ + __asm__ __volatile__( \ + "amswap_db.w %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u32 *)&p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u32 *)__u.__c) \ + : ); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "amswap_db.d %[tmp], %[val], %[mem] \n" \ + : [mem] "+ZB" (*(u64 *)&p), [tmp] "=&r" (__tmp) \ + : [val] "r" (*(__u64 *)__u.__c) \ + : ); \ + break; \ + } \ +} while (0) + #include #endif /* __ASM_BARRIER_H */ diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 63e60e272771..70ebf9ca41b7 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -66,6 +66,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, " " st " $t0, %1 \n" \ " beq $zero, $t0, 1b \n" \ "2: \n" \ + __WEAK_LLSC_MB \ : "=&r" (__ret), "=ZB"(*m) \ : "ZB"(*m), "Jr" (old), "Jr" (new) \ : "t0", "memory"); \ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index fa0ccf9ea024..04e2893cf77d 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -86,6 +86,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "2: sc.w $t0, %2 \n" " beq $zero, $t0, 1b \n" "3: \n" + __WEAK_LLSC_MB " .section .fixup,\"ax\" \n" "4: li.d %0, %6 \n" " b 3b \n" diff --git a/arch/loongarch/include/asm/hardirq.h b/arch/loongarch/include/asm/hardirq.h index 5effe88fce3e..a16d425a0367 100644 --- a/arch/loongarch/include/asm/hardirq.h +++ b/arch/loongarch/include/asm/hardirq.h @@ -21,4 +21,6 @@ typedef struct { #include /* Standard mappings for irq_cpustat_t above */ +#define __ARCH_IRQ_STAT + #endif /* _ASM_HARDIRQ_H */ diff --git a/arch/loongarch/include/asm/mach-loongson64/irq.h b/arch/loongarch/include/asm/mach-loongson64/irq.h index 124cec11d01f..e549199eb20b 100644 --- a/arch/loongarch/include/asm/mach-loongson64/irq.h +++ b/arch/loongarch/include/asm/mach-loongson64/irq.h @@ -2,6 +2,8 @@ #ifndef __ASM_MACH_LOONGSON64_IRQ_H_ #define __ASM_MACH_LOONGSON64_IRQ_H_ +#include + #define MAX_IO_PICS 2 #define NR_IRQS (64 + (256 * MAX_IO_PICS)) @@ -80,4 +82,6 @@ extern struct irq_domain *pch_lpc_domain; extern struct irq_domain *pch_msi_domain[MAX_IO_PICS]; extern struct irq_domain *pch_pic_domain[MAX_IO_PICS]; +extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); + #endif /* __ASM_MACH_LOONGSON64_IRQ_H_ */ diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 28fd8b6f2188..166778c3db05 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -5,6 +5,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include + /* Use r21 for fast access */ register unsigned long __my_cpu_offset __asm__("$r21"); @@ -15,6 +17,206 @@ static inline void set_my_cpu_offset(unsigned long off) } #define __my_cpu_offset __my_cpu_offset +#define PERCPU_OP(op, asm_op, c_op) \ +static inline unsigned long __percpu_##op(void *ptr, \ + unsigned long val, int size) \ +{ \ + unsigned long ret; \ + \ + switch 
(size) { \ + case 4: \ + __asm__ __volatile__( \ + "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ + : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \ + : [val] "r" (val)); \ + break; \ + case 8: \ + __asm__ __volatile__( \ + "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ + : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \ + : [val] "r" (val)); \ + break; \ + default: \ + ret = 0; \ + BUILD_BUG(); \ + } \ + \ + return ret c_op val; \ +} + +PERCPU_OP(add, add, +) +PERCPU_OP(and, and, &) +PERCPU_OP(or, or, |) +#undef PERCPU_OP + +static inline unsigned long __percpu_read(void *ptr, int size) +{ + unsigned long ret; + + switch (size) { + case 1: + __asm__ __volatile__ ("ldx.b %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + case 2: + __asm__ __volatile__ ("ldx.h %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + case 4: + __asm__ __volatile__ ("ldx.w %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + case 8: + __asm__ __volatile__ ("ldx.d %[ret], $r21, %[ptr] \n" + : [ret] "=&r"(ret) + : [ptr] "r"(ptr) + : "memory"); + break; + default: + ret = 0; + BUILD_BUG(); + } + + return ret; +} + +static inline void __percpu_write(void *ptr, unsigned long val, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("stx.b %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + case 2: + __asm__ __volatile__("stx.h %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + case 4: + __asm__ __volatile__("stx.w %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + case 8: + __asm__ __volatile__("stx.d %[val], $r21, %[ptr] \n" + : + : [val] "r" (val), [ptr] "r" (ptr) + : "memory"); + break; + default: + BUILD_BUG(); + } +} + +static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) +{ + switch (size) { + case 1: + case 2: + return __xchg_small((volatile void *)ptr, val, size); + + case 4: + return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val); + + case 8: + return __xchg_asm("amswap.d", (volatile u64 *)ptr, (u64)val); + + default: + BUILD_BUG(); + } + + return 0; +} + +/* this_cpu_cmpxchg */ +#define _protect_cmpxchg_local(pcp, o, n) \ +({ \ + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ + preempt_disable_notrace(); \ + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ + preempt_enable_notrace(); \ + __ret; \ +}) + +#define _percpu_read(pcp) \ +({ \ + typeof(pcp) __retval; \ + __retval = (typeof(pcp))__percpu_read(&(pcp), sizeof(pcp)); \ + __retval; \ +}) + +#define _percpu_write(pcp, val) \ +do { \ + __percpu_write(&(pcp), (unsigned long)(val), sizeof(pcp)); \ +} while (0) \ + +#define _pcp_protect(operation, pcp, val) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable_notrace(); \ + __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ + (val), sizeof(pcp)); \ + preempt_enable_notrace(); \ + __retval; \ +}) + +#define _percpu_add(pcp, val) \ + _pcp_protect(__percpu_add, pcp, val) + +#define _percpu_add_return(pcp, val) _percpu_add(pcp, val) + +#define _percpu_and(pcp, val) \ + _pcp_protect(__percpu_and, pcp, val) + +#define _percpu_or(pcp, val) \ + _pcp_protect(__percpu_or, pcp, val) + +#define _percpu_xchg(pcp, val) ((typeof(pcp)) \ + _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))) + +#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val) + +#define this_cpu_add_return_4(pcp, 
val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val) + +#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val) + +#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) + +#define this_cpu_read_1(pcp) _percpu_read(pcp) +#define this_cpu_read_2(pcp) _percpu_read(pcp) +#define this_cpu_read_4(pcp) _percpu_read(pcp) +#define this_cpu_read_8(pcp) _percpu_read(pcp) + +#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) + +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) + +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) + #include #endif /* __ASM_PERCPU_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 99438f209e0d..687b1a5fe2a2 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -41,8 +41,29 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) * Make sure the buddy is global too (if it's !none, * it better already be global) */ +#ifdef CONFIG_SMP + /* + * For SMP, multiple CPUs can race, so we need to do + * this atomically. + */ + unsigned long page_global = _PAGE_GLOBAL; + unsigned long tmp; + + __asm__ __volatile__ ( + "1:" __LL "%[tmp], %[buddy] \n" + " bnez %[tmp], 2f \n" + " or %[tmp], %[tmp], %[global] \n" + __SC "%[tmp], %[buddy] \n" + " beqz %[tmp], 1b \n" + " nop \n" + "2: \n" + __WEAK_LLSC_MB + : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) + : [global] "r" (page_global)); +#else /* !CONFIG_SMP */ if (pte_none(*buddy)) pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; +#endif /* CONFIG_SMP */ } } diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h new file mode 100644 index 000000000000..d638b88e8744 --- /dev/null +++ b/arch/loongarch/include/asm/smp.h @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +struct task_struct; + +struct plat_smp_ops { + void (*send_ipi_single)(int cpu, unsigned int action); + void (*send_ipi_mask)(const struct cpumask *mask, unsigned int action); + void (*smp_setup)(void); + void (*prepare_cpus)(unsigned int max_cpus); + int (*boot_secondary)(int cpu, struct task_struct *idle); + void (*init_secondary)(void); + void (*smp_finish)(void); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(void); + void (*cpu_die)(unsigned int cpu); +#endif +}; + +extern const struct plat_smp_ops *mp_ops; +void register_smp_ops(const struct plat_smp_ops *ops); + +static inline void plat_smp_setup(void) +{ + mp_ops->smp_setup(); +} + +#else /* !CONFIG_SMP */ + +struct plat_smp_ops; + +static inline void plat_smp_setup(void) +{ + /* UP, nothing to do ... 
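+	 * (the boot CPU is the only CPU, so these SMP hooks are
+	 * empty stubs)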
*/ +} + +static inline void register_smp_ops(const struct plat_smp_ops *ops) +{ +} + +#endif /* !CONFIG_SMP */ + +extern int smp_num_siblings; +extern int num_processors; +extern int disabled_cpus; +extern cpumask_t cpu_sibling_map[]; +extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map[]; + +static inline int raw_smp_processor_id(void) +{ +#if defined(__VDSO__) + extern int vdso_smp_processor_id(void) + __compiletime_error("VDSO should not call smp_processor_id()"); + return vdso_smp_processor_id(); +#else + return current_thread_info()->cpu; +#endif +} +#define raw_smp_processor_id raw_smp_processor_id + +/* Map from cpu id to sequential logical cpu number. This will only + not be idempotent when cpus failed to come on-line. */ +extern int __cpu_number_map[NR_CPUS]; +#define cpu_number_map(cpu) __cpu_number_map[cpu] + +/* The reverse map from sequential logical cpu number to cpu id. */ +extern int __cpu_logical_map[NR_CPUS]; +#define cpu_logical_map(cpu) __cpu_logical_map[cpu] + +#define cpu_physical_id(cpu) cpu_logical_map(cpu) + +#define SMP_BOOT_CPU 0x1 +#define SMP_RESCHEDULE 0x2 +#define SMP_CALL_FUNCTION 0x4 + +struct secondary_data { + unsigned long stack; + unsigned long thread_info; +}; +extern struct secondary_data cpuboot_data; + +extern asmlinkage void smpboot_entry(void); + +extern void calculate_cpu_foreign_map(void); + +/* + * Generate IPI list text + */ +extern void show_ipi_list(struct seq_file *p, int prec); + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +static inline void smp_send_reschedule(int cpu) +{ + mp_ops->send_ipi_single(cpu, SMP_RESCHEDULE); +} + +#ifdef CONFIG_HOTPLUG_CPU +static inline int __cpu_disable(void) +{ + return mp_ops->cpu_disable(); +} + +static inline void __cpu_die(unsigned int cpu) +{ + mp_ops->cpu_die(cpu); +} + +extern void play_dead(void); +#endif + +static inline void arch_send_call_function_single_ipi(int cpu) +{ + mp_ops->send_ipi_single(cpu, SMP_CALL_FUNCTION); +} + +static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION); +} + +#endif /* __ASM_SMP_H */ diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 552ccf1f93f0..f9ed30ab1a1f 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -77,17 +77,24 @@ * new value in sp. 
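+ * On SMP the per-CPU kernelsp slot is reached by adding the
+ * per-CPU base; on exception entry that base is read from the
+ * PERCPU_BASE_KS scratch CSR rather than from $r21.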
*/ .macro get_saved_sp docfi=0 - la.abs t1, kernelsp - move t0, sp + la.abs t1, kernelsp +#ifdef CONFIG_SMP + csrrd t0, PERCPU_BASE_KS + LONG_ADD t1, t1, t0 +#endif + move t0, sp .if \docfi .cfi_register sp, t0 .endif - LONG_L sp, t1, 0 + LONG_L sp, t1, 0 .endm .macro set_saved_sp stackp temp temp2 - la.abs \temp, kernelsp - LONG_S \stackp, \temp, 0 + la.abs \temp, kernelsp +#ifdef CONFIG_SMP + LONG_ADD \temp, \temp, u0 +#endif + LONG_S \stackp, \temp, 0 .endm .macro SAVE_SOME docfi=0 diff --git a/arch/loongarch/include/asm/tlbflush.h b/arch/loongarch/include/asm/tlbflush.h index 67e08f8ad794..784ef1750ed8 100644 --- a/arch/loongarch/include/asm/tlbflush.h +++ b/arch/loongarch/include/asm/tlbflush.h @@ -20,19 +20,29 @@ extern void local_flush_tlb_all(void); extern void local_flush_tlb_user(void); extern void local_flush_tlb_kernel(void); extern void local_flush_tlb_mm(struct mm_struct *mm); -extern void local_flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end); -extern void local_flush_tlb_kernel_range(unsigned long start, - unsigned long end); -extern void local_flush_tlb_page(struct vm_area_struct *vma, - unsigned long page); +extern void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); +extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void local_flush_tlb_one(unsigned long vaddr); +#ifdef CONFIG_SMP + +extern void flush_tlb_all(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long, unsigned long); +extern void flush_tlb_kernel_range(unsigned long, unsigned long); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); +extern void flush_tlb_one(unsigned long vaddr); + +#else /* CONFIG_SMP */ + #define flush_tlb_all() local_flush_tlb_all() #define flush_tlb_mm(mm) local_flush_tlb_mm(mm) #define flush_tlb_range(vma, vmaddr, end) local_flush_tlb_range(vma, vmaddr, end) -#define flush_tlb_kernel_range(vmaddr,end) local_flush_tlb_kernel_range(vmaddr, end) +#define flush_tlb_kernel_range(vmaddr,end) local_flush_tlb_kernel_range(vmaddr, end) #define flush_tlb_page(vma, page) local_flush_tlb_page(vma, page) #define flush_tlb_one(vaddr) local_flush_tlb_one(vaddr) +#endif /* CONFIG_SMP */ + #endif /* __ASM_TLBFLUSH_H */ diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index 51feaa64339a..9d3ababc7677 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -7,7 +7,12 @@ #include -#define cpu_logical_map(cpu) 0 +#ifdef CONFIG_SMP +#define topology_physical_package_id(cpu) (cpu_data[cpu].package) +#define topology_core_id(cpu) (cpu_data[cpu].core) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_sibling_cpumask(cpu) (&cpu_sibling_map[cpu]) +#endif #include diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index a68623bf57ed..840c1a8a8de2 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -17,6 +17,8 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o +obj-$(CONFIG_SMP) += smp.o + obj-$(CONFIG_PROC_FS) += proc.o CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 4e9c25ae8e54..5ffe869cac31 100644 --- a/arch/loongarch/kernel/acpi.c +++ 
b/arch/loongarch/kernel/acpi.c @@ -140,6 +140,35 @@ void __init acpi_boot_table_init(void) } } +static int set_processor_mask(u32 id, u32 flags) +{ + + int cpu, cpuid = id; + + if (num_processors >= nr_cpu_ids) { + pr_warn(PREFIX "nr_cpus/possible_cpus limit of %i reached." + " processor 0x%x ignored.\n", nr_cpu_ids, cpuid); + + return -ENODEV; + + } + if (cpuid == loongson_sysconf.boot_cpu_id) + cpu = 0; + else + cpu = cpumask_next_zero(-1, cpu_present_mask); + + if (flags & ACPI_MADT_ENABLED) { + num_processors++; + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); + __cpu_number_map[cpuid] = cpu; + __cpu_logical_map[cpu] = cpuid; + } else + disabled_cpus++; + + return cpu; +} + static int __init acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) { @@ -150,6 +179,7 @@ acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) return -EINVAL; acpi_table_print_madt_entry(&header->common); + set_processor_mask(processor->core_id, processor->flags); return 0; } @@ -251,7 +281,12 @@ acpi_parse_pch_lpc(union acpi_subtable_headers *header, const unsigned long end) static void __init acpi_process_madt(void) { - int error; + int i, error; + + for (i = 0; i < NR_CPUS; i++) { + __cpu_number_map[i] = -1; + __cpu_logical_map[i] = -1; + } /* Parse MADT CPUINTC entries */ error = acpi_table_parse_madt(ACPI_MADT_TYPE_CORE_PIC, acpi_parse_cpuintc, MAX_CORE_PIC); @@ -337,3 +372,36 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) { memblock_reserve(addr, size); } + +#ifdef CONFIG_ACPI_HOTPLUG_CPU + +#include + +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu) +{ + int cpu; + + cpu = set_processor_mask(physid, ACPI_MADT_ENABLED); + if (cpu < 0) { + pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); + return cpu; + } + + *pcpu = cpu; + + return 0; +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ + set_cpu_present(cpu, false); + num_processors--; + + pr_info("cpu%d hot remove!\n", cpu); + + return 0; +} +EXPORT_SYMBOL(acpi_unmap_cpu); + +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index b4f7949ca87f..afe584bdfd88 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -252,3 +252,13 @@ void output_signal_defines(void) DEFINE(_SIGXFSZ, SIGXFSZ); BLANK(); } + +#ifdef CONFIG_SMP +void output_smpboot_defines(void) +{ + COMMENT("Linux smp cpu boot offsets."); + OFFSET(CPU_BOOT_STACK, secondary_data, stack); + OFFSET(CPU_BOOT_TINFO, secondary_data, thread_info); + BLANK(); +} +#endif diff --git a/arch/loongarch/kernel/cmpxchg.c b/arch/loongarch/kernel/cmpxchg.c index 5deb293aa0ec..fc0bbb231ccc 100644 --- a/arch/loongarch/kernel/cmpxchg.c +++ b/arch/loongarch/kernel/cmpxchg.c @@ -92,7 +92,10 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, " or %1, %1, %6 \n" " sc.w %1, %2 \n" " beqz %1, 1b \n" + " b 3f \n" "2: \n" + __WEAK_LLSC_MB + "3: \n" : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) : "memory"); diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index d3cafd1a8d90..2c120cd619b5 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -67,4 +67,34 @@ SYM_CODE_START(kernel_entry) # kernel entry point SYM_CODE_END(kernel_entry) +#ifdef CONFIG_SMP + +/* + * SMP slave cpus entry point. 
Board specific code for bootstrap calls this + * function after setting up the stack and tp registers. + */ +SYM_CODE_START(smpboot_entry) + li.d t0, CSR_DMW0_INIT # UC, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN1 + li.w t0, 0xb0 # PLV=0, IE=0, PG=1 + csrwr t0, LOONGARCH_CSR_CRMD + li.w t0, 0x04 # PLV=0, PIE=1, PWE=0 + csrwr t0, LOONGARCH_CSR_PRMD + li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 + csrwr t0, LOONGARCH_CSR_EUEN + + la.abs t0, cpuboot_data + ld.d sp, t0, CPU_BOOT_STACK + ld.d tp, t0, CPU_BOOT_TINFO + + la.abs t0, 0f + jirl zero, t0, 0 +0: + bl start_secondary +SYM_CODE_END(smpboot_entry) + +#endif /* CONFIG_SMP */ + SYM_ENTRY(kernel_entry_end, SYM_L_GLOBAL, SYM_A_NONE) diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index b56f43582283..0b83d1ea590f 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -34,6 +34,9 @@ atomic_t irq_err_count; int arch_show_interrupts(struct seq_file *p, int prec) { +#ifdef CONFIG_SMP + show_ipi_list(p, prec); +#endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); return 0; } diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index be461efeb7a6..e0b5f3b031b1 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -35,6 +35,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int fp_version = cpu_data[n].fpu_vers; struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + /* * For the first processor also print the system type */ diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index f39d911254a2..d22de0669b4b 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -36,16 +36,28 @@ EXPORT_SYMBOL(pm_power_off); void machine_halt(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif machine_hang(); } void machine_power_off(void) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif pm_power_off(); } void machine_restart(char *command) { +#ifdef CONFIG_SMP + preempt_disable(); + smp_send_stop(); +#endif do_kernel_restart(command); pm_restart(); } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index cc9a2d335715..6d5cebfe1085 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -24,6 +24,7 @@ #include #include #include +#include DEFINE_PER_CPU(unsigned long, kernelsp); unsigned long fw_arg0, fw_arg1, fw_arg2; @@ -225,6 +226,29 @@ static int __init reserve_memblock_reserved_regions(void) } arch_initcall(reserve_memblock_reserved_regions); +#ifdef CONFIG_SMP +static void __init prefill_possible_map(void) +{ + int i, possible; + + possible = num_processors + disabled_cpus; + if (possible > nr_cpu_ids) + possible = nr_cpu_ids; + + pr_info("SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, max((possible - num_processors), 0)); + + for (i = 0; i < possible; i++) + set_cpu_possible(i, true); + for (; i < NR_CPUS; i++) + set_cpu_possible(i, false); + + nr_cpu_ids = possible; +} +#else +static inline void prefill_possible_map(void) {} +#endif + void __init setup_arch(char **cmdline_p) { cpu_probe(); @@ -238,6 +262,8 @@ void __init setup_arch(char **cmdline_p) arch_mem_init(cmdline_p); resource_init(); + plat_smp_setup(); + prefill_possible_map(); paging_init(); } diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c new file mode 100644 index 
000000000000..39b7967795e5
--- /dev/null
+++ b/arch/loongarch/kernel/smp.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+int __cpu_number_map[NR_CPUS];		/* Map physical to logical */
+EXPORT_SYMBOL(__cpu_number_map);
+
+int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
+EXPORT_SYMBOL(__cpu_logical_map);
+
+/* Number of threads (siblings) per CPU core */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Representing the threads (siblings) of each logical CPU */
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
+
+/* Representing the core map of multi-core chips of each logical CPU */
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_core_map);
+
+static DECLARE_COMPLETION(cpu_starting);
+static DECLARE_COMPLETION(cpu_running);
+
+/*
+ * A logical cpu mask containing only one VPE per core to
+ * reduce the number of IPIs on large MT systems.
+ */
+cpumask_t cpu_foreign_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_foreign_map);
+
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
+/* representing cpus for which core maps can be computed */
+static cpumask_t cpu_core_setup_map;
+
+static inline void set_cpu_sibling_map(int cpu)
+{
+	int i;
+
+	cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+
+	if (smp_num_siblings > 1) {
+		for_each_cpu(i, &cpu_sibling_setup_map) {
+			if (cpus_are_siblings(cpu, i)) {
+				cpumask_set_cpu(i, &cpu_sibling_map[cpu]);
+				cpumask_set_cpu(cpu, &cpu_sibling_map[i]);
+			}
+		}
+	} else
+		cpumask_set_cpu(cpu, &cpu_sibling_map[cpu]);
+}
+
+static inline void set_cpu_core_map(int cpu)
+{
+	int i;
+
+	cpumask_set_cpu(cpu, &cpu_core_setup_map);
+
+	for_each_cpu(i, &cpu_core_setup_map) {
+		if (cpu_data[cpu].package == cpu_data[i].package) {
+			cpumask_set_cpu(i, &cpu_core_map[cpu]);
+			cpumask_set_cpu(cpu, &cpu_core_map[i]);
+		}
+	}
+}
+
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+void calculate_cpu_foreign_map(void)
+{
+	int i, k, core_present;
+	cpumask_t temp_foreign_map;
+
+	/* Re-calculate the mask */
+	cpumask_clear(&temp_foreign_map);
+	for_each_online_cpu(i) {
+		core_present = 0;
+		for_each_cpu(k, &temp_foreign_map)
+			if (cpus_are_siblings(i, k))
+				core_present = 1;
+		if (!core_present)
+			cpumask_set_cpu(i, &temp_foreign_map);
+	}
+
+	for_each_online_cpu(i)
+		cpumask_andnot(&cpu_foreign_map[i],
+			       &temp_foreign_map, &cpu_sibling_map[i]);
+}
+
+const struct plat_smp_ops *mp_ops;
+EXPORT_SYMBOL(mp_ops);
+
+void register_smp_ops(const struct plat_smp_ops *ops)
+{
+	if (mp_ops)
+		printk(KERN_WARNING "Overriding previously set SMP ops\n");
+
+	mp_ops = ops;
+}
+
+/*
+ * First C code run on the secondary CPUs after being started up by
+ * the master.
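+ * The handshake with __cpu_up(): complete(&cpu_starting) once the CPU
+ * is alive, then mark it online, then complete(&cpu_running) so the
+ * boot CPU may return.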
+ */ +asmlinkage void start_secondary(void) +{ + unsigned int cpu; + + sync_counter(); + cpu = smp_processor_id(); + set_my_cpu_offset(per_cpu_offset(cpu)); + + cpu_probe(); + constant_clockevent_init(); + mp_ops->init_secondary(); + + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + + notify_cpu_starting(cpu); + + /* Notify boot CPU that we're starting */ + complete(&cpu_starting); + + /* The CPU is running, now mark it online */ + set_cpu_online(cpu, true); + + calculate_cpu_foreign_map(); + + /* + * Notify boot CPU that we're up & online and it can safely return + * from __cpu_up + */ + complete(&cpu_running); + + /* + * irq will be enabled in ->smp_finish(), enabling it too early + * is dangerous. + */ + WARN_ON_ONCE(!irqs_disabled()); + mp_ops->smp_finish(); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +static void stop_this_cpu(void *dummy) +{ + /* + * Remove this CPU: + */ + + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); + local_irq_disable(); + while (1); +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 0); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +/* called from main before smp_init() */ +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + init_new_context(current, &init_mm); + current_thread_info()->cpu = 0; + mp_ops->prepare_cpus(max_cpus); + set_cpu_sibling_map(0); + set_cpu_core_map(0); + calculate_cpu_foreign_map(); +#ifndef CONFIG_HOTPLUG_CPU + init_cpu_present(cpu_possible_mask); +#endif +} + +/* Preload SMP state for boot cpu */ +void smp_prepare_boot_cpu(void) +{ + unsigned int cpu; + + set_cpu_possible(0, true); + set_cpu_online(0, true); + set_my_cpu_offset(per_cpu_offset(0)); + + for_each_possible_cpu(cpu) + set_cpu_numa_node(cpu, 0); +} + +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + int err; + + err = mp_ops->boot_secondary(cpu, tidle); + if (err) + return err; + + /* Wait for CPU to start and be ready to sync counters */ + if (!wait_for_completion_timeout(&cpu_starting, + msecs_to_jiffies(5000))) { + pr_crit("CPU%u: failed to start\n", cpu); + return -EIO; + } + + /* Wait for CPU to finish startup & mark itself online before return */ + wait_for_completion(&cpu_running); + return 0; +} + +/* Not really SMP stuff ... */ +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +static void flush_tlb_all_ipi(void *info) +{ + local_flush_tlb_all(); +} + +void flush_tlb_all(void) +{ + on_each_cpu(flush_tlb_all_ipi, NULL, 1); +} + +static void flush_tlb_mm_ipi(void *mm) +{ + local_flush_tlb_mm((struct mm_struct *)mm); +} + +/* + * Special Variant of smp_call_function for use by TLB functions: + * + * o No return value + * o collapses to normal function call on UP kernels + * o collapses to normal function call on systems with a single shared + * primary cache. + */ +static inline void smp_on_other_tlbs(void (*func) (void *info), void *info) +{ + smp_call_function(func, info, 1); +} + +static inline void smp_on_each_tlb(void (*func) (void *info), void *info) +{ + preempt_disable(); + + smp_on_other_tlbs(func, info); + func(info); + + preempt_enable(); +} + +/* + * The following tlb flush calls are invoked when old translations are + * being torn down, or pte attributes are changing. For single threaded + * address spaces, a new context is obtained on the current cpu, and tlb + * context on other cpus are invalidated to force a new context allocation + * at switch_mm time, should the mm ever be used on other cpus. 
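+ * (Here "invalidated" means the remote cpu_context() is set to 0, as in
+ * the single-threaded branches below.)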
For + * multithreaded address spaces, intercpu interrupts have to be sent. + * Another case where intercpu interrupts are required is when the target + * mm might be active on another cpu (eg debuggers doing the flushes on + * behalf of debugees, kswapd stealing pages from another process etc). + */ + +void flush_tlb_mm(struct mm_struct *mm) +{ + if (atomic_read(&mm->mm_users) == 0) + return; /* happens as a result of exit_mmap() */ + + preempt_disable(); + + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { + on_each_cpu_mask(mm_cpumask(mm), flush_tlb_mm_ipi, mm, 1); + } else { + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id() && cpu_context(cpu, mm)) + cpu_context(cpu, mm) = 0; + } + local_flush_tlb_mm(mm); + } + + preempt_enable(); +} + +struct flush_tlb_data { + struct vm_area_struct *vma; + unsigned long addr1; + unsigned long addr2; +}; + +static void flush_tlb_range_ipi(void *info) +{ + struct flush_tlb_data *fd = info; + + local_flush_tlb_range(fd->vma, fd->addr1, fd->addr2); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + preempt_disable(); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { + struct flush_tlb_data fd = { + .vma = vma, + .addr1 = start, + .addr2 = end, + }; + + on_each_cpu_mask(mm_cpumask(mm), flush_tlb_range_ipi, &fd, 1); + } else { + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id() && cpu_context(cpu, mm)) + cpu_context(cpu, mm) = 0; + } + local_flush_tlb_range(vma, start, end); + } + preempt_enable(); +} + +static void flush_tlb_kernel_range_ipi(void *info) +{ + struct flush_tlb_data *fd = info; + + local_flush_tlb_kernel_range(fd->addr1, fd->addr2); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct flush_tlb_data fd = { + .addr1 = start, + .addr2 = end, + }; + + on_each_cpu(flush_tlb_kernel_range_ipi, &fd, 1); +} + +static void flush_tlb_page_ipi(void *info) +{ + struct flush_tlb_data *fd = info; + + local_flush_tlb_page(fd->vma, fd->addr1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + preempt_disable(); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { + struct flush_tlb_data fd = { + .vma = vma, + .addr1 = page, + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), flush_tlb_page_ipi, &fd, 1); + } else { + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id() && cpu_context(cpu, vma->vm_mm)) + cpu_context(cpu, vma->vm_mm) = 0; + } + local_flush_tlb_page(vma, page); + } + preempt_enable(); +} + +static void flush_tlb_one_ipi(void *info) +{ + unsigned long vaddr = (unsigned long) info; + + local_flush_tlb_one(vaddr); +} + +void flush_tlb_one(unsigned long vaddr) +{ + smp_on_each_tlb(flush_tlb_one_ipi, (void *) vaddr); +} + +EXPORT_SYMBOL(flush_tlb_page); +EXPORT_SYMBOL(flush_tlb_one); diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index 3b2cbb95875b..ab1a75c0b5a6 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -1,13 +1,52 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include +#include +#include #include -static struct cpu cpu_device; +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +#ifdef CONFIG_HOTPLUG_CPU +int arch_register_cpu(int cpu) +{ + int ret; + struct cpu *c = &per_cpu(cpu_devices, cpu); + + c->hotpluggable = 1; + ret = register_cpu(c, cpu); + if (ret < 0) + 
pr_warn("register_cpu %d failed (%d)\n", cpu, ret); + + return ret; +} +EXPORT_SYMBOL(arch_register_cpu); + +void arch_unregister_cpu(int cpu) +{ + struct cpu *c = &per_cpu(cpu_devices, cpu); + + c->hotpluggable = 0; + unregister_cpu(c); +} +EXPORT_SYMBOL(arch_unregister_cpu); +#endif static int __init topology_init(void) { - return register_cpu(&cpu_device, 0); + int i, ret; + + for_each_present_cpu(i) { + struct cpu *c = &per_cpu(cpu_devices, i); + + c->hotpluggable = !!i; + ret = register_cpu(c, i); + if (ret < 0) + pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret); + } + + return 0; } subsys_initcall(topology_init); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 1c88fe808286..6348a28aac5a 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -72,6 +72,9 @@ SECTIONS .exit.data : { EXIT_DATA } +#ifdef CONFIG_SMP + PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT) +#endif /* * Align to 64K in attempt to eliminate holes before the diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile index d89d3c8fe382..134ebebaf5fb 100644 --- a/arch/loongarch/loongson64/Makefile +++ b/arch/loongarch/loongson64/Makefile @@ -3,3 +3,5 @@ # obj-y += setup.o init.o env.o reset.o irq.o mem.o rtc.o + +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 42f0988a1e24..cabb6049fc59 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -127,4 +128,6 @@ void __init platform_init(void) pr_info("The BIOS Version: %s\n",b_info.bios_version); efi_runtime_init(); + + register_smp_ops(&loongson3_smp_ops); } diff --git a/arch/loongarch/loongson64/irq.c b/arch/loongarch/loongson64/irq.c index 981f6447a7f9..f360bad20e4d 100644 --- a/arch/loongarch/loongson64/irq.c +++ b/arch/loongarch/loongson64/irq.c @@ -79,10 +79,20 @@ void __init setup_IRQ(void) void __init arch_init_irq(void) { + int r, ipi_irq; + static int ipi_dummy_dev; + clear_csr_ecfg(ECFG0_IM); clear_csr_estat(ESTATF_IP); setup_IRQ(); +#ifdef CONFIG_SMP + ipi_irq = get_ipi_irq(); + irq_set_percpu_devid(ipi_irq); + r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + if (r < 0) + panic("IPI IRQ request failed\n"); +#endif set_csr_ecfg(ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | ECFGF_IPI | ECFGF_PMC); } diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c new file mode 100644 index 000000000000..8ee38cced428 --- /dev/null +++ b/arch/loongarch/loongson64/smp.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct secondary_data cpuboot_data; + +static DEFINE_PER_CPU(int, cpu_state); +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + +#define MAX_CPUS 64 + +#define STATUS 0x00 +#define EN 0x04 +#define SET 0x08 +#define CLEAR 0x0c +#define MBUF 0x20 + +extern unsigned long long smp_group[MAX_PACKAGES]; +static u32 core_offsets[4] = {0x000, 0x100, 0x200, 0x300}; + +static volatile void *ipi_set_regs[MAX_CPUS]; +static volatile void *ipi_clear_regs[MAX_CPUS]; +static volatile void *ipi_status_regs[MAX_CPUS]; +static volatile void 
*ipi_en_regs[MAX_CPUS]; +static volatile void *ipi_mailbox_buf[MAX_CPUS]; + +static u32 (*ipi_read_clear)(int cpu); +static void (*ipi_write_action)(int cpu, u32 action); + +enum ipi_msg_type { + IPI_RESCHEDULE, + IPI_CALL_FUNCTION, +}; + +static const char *ipi_types[NR_IPI] __tracepoint_string = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNCTION] = "Function call interrupts", +}; + +void show_ipi_list(struct seq_file *p, int prec) +{ + unsigned int cpu, i; + + for (i = 0; i < NR_IPI; i++) { + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).ipi_irqs[i]); + seq_printf(p, " LoongArch %d %s\n", i + 1, ipi_types[i]); + } +} + +/* Send mail buffer via Mail_Send */ +static void csr_mail_send(uint64_t data, int cpu, int mailbox) +{ + uint64_t val; + + /* Send high 32 bits */ + val = IOCSR_MBUF_SEND_BLOCKING; + val |= (IOCSR_MBUF_SEND_BOX_HI(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT); + val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT); + val |= (data & IOCSR_MBUF_SEND_H32_MASK); + iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND); + + /* Send low 32 bits */ + val = IOCSR_MBUF_SEND_BLOCKING; + val |= (IOCSR_MBUF_SEND_BOX_LO(mailbox) << IOCSR_MBUF_SEND_BOX_SHIFT); + val |= (cpu << IOCSR_MBUF_SEND_CPU_SHIFT); + val |= (data << IOCSR_MBUF_SEND_BUF_SHIFT); + iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND); +}; + +static u32 csr_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS); + /* Clear the ipi register to clear the interrupt */ + iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR); + + return action; +} + +static void csr_ipi_write_action(int cpu, u32 action) +{ + unsigned int irq = 0; + + while ((irq = ffs(action))) { + uint32_t val = IOCSR_IPI_SEND_BLOCKING; + val |= (irq - 1); + val |= (cpu << IOCSR_IPI_SEND_CPU_SHIFT); + iocsr_write32(val, LOONGARCH_IOCSR_IPI_SEND); + action &= ~BIT(irq - 1); + } +} + +static u32 legacy_ipi_read_clear(int cpu) +{ + u32 action; + + /* Load the ipi register to figure out what we're supposed to do */ + action = xconf_readl(ipi_status_regs[cpu]); + /* Clear the ipi register to clear the interrupt */ + xconf_writel(action, ipi_clear_regs[cpu]); + + return action; +} + +static void legacy_ipi_write_action(int cpu, u32 action) +{ + xconf_writel((u32)action, ipi_set_regs[cpu]); +} + +static void ipi_method_init(void) +{ + if (cpu_has_csripi) { + ipi_read_clear = csr_ipi_read_clear; + ipi_write_action = csr_ipi_write_action; + } else { + ipi_read_clear = legacy_ipi_read_clear; + ipi_write_action = legacy_ipi_write_action; + } +} + +static void ipi_regaddrs_init(void) +{ + int i, node, core; + + for (i = 0; i< MAX_CPUS; i++) { + node = i / 4; + core = i % 4; + ipi_set_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + SET); + ipi_clear_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + CLEAR); + ipi_status_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + STATUS); + ipi_en_regs[i] = (void *) + (smp_group[node] + core_offsets[core] + EN); + ipi_mailbox_buf[i] = (void *) + (smp_group[node] + core_offsets[core] + MBUF); + } +} + +/* + * Simple enough, just poke the appropriate ipi register + */ +static void loongson3_send_ipi_single(int cpu, unsigned int action) +{ + ipi_write_action(cpu_logical_map(cpu), (u32)action); +} + +static void +loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +{ + unsigned int i; + + 
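+	/* Post the action to each CPU in the mask, one IPI write per CPU. */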
for_each_cpu(i, mask)
+		ipi_write_action(cpu_logical_map(i), (u32)action);
+}
+
+irqreturn_t loongson3_ipi_interrupt(int irq, void *dev)
+{
+	unsigned int action;
+	unsigned int cpu = smp_processor_id();
+
+	action = ipi_read_clear(cpu_logical_map(cpu));
+
+	smp_mb();
+
+	if (action & SMP_RESCHEDULE) {
+		scheduler_ipi();
+		per_cpu(irq_stat, cpu).ipi_irqs[IPI_RESCHEDULE]++;
+	}
+
+	if (action & SMP_CALL_FUNCTION) {
+		generic_smp_call_function_interrupt();
+		per_cpu(irq_stat, cpu).ipi_irqs[IPI_CALL_FUNCTION]++;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * SMP init and finish on secondary CPUs
+ */
+static void loongson3_init_secondary(void)
+{
+	unsigned int cpu = smp_processor_id();
+	unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 |
+			     ECFGF_IPI | ECFGF_PMC | ECFGF_TIMER;
+
+	change_csr_ecfg(ECFG0_IM, imask);
+
+	if (cpu_has_csripi)
+		iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
+	else
+		xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(cpu)]);
+
+	per_cpu(cpu_state, cpu) = CPU_ONLINE;
+	cpu_data[cpu].core =
+		cpu_logical_map(cpu) % loongson_sysconf.cores_per_package;
+	cpu_data[cpu].package =
+		cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
+}
+
+static void loongson3_smp_finish(void)
+{
+	int cpu = smp_processor_id();
+
+	local_irq_enable();
+
+	if (cpu_has_csripi)
+		iocsr_write64(0, LOONGARCH_IOCSR_MBUF0);
+	else
+		xconf_writeq(0, ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0);
+
+	pr_info("CPU#%d finished\n", smp_processor_id());
+}
+
+static void __init loongson3_smp_setup(void)
+{
+	ipi_method_init();
+	ipi_regaddrs_init();
+
+	if (cpu_has_csripi)
+		iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN);
+	else
+		xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(0)]);
+
+	pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus);
+
+	cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
+	cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
+}
+
+static void __init loongson3_prepare_cpus(unsigned int max_cpus)
+{
+	int i = 0;
+
+	for (i = 0; i < loongson_sysconf.nr_cpus; i++) {
+		set_cpu_present(i, true);
+
+		if (cpu_has_csripi)
+			csr_mail_send(0, __cpu_logical_map[i], 0);
+		else
+			xconf_writeq(0, ipi_mailbox_buf[__cpu_logical_map[i]]+0x0);
+	}
+
+	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+}
+
+/*
+ * Setup the PC, SP, and TP of a secondary processor and start it running!
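+ * The entry PC is passed through mailbox 0 (or the legacy mailbox MMIO),
+ * the stack and thread_info through cpuboot_data, and an SMP_BOOT_CPU
+ * IPI then starts the core.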
+ */
+static int loongson3_boot_secondary(int cpu, struct task_struct *idle)
+{
+	unsigned long entry;
+
+	pr_info("Booting CPU#%d...\n", cpu);
+
+	entry = __pa_symbol((unsigned long)&smpboot_entry);
+	cpuboot_data.stack = (unsigned long)__KSTK_TOS(idle);
+	cpuboot_data.thread_info = (unsigned long)task_thread_info(idle);
+
+	if (cpu_has_csripi)
+		csr_mail_send(entry, cpu_logical_map(cpu), 0);
+	else
+		xconf_writeq(entry, ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0);
+
+	loongson3_send_ipi_single(cpu, SMP_BOOT_CPU);
+
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static bool io_master(int cpu)
+{
+	int i, node, master;
+
+	if (cpu == 0)
+		return true;
+
+	for (i = 1; i < loongson_sysconf.nr_io_pics; i++) {
+		node = eiointc_get_node(i);
+		master = cpu_number_map(node * CORES_PER_EIO_NODE);
+		if (cpu == master)
+			return true;
+	}
+
+	return false;
+}
+
+static int loongson3_cpu_disable(void)
+{
+	unsigned long flags;
+	unsigned int cpu = smp_processor_id();
+
+	if (io_master(cpu))
+		return -EBUSY;
+
+	set_cpu_online(cpu, false);
+	calculate_cpu_foreign_map();
+	local_irq_save(flags);
+	irq_migrate_all_off_this_cpu();
+	clear_csr_ecfg(ECFG0_IM);
+	local_irq_restore(flags);
+	local_flush_tlb_all();
+
+	return 0;
+}
+
+
+static void loongson3_cpu_die(unsigned int cpu)
+{
+	while (per_cpu(cpu_state, cpu) != CPU_DEAD)
+		cpu_relax();
+
+	mb();
+}
+
+/* To shut down a core in Loongson 3, the target core should go to XKPRANGE
+ * and flush all L1 entries at first. Then, another core (usually Core 0)
+ * can safely disable the clock of the target core. loongson3_play_dead()
+ * is called via XKPRANGE (uncached and unmapped) */
+static void loongson3_play_dead(int *state_addr)
+{
+	register int val;
+	register long cpuid, core, node, count;
+	register void *addr, *base;
+	register void (*init_fn)(void);
+
+	__asm__ __volatile__(
+	"	li.d %[addr], 0x8000000000000000\n"
+	"1:	cacop 0x8, %[addr], 0\n"	/* flush L1 ICache */
+	"	cacop 0x8, %[addr], 1\n"
+	"	cacop 0x8, %[addr], 2\n"
+	"	cacop 0x8, %[addr], 3\n"
+	"	cacop 0x9, %[addr], 0\n"	/* flush L1 DCache */
+	"	cacop 0x9, %[addr], 1\n"
+	"	cacop 0x9, %[addr], 2\n"
+	"	cacop 0x9, %[addr], 3\n"
+	"	addi.w %[sets], %[sets], -1\n"
+	"	addi.d %[addr], %[addr], 0x40\n"
+	"	bnez %[sets], 1b\n"
+	"	li.d %[addr], 0x8000000000000000\n"
+	"2:	cacop 0xa, %[addr], 0\n"	/* flush L1 VCache */
+	"	cacop 0xa, %[addr], 1\n"
+	"	cacop 0xa, %[addr], 2\n"
+	"	cacop 0xa, %[addr], 3\n"
+	"	cacop 0xa, %[addr], 4\n"
+	"	cacop 0xa, %[addr], 5\n"
+	"	cacop 0xa, %[addr], 6\n"
+	"	cacop 0xa, %[addr], 7\n"
+	"	cacop 0xa, %[addr], 8\n"
+	"	cacop 0xa, %[addr], 9\n"
+	"	cacop 0xa, %[addr], 10\n"
+	"	cacop 0xa, %[addr], 11\n"
+	"	cacop 0xa, %[addr], 12\n"
+	"	cacop 0xa, %[addr], 13\n"
+	"	cacop 0xa, %[addr], 14\n"
+	"	cacop 0xa, %[addr], 15\n"
+	"	addi.w %[vsets], %[vsets], -1\n"
+	"	addi.d %[addr], %[addr], 0x40\n"
+	"	bnez %[vsets], 2b\n"
+	"	li.w %[val], 0x7\n"	/* *state_addr = CPU_DEAD; */
+	"	st.w %[val], %[state_addr], 0\n"
+	"	dbar 0\n"
+	"	cacop 0x11, %[state_addr], 0\n"	/* flush entry of *state_addr */
+	: [addr] "=&r" (addr), [val] "=&r" (val)
+	: [state_addr] "r" (state_addr),
+	  [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
+	  [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
+
+	local_irq_enable();
+	change_csr_ecfg(ECFG0_IM, ECFGF_IPI);
+
+	__asm__ __volatile__(
+	"	idle 0\n"
+	"	csrrd %[cpuid], 0x20\n"
+	"	andi %[cpuid], %[cpuid], 0x1ff\n"
+	"	li.d %[base], 0x800000001fe01000\n"
+	"	andi %[core], %[cpuid], 0x3\n"
+	"
slli.w %[core], %[core], 8 \n" /* Get core id */ + " or %[base], %[base], %[core] \n" + " andi %[node], %[cpuid], 0x3c \n" + " slli.d %[node], %[node], 42 \n" /* Get node id */ + " or %[base], %[base], %[node] \n" + " ld.d %[init_fn], %[base], 0x20 \n" /* Get init PC */ + " nop \n" + : [core] "=&r" (core), [node] "=&r" (node), + [base] "=&r" (base), [cpuid] "=&r" (cpuid), + [count] "=&r" (count), [init_fn] "=&r" (addr) + : /* No Input */ + : "a1"); + init_fn = __va(addr); + + init_fn(); + unreachable(); +} + +void play_dead(void) +{ + int *state_addr; + unsigned int cpu = smp_processor_id(); + void (*play_dead_uncached)(int *); + + idle_task_exit(); + play_dead_uncached = (void *)TO_UNCACHE(__pa((unsigned long)loongson3_play_dead)); + state_addr = &per_cpu(cpu_state, cpu); + mb(); + play_dead_uncached(state_addr); +} + +static int loongson3_disable_clock(unsigned int cpu) +{ + uint64_t core_id = cpu_data[cpu].core; + uint64_t package_id = cpu_data[cpu].package; + + LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3)); + + return 0; +} + +static int loongson3_enable_clock(unsigned int cpu) +{ + uint64_t core_id = cpu_data[cpu].core; + uint64_t package_id = cpu_data[cpu].package; + + LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3); + + return 0; +} + +static int register_loongson3_notifier(void) +{ + return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE, + "loongarch/loongson:prepare", + loongson3_enable_clock, + loongson3_disable_clock); +} +early_initcall(register_loongson3_notifier); + +#endif + +const struct plat_smp_ops loongson3_smp_ops = { + .send_ipi_single = loongson3_send_ipi_single, + .send_ipi_mask = loongson3_send_ipi_mask, + .smp_setup = loongson3_smp_setup, + .prepare_cpus = loongson3_prepare_cpus, + .boot_secondary = loongson3_boot_secondary, + .init_secondary = loongson3_init_secondary, + .smp_finish = loongson3_smp_finish, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = loongson3_cpu_disable, + .cpu_die = loongson3_cpu_die, +#endif +}; + +/* + * Power management + */ +#ifdef CONFIG_PM + +static int loongson3_ipi_suspend(void) +{ + return 0; +} + +static void loongson3_ipi_resume(void) +{ + if (cpu_has_csripi) + iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); + else + xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(0)]); +} + +static struct syscore_ops loongson3_ipi_syscore_ops = { + .resume = loongson3_ipi_resume, + .suspend = loongson3_ipi_suspend, +}; + +/* + * Enable boot cpu ipi before enabling nonboot cpus + * during syscore_resume. + * */ +static int __init ipi_pm_init(void) +{ + register_syscore_ops(&loongson3_ipi_syscore_ops); + return 0; +} + +core_initcall(ipi_pm_init); +#endif diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 4ca1f1d54190..c8a6e0595593 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -92,7 +92,14 @@ vmalloc_done_load: slli.d t0, t0, _PTE_T_LOG2 add.d t1, ra, t0 +#ifdef CONFIG_SMP +smp_pgtable_change_load: +#endif +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif tlbsrch srli.d ra, t0, _PAGE_PRESENT_SHIFT @@ -100,7 +107,12 @@ vmalloc_done_load: beq ra, $r0, nopage_tlb_load ori t0, t0, _PAGE_VALID +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, smp_pgtable_change_load +#else st.d t0, t1, 0 +#endif ori t1, t1, 8 xori t1, t1, 8 ld.d t0, t1, 0 @@ -124,14 +136,24 @@ vmalloc_load: * spots a huge page. 
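+ * (With CONFIG_SMP the PTE update below becomes an ll.d/sc.d sequence
+ * that branches back and retries when the store-conditional fails.)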
*/ tlb_huge_update_load: +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif srli.d ra, t0, _PAGE_PRESENT_SHIFT andi ra, ra, 1 beq ra, $r0, nopage_tlb_load tlbsrch ori t0, t0, _PAGE_VALID +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, tlb_huge_update_load + ld.d t0, t1, 0 +#else st.d t0, t1, 0 +#endif addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) addi.d ra, t1, 0 csrxchg ra, t1, LOONGARCH_CSR_TLBIDX @@ -177,6 +199,7 @@ tlb_huge_update_load: csrxchg t1, t0, LOONGARCH_CSR_TLBIDX nopage_tlb_load: + dbar 0 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_0 jirl $r0, t0, 0 @@ -233,7 +256,14 @@ vmalloc_done_store: slli.d t0, t0, _PTE_T_LOG2 add.d t1, ra, t0 +#ifdef CONFIG_SMP +smp_pgtable_change_store: +#endif +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif tlbsrch srli.d ra, t0, _PAGE_PRESENT_SHIFT @@ -242,7 +272,12 @@ vmalloc_done_store: bne ra, $r0, nopage_tlb_store ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, smp_pgtable_change_store +#else st.d t0, t1, 0 +#endif ori t1, t1, 8 xori t1, t1, 8 @@ -267,7 +302,11 @@ vmalloc_store: * spots a huge page. */ tlb_huge_update_store: +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif srli.d ra, t0, _PAGE_PRESENT_SHIFT andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) @@ -276,7 +315,13 @@ tlb_huge_update_store: tlbsrch ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, tlb_huge_update_store + ld.d t0, t1, 0 +#else st.d t0, t1, 0 +#endif addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) addi.d ra, t1, 0 csrxchg ra, t1, LOONGARCH_CSR_TLBIDX @@ -322,6 +367,7 @@ tlb_huge_update_store: csrxchg t1, t0, LOONGARCH_CSR_TLBIDX nopage_tlb_store: + dbar 0 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jirl $r0, t0, 0 @@ -377,7 +423,14 @@ vmalloc_done_modify: slli.d t0, t0, _PTE_T_LOG2 add.d t1, ra, t0 +#ifdef CONFIG_SMP +smp_pgtable_change_modify: +#endif +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif tlbsrch srli.d ra, t0, _PAGE_WRITE_SHIFT @@ -385,7 +438,12 @@ vmalloc_done_modify: beq ra, $r0, nopage_tlb_modify ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, smp_pgtable_change_modify +#else st.d t0, t1, 0 +#endif ori t1, t1, 8 xori t1, t1, 8 ld.d t0, t1, 0 @@ -409,7 +467,11 @@ vmalloc_modify: * build_tlbchange_handler_head spots a huge page. */ tlb_huge_update_modify: +#ifdef CONFIG_SMP + ll.d t0, t1, 0 +#else ld.d t0, t1, 0 +#endif srli.d ra, t0, _PAGE_WRITE_SHIFT andi ra, ra, 1 @@ -418,7 +480,13 @@ tlb_huge_update_modify: tlbsrch ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) +#ifdef CONFIG_SMP + sc.d t0, t1, 0 + beq t0, $r0, tlb_huge_update_modify + ld.d t0, t1, 0 +#else st.d t0, t1, 0 +#endif /* * A huge PTE describes an area the size of the * configured huge page size. 
This is twice the @@ -458,6 +526,7 @@ tlb_huge_update_modify: csrxchg t1, t0, LOONGARCH_CSR_TLBIDX nopage_tlb_modify: + dbar 0 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jirl $r0, t0, 0 diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 16122da6f950..793f36684a79 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -94,6 +94,7 @@ enum cpuhp_state { CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, + CPUHP_LOONGARCH_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, -- Gitee From 66fb243c4f14d03fe0de5e0d29494de31d452370 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 037/241] LoongArch: Add basic NUMA support Change-Id: I06d5c7e6940f682550edf235a13074212a8ff3c9 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 38 ++ arch/loongarch/include/asm/bootinfo.h | 2 + .../include/asm/mach-loongson64/topology.h | 26 + arch/loongarch/include/asm/mmzone.h | 18 + arch/loongarch/include/asm/numa.h | 69 +++ arch/loongarch/include/asm/pgtable.h | 12 + arch/loongarch/include/asm/topology.h | 1 + arch/loongarch/kernel/acpi.c | 96 ++++ arch/loongarch/kernel/module.c | 1 + arch/loongarch/kernel/setup.c | 8 +- arch/loongarch/kernel/smp.c | 29 +- arch/loongarch/kernel/topology.c | 5 + arch/loongarch/kernel/traps.c | 4 +- arch/loongarch/loongson64/Makefile | 4 +- arch/loongarch/loongson64/dma.c | 46 ++ arch/loongarch/loongson64/init.c | 3 + arch/loongarch/loongson64/irq.c | 1 + arch/loongarch/loongson64/numa.c | 476 ++++++++++++++++++ arch/loongarch/loongson64/smp.c | 6 + arch/loongarch/mm/init.c | 13 + arch/loongarch/mm/tlb.c | 37 +- arch/loongarch/pci/acpi.c | 3 + 22 files changed, 883 insertions(+), 15 deletions(-) create mode 100644 arch/loongarch/include/asm/mach-loongson64/topology.h create mode 100644 arch/loongarch/include/asm/mmzone.h create mode 100644 arch/loongarch/include/asm/numa.h create mode 100644 arch/loongarch/loongson64/dma.c create mode 100644 arch/loongarch/loongson64/numa.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 3d70ab225f5b..27b32771dfab 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -35,6 +35,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_SUPPORTS_ACPI + select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT select ARCH_USE_QUEUED_RWLOCKS @@ -125,6 +126,7 @@ config MACH_LOONGSON64 select SYS_HAS_CPU_LOONGSON64 select SYS_SUPPORTS_SMP select SYS_SUPPORTS_HOTPLUG_CPU + select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL select ZONE_DMA32 help @@ -170,6 +172,7 @@ choice config CPU_LOONGSON64 bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 + select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL select GPIOLIB select SWIOTLB @@ -336,6 +339,7 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_FLATMEM_ENABLE def_bool y + depends on !NUMA config ARCH_SPARSEMEM_ENABLE def_bool y @@ -345,6 +349,40 @@ config ARCH_SPARSEMEM_ENABLE or have huge holes in the physical address space for other reasons. See for more. +config NUMA + bool "NUMA Support" + depends on SYS_SUPPORTS_NUMA + select ACPI_NUMA if ACPI + help + Say Y to compile the kernel with NUMA (Non-Uniform Memory Access) + support. 
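+	  On Loongson systems the node topology and memory affinity come
+	  from the ACPI SRAT and SLIT tables.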
This option improves performance on systems with more + than one NUMA node; on single node systems it is generally better + to leave it disabled. + +config SYS_SUPPORTS_NUMA + bool + +config NODES_SHIFT + int + default "6" + depends on NEED_MULTIPLE_NODES + +config USE_PERCPU_NUMA_NODE_ID + def_bool y + depends on NUMA + +config HAVE_SETUP_PER_CPU_AREA + def_bool y + depends on NUMA + +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + depends on NUMA + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + depends on NUMA + config CMDLINE string "Built-in kernel command line" help diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 60a0c9f39a1e..2c1b4200a6ab 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -21,6 +21,8 @@ extern void early_init(void); extern void init_environ(void); extern void platform_init(void); extern void plat_mem_setup(void); +extern void plat_swiotlb_setup(void); +extern int __init init_numa_memory(void); struct loongson_board_info { int bios_size; diff --git a/arch/loongarch/include/asm/mach-loongson64/topology.h b/arch/loongarch/include/asm/mach-loongson64/topology.h new file mode 100644 index 000000000000..5cda49ca74ab --- /dev/null +++ b/arch/loongarch/include/asm/mach-loongson64/topology.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_MACH_TOPOLOGY_H +#define _ASM_MACH_TOPOLOGY_H + +#ifdef CONFIG_NUMA + +extern cpumask_t cpus_on_node[]; + +#define cpumask_of_node(node) (&cpus_on_node[node]) + +struct pci_bus; +extern int pcibus_to_node(struct pci_bus *); + +#define cpumask_of_pcibus(bus) (cpu_online_mask) + +extern unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES]; + +void numa_set_distance(int from, int to, int distance); + +#define node_distance(from, to) (node_distances[(from)][(to)]) + +#else +#define pcibus_to_node(bus) 0 +#endif + +#endif /* _ASM_MACH_TOPOLOGY_H */ diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h new file mode 100644 index 000000000000..db106f92dd9f --- /dev/null +++ b/arch/loongarch/include/asm/mmzone.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Written by Huacai Chen (chenhuacai@loongson.cn) + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_MMZONE_H_ +#define _ASM_MMZONE_H_ + +#include +#include + +extern struct pglist_data *node_data[]; + +#define NODE_DATA(nid) (node_data[(nid)]) + +extern void setup_zero_pages(void); + +#endif /* _ASM_MMZONE_H_ */ diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h new file mode 100644 index 000000000000..f45f6c09fdc5 --- /dev/null +++ b/arch/loongarch/include/asm/numa.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: lvjianmin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef _ASM_LOONGARCH_NUMA_H +#define _ASM_LOONGARCH_NUMA_H + +#include + +#define NODE_ADDRSPACE_SHIFT 44 + +#define pa_to_nid(addr) (((addr) & 0xf00000000000) >> NODE_ADDRSPACE_SHIFT) +#define nid_to_addrbase(nid) (_ULCAST_(nid) << NODE_ADDRSPACE_SHIFT) + +#ifdef CONFIG_NUMA + +extern int numa_off; +extern s16 __cpuid_to_node[CONFIG_NR_CPUS]; +extern nodemask_t numa_nodes_parsed __initdata; + +struct numa_memblk { + u64 start; + u64 end; + int nid; +}; + +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) +struct numa_meminfo { + int nr_blks; + struct numa_memblk blk[NR_NODE_MEMBLKS]; +}; + +extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); + +extern void __init early_numa_add_cpu(int cpuid, s16 node); +extern void numa_add_cpu(unsigned int cpu); +extern void numa_remove_cpu(unsigned int cpu); + +static inline void numa_clear_node(int cpu) +{ +} + +static inline void set_cpuid_to_node(int cpuid, s16 node) +{ + __cpuid_to_node[cpuid] = node; +} + +extern int early_cpu_to_node(int cpu); + +#else + +static inline void early_numa_add_cpu(int cpuid, s16 node) { } +static inline void numa_add_cpu(unsigned int cpu) { } +static inline void numa_remove_cpu(unsigned int cpu) { } + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} + +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_LOONGARCH_NUMA_H */ diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 687b1a5fe2a2..d80cfb4cd17f 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -303,6 +303,18 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifdef CONFIG_NUMA_BALANCING +static inline long pte_protnone(pte_t pte) +{ + return (pte_val(pte) & _PAGE_PROTNONE); +} + +static inline long pmd_protnone(pmd_t pmd) +{ + return (pmd_val(pmd) & _PAGE_PROTNONE); +} +#endif /* CONFIG_NUMA_BALANCING */ + /* * We provide our own get_unmapped area to cope with the virtual aliasing * constraints placed on us by the cache architecture. diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h index 9d3ababc7677..3c48c64e1ca1 100644 --- a/arch/loongarch/include/asm/topology.h +++ b/arch/loongarch/include/asm/topology.h @@ -5,6 +5,7 @@ #ifndef __ASM_TOPOLOGY_H #define __ASM_TOPOLOGY_H +#include #include #ifdef CONFIG_SMP diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 5ffe869cac31..58d52f2b6271 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include int acpi_disabled; @@ -368,6 +369,80 @@ int __init acpi_boot_init(void) return 0; } +#ifdef CONFIG_ACPI_NUMA + +static __init int setup_node(int pxm) +{ + return acpi_map_pxm_to_node(pxm); +} + +/* + * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for + * I/O localities since SRAT does not list them. I/O localities are + * not supported at this point. 
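+ * numa_set_distance() below sanity-checks each entry: a distance must
+ * fit in a u8, and a node's distance to itself must be LOCAL_DISTANCE.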
+ */ +unsigned int numa_distance_cnt; + +static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit) +{ + return slit->locality_count; +} + +void __init numa_set_distance(int from, int to, int distance) +{ + if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + from, to, distance); + return; + } + + node_distances[from][to] = distance; +} + +/* Callback for Proximity Domain -> CPUID mapping */ +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ + int pxm, node; + + if (srat_disabled()) + return; + if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) { + bad_srat(); + return; + } + if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0) + return; + pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) { + pxm |= (pa->proximity_domain_hi[0] << 8); + pxm |= (pa->proximity_domain_hi[1] << 16); + pxm |= (pa->proximity_domain_hi[2] << 24); + } + node = setup_node(pxm); + if (node < 0) { + printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); + bad_srat(); + return; + } + + if (pa->apic_id >= CONFIG_NR_CPUS) { + printk(KERN_INFO "SRAT: PXM %u -> CPU 0x%02x -> Node %u skipped apicid that is too big\n", + pxm, pa->apic_id, node); + return; + } + + early_numa_add_cpu(pa->apic_id, node); + + set_cpuid_to_node(pa->apic_id, node); + node_set(node, numa_nodes_parsed); + printk(KERN_INFO "SRAT: PXM %u -> CPU 0x%02x -> Node %u\n", + pxm, pa->apic_id, node); +} + +void __init acpi_numa_arch_fixup(void) {} +#endif + void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) { memblock_reserve(addr, size); @@ -377,6 +452,22 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) #include +static int __ref acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +{ +#ifdef CONFIG_ACPI_NUMA + int nid; + + nid = acpi_get_node(handle); + if (nid != NUMA_NO_NODE) { + set_cpuid_to_node(physid, nid); + node_set(nid, numa_nodes_parsed); + set_cpu_numa_node(cpu, nid); + cpumask_set_cpu(cpu, cpumask_of_node(nid)); + } +#endif + return 0; +} + int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu) { int cpu; @@ -387,6 +478,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu return cpu; } + acpi_map_cpu2node(handle, cpu, physid); + *pcpu = cpu; return 0; @@ -395,6 +488,9 @@ EXPORT_SYMBOL(acpi_map_cpu); int acpi_unmap_cpu(int cpu) { +#ifdef CONFIG_ACPI_NUMA + set_cpuid_to_node(cpu_logical_map(cpu), NUMA_NO_NODE); +#endif set_cpu_present(cpu, false); num_processors--; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index bbe7c763fbfb..5e8a5b7cc45c 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 6d5cebfe1085..8a5c34039c52 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -103,7 +104,10 @@ static int __init early_parse_mem(char *p) return -EINVAL; } - memblock_add(start, size); + if (!IS_ENABLED(CONFIG_NUMA)) + memblock_add(start, size); + else + memblock_add_node(start, size, pa_to_nid(start)); return 0; } @@ -144,7 +148,7 @@ static void __init arch_mem_init(char **cmdline_p) sparse_init(); 
memblock_set_bottom_up(true); - swiotlb_init(1); + plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 39b7967795e5..9e6e41944735 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -211,14 +212,36 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* Preload SMP state for boot cpu */ void smp_prepare_boot_cpu(void) { - unsigned int cpu; + unsigned int cpu, node, rr_node; set_cpu_possible(0, true); set_cpu_online(0, true); set_my_cpu_offset(per_cpu_offset(0)); - for_each_possible_cpu(cpu) - set_cpu_numa_node(cpu, 0); + rr_node = first_node(node_online_map); + for_each_possible_cpu(cpu) { + node = early_cpu_to_node(cpu); + + /* + * The mapping between present cpus and nodes has been + * built during MADT and SRAT parsing. + * + * If possible cpus = present cpus here, early_cpu_to_node + * will return valid node. + * + * If possible cpus > present cpus here (e.g. some possible + * cpus will be added by cpu-hotplug later), for possible but + * not present cpus, early_cpu_to_node will return NUMA_NO_NODE, + * and we just map them to online nodes in round-robin way. + * Once hotplugged, new correct mapping will be built for them. + * */ + if (node != NUMA_NO_NODE) + set_cpu_numa_node(cpu, node); + else { + set_cpu_numa_node(cpu, rr_node); + rr_node = next_node_in(rr_node, node_online_map); + } + } } int __cpu_up(unsigned int cpu, struct task_struct *tidle) diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index ab1a75c0b5a6..ae5f0e5414a3 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -37,6 +37,11 @@ static int __init topology_init(void) { int i, ret; +#ifdef CONFIG_NUMA + for_each_online_node(i) + register_one_node(i); +#endif /* CONFIG_NUMA */ + for_each_present_cpu(i) { struct cpu *c = &per_cpu(cpu_devices, i); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index ca6535e49bc6..334a682fb1b0 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -644,7 +644,7 @@ asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp) irqentry_exit(regs, state); } -extern void tlb_init(void); +extern void tlb_init(int cpu); extern void cache_error_setup(void); unsigned long eentry; @@ -685,7 +685,7 @@ void per_cpu_trap_init(int cpu) for (i = 0; i < 64; i++) set_handler(i * VECSIZE, handle_reserved, VECSIZE); - tlb_init(); + tlb_init(cpu); cpu_cache_init(); } diff --git a/arch/loongarch/loongson64/Makefile b/arch/loongarch/loongson64/Makefile index 134ebebaf5fb..3e54460d8d00 100644 --- a/arch/loongarch/loongson64/Makefile +++ b/arch/loongarch/loongson64/Makefile @@ -2,6 +2,8 @@ # All Loongson based systems # -obj-y += setup.o init.o env.o reset.o irq.o mem.o rtc.o +obj-y += setup.o init.o env.o reset.o irq.o mem.o dma.o rtc.o obj-$(CONFIG_SMP) += smp.o + +obj-$(CONFIG_NUMA) += numa.o diff --git a/arch/loongarch/loongson64/dma.c b/arch/loongarch/loongson64/dma.c new file mode 100644 index 000000000000..1743f3088de2 --- /dev/null +++ b/arch/loongarch/loongson64/dma.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int node_id_offset; + +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + /* We extract 4bit node id (bit 44~47) from Loongson-3's + * 48bit address space and embed it into 40bit */ + long nid = (paddr >> 44) & 0xf; + return ((nid << 44) ^ paddr) | (nid << node_id_offset); +} + +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + /* We extract 4bit node id (bit 44~47) from Loongson-3's + * 48bit address space and embed it into 40bit */ + long nid = (daddr >> node_id_offset) & 0xf; + return ((nid << node_id_offset) ^ daddr) | (nid << 44); +} + +void __init plat_swiotlb_setup(void) +{ + swiotlb_init(1); + node_id_offset = ((readl(LS7A_DMA_CFG) & LS7A_DMA_NODE_MASK) >> LS7A_DMA_NODE_SHF) + 36; +} diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index cabb6049fc59..6221d687d3ae 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -123,6 +123,9 @@ void __init platform_init(void) acpi_boot_init(); #endif +#ifdef CONFIG_NUMA + init_numa_memory(); +#endif dmi_setup(); smbios_parse(); pr_info("The BIOS Version: %s\n",b_info.bios_version); diff --git a/arch/loongarch/loongson64/irq.c b/arch/loongarch/loongson64/irq.c index f360bad20e4d..ac1294652f36 100644 --- a/arch/loongarch/loongson64/irq.c +++ b/arch/loongarch/loongson64/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c new file mode 100644 index 000000000000..9f951d32f137 --- /dev/null +++ b/arch/loongarch/loongson64/numa.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Xiang Gao + * Huacai Chen + * + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int numa_off; +struct pglist_data *node_data[MAX_NUMNODES]; +unsigned char node_distances[MAX_NUMNODES][MAX_NUMNODES]; + +EXPORT_SYMBOL(node_data); +EXPORT_SYMBOL(node_distances); + +static struct numa_meminfo numa_meminfo; +cpumask_t cpus_on_node[MAX_NUMNODES]; +cpumask_t phys_cpus_on_node[MAX_NUMNODES]; +EXPORT_SYMBOL(cpus_on_node); + +/* + * apicid, cpu, node mappings + */ +s16 __cpuid_to_node[CONFIG_NR_CPUS] = { + [0 ... 
CONFIG_NR_CPUS - 1] = NUMA_NO_NODE
+};
+EXPORT_SYMBOL(__cpuid_to_node);
+
+nodemask_t numa_nodes_parsed __initdata;
+
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
+				   size_t align)
+{
+	return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS),
+			MEMBLOCK_ALLOC_ACCESSIBLE, early_cpu_to_node(cpu));
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+	memblock_free_early(__pa(ptr), size);
+}
+
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d = p4d_offset(pgd, addr);
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (p4d_none(*p4d)) {
+		pud_t *new;
+
+		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		pgd_populate(&init_mm, pgd, new);
+#ifndef __PAGETABLE_PUD_FOLDED
+		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+#endif
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		pud_populate(&init_mm, pud, new);
+#ifndef __PAGETABLE_PMD_FOLDED
+		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+#endif
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc = -EINVAL;
+
+	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
+		if (nr_node_ids >= 8)
+			pcpu_chosen_fc = PCPU_FC_PAGE;
+		else
+			pcpu_chosen_fc = PCPU_FC_EMBED;
+	}
+
+	/*
+	 * Always reserve area for module percpu variables. That's
+	 * what the legacy allocator did.
+	 */
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
+					    pcpu_cpu_distance,
+					    pcpu_fc_alloc, pcpu_fc_free);
+		if (rc < 0)
+			pr_warn("%s allocator failed (%d), falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+					   pcpu_fc_alloc, pcpu_fc_free,
+					   pcpu_populate_pte);
+	if (rc < 0)
+		panic("cannot initialize percpu area (err=%d)", rc);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
+
+/*
+ * Get nodeid by logical cpu number.
+ * __cpuid_to_node maps a physical cpu id to a node, so we
+ * should use cpu_logical_map(cpu) to index it.
+ *
+ * This routine is only used early during boot; once
+ * setup_per_cpu_areas() has run and numa_node is initialized,
+ * cpu_to_node() is used instead.
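+ * (cpu_logical_map() may still be -1 for possible-but-unmapped CPUs;
+ * those report NUMA_NO_NODE.)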
+ * */ +int early_cpu_to_node(int cpu) +{ + int physid = cpu_logical_map(cpu); + + if (physid < 0) + return NUMA_NO_NODE; + + return __cpuid_to_node[physid]; +} + +void __init early_numa_add_cpu(int cpuid, s16 node) +{ + int cpu = __cpu_number_map[cpuid]; + + if (cpu < 0) + return; + + cpumask_set_cpu(cpu, &cpus_on_node[node]); + cpumask_set_cpu(cpuid, &phys_cpus_on_node[node]); +} + +void numa_add_cpu(unsigned int cpu) +{ + int nid = cpu_to_node(cpu); + cpumask_set_cpu(cpu, &cpus_on_node[nid]); +} + +void numa_remove_cpu(unsigned int cpu) +{ + int nid = cpu_to_node(cpu); + cpumask_clear_cpu(cpu, &cpus_on_node[nid]); +} + +static int __init numa_add_memblk_to(int nid, u64 start, u64 end, + struct numa_meminfo *mi) +{ + /* ignore zero length blks */ + if (start == end) + return 0; + + /* whine about and ignore invalid blks */ + if (start > end || nid < 0 || nid >= MAX_NUMNODES) { + pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); + return 0; + } + + if (mi->nr_blks >= NR_NODE_MEMBLKS) { + pr_err("NUMA: too many memblk ranges\n"); + return -EINVAL; + } + + mi->blk[mi->nr_blks].start = PFN_ALIGN(start); + mi->blk[mi->nr_blks].end = PFN_ALIGN(end - PAGE_SIZE + 1); + mi->blk[mi->nr_blks].nid = nid; + mi->nr_blks++; + return 0; +} + +/** + * numa_add_memblk - Add one numa_memblk to numa_meminfo + * @nid: NUMA node ID of the new memblk + * @start: Start address of the new memblk + * @end: End address of the new memblk + * + * Add a new memblk to the default numa_meminfo. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init numa_add_memblk(int nid, u64 start, u64 end) +{ + return numa_add_memblk_to(nid, start, end, &numa_meminfo); +} + +static void __init alloc_node_data(int nid) +{ + void *nd; + unsigned long nd_pa; + size_t nd_sz = roundup(sizeof(pg_data_t), PAGE_SIZE); + + nd_pa = memblock_phys_alloc_try_nid(nd_sz, SMP_CACHE_BYTES, nid); + if (!nd_pa) { + pr_err("Cannot find %zu Byte for node_data (initial node: %d)\n", nd_sz, nid); + return; + } + + nd = __va(nd_pa); + + node_data[nid] = nd; + memset(nd, 0, sizeof(pg_data_t)); +} + +static void __init node_mem_init(unsigned int node) +{ + unsigned long start_pfn, end_pfn; + unsigned long node_addrspace_offset; + + node_addrspace_offset = nid_to_addrbase(node); + pr_info("Node%d's addrspace_offset is 0x%lx\n", + node, node_addrspace_offset); + + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n", + node, start_pfn, end_pfn); + + alloc_node_data(node); +} + +#ifdef CONFIG_ACPI_NUMA + +/* + * Sanity check to catch more bad NUMA configurations (they are amazingly + * common). Make sure the nodes cover all memory. 
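+ * The BUG_ON below fires if the SRAT-described memory falls short of
+ * the firmware memory map by 1MiB or more.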
+ */ +static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) +{ + u64 numaram, biosram; + int i; + + numaram = 0; + for (i = 0; i < mi->nr_blks; i++) { + u64 s = mi->blk[i].start >> PAGE_SHIFT; + u64 e = mi->blk[i].end >> PAGE_SHIFT; + numaram += e - s; + numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); + if ((s64)numaram < 0) + numaram = 0; + } + max_pfn = max_low_pfn; + biosram = max_pfn - absent_pages_in_range(0, max_pfn); + + BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT))); + return true; +} + +static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type) +{ + static unsigned long num_physpages; + + num_physpages += (size >> PAGE_SHIFT); + pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + node, type, start, size); + pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", + start >> PAGE_SHIFT, (start + size) >> PAGE_SHIFT, num_physpages); + memblock_set_node(start, size, &memblock.memory, node); +} + +/* + * add_numamem_region + * + * Add a usable memory region described by the BIOS. The + * routine gets each intersection between the BIOS's region + * and a node's region, and adds them into the node's memblock + * pool. + * + */ +static void __init add_numamem_region(u64 start, u64 end, u32 type) +{ + u32 i; + u64 ofs = start; + + if (start >= end) { + pr_debug("Invalid region: %016llx-%016llx\n", start, end); + return; + } + + for (i = 0; i < numa_meminfo.nr_blks; i++) { + struct numa_memblk *mb = &numa_meminfo.blk[i]; + + if (ofs > mb->end) + continue; + + if (end > mb->end) { + add_node_intersection(mb->nid, ofs, mb->end - ofs, type); + ofs = mb->end; + } else { + add_node_intersection(mb->nid, ofs, end - ofs, type); + break; + } + } +} + +static void __init init_node_memblock(void) +{ + u32 mem_type; + u64 mem_end, mem_start, mem_size; + efi_memory_desc_t *md; + + /* Parse memory information and activate */ + for_each_efi_memory_desc(md) { + mem_type = md->type; + mem_start = md->phys_addr; + mem_size = md->num_pages << EFI_PAGE_SHIFT; + mem_end = mem_start + mem_size; + + switch (mem_type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_PERSISTENT_MEMORY: + case EFI_CONVENTIONAL_MEMORY: + add_numamem_region(mem_start, mem_end, mem_type); + break; + case EFI_PAL_CODE: + case EFI_UNUSABLE_MEMORY: + case EFI_ACPI_RECLAIM_MEMORY: + add_numamem_region(mem_start, mem_end, mem_type); + fallthrough; + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + pr_info("Resvd: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx Bytes\n", + mem_type, mem_start, mem_size); + break; + } + } +} + +static void __init numa_default_distance(void) +{ + int row, col; + + for (row = 0; row < MAX_NUMNODES; row++) + for (col = 0; col < MAX_NUMNODES; col++) { + if (col == row) + node_distances[row][col] = LOCAL_DISTANCE; + else + /* We assume one node per package here! + * + * A SLIT should be used for multiple nodes + * per package to override the default setting.
+ */ + node_distances[row][col] = REMOTE_DISTANCE; + } +} + +int __init init_numa_memory(void) +{ + int i; + int ret; + int node; + + for (i = 0; i < NR_CPUS; i++) + set_cpuid_to_node(i, NUMA_NO_NODE); + + numa_default_distance(); + nodes_clear(numa_nodes_parsed); + nodes_clear(node_possible_map); + nodes_clear(node_online_map); + memset(&numa_meminfo, 0, sizeof(numa_meminfo)); + + /* Parse SRAT and SLIT if provided by firmware. */ + ret = acpi_numa_init(); + if (ret < 0) + return ret; + + node_possible_map = numa_nodes_parsed; + if (WARN_ON(nodes_empty(node_possible_map))) + return -EINVAL; + + init_node_memblock(); + if (numa_meminfo_cover_memory(&numa_meminfo) == false) + return -EINVAL; + + for_each_node_mask(node, node_possible_map) { + node_mem_init(node); + node_set_online(node); + } + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + + setup_nr_node_ids(); + loongson_sysconf.nr_nodes = nr_node_ids; + loongson_sysconf.cores_per_node = cpumask_weight(&phys_cpus_on_node[0]); + + return 0; +} + +EXPORT_SYMBOL(init_numa_memory); +#endif + +void __init paging_init(void) +{ + unsigned int node; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; + + for_each_online_node(node) { + unsigned long start_pfn, end_pfn; + + get_pfn_range_for_nid(node, &start_pfn, &end_pfn); + + if (end_pfn > max_low_pfn) + max_low_pfn = end_pfn; + } +#ifdef CONFIG_ZONE_DMA32 + zones_size[ZONE_DMA32] = MAX_DMA32_PFN; +#endif + zones_size[ZONE_NORMAL] = max_low_pfn; + free_area_init(zones_size); +} + +void __init mem_init(void) +{ + high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); + memblock_free_all(); + setup_zero_pages(); /* This comes from node 0 */ + mem_init_print_info(NULL); +} + +int pcibus_to_node(struct pci_bus *bus) +{ + return dev_to_node(&bus->dev); +} +EXPORT_SYMBOL(pcibus_to_node); diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 8ee38cced428..9ebd87a54435 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -215,6 +215,9 @@ static void loongson3_init_secondary(void) else xconf_writel(0xffffffff, ipi_en_regs[cpu_logical_map(cpu)]); +#ifdef CONFIG_NUMA + numa_add_cpu(cpu); +#endif per_cpu(cpu_state, cpu) = CPU_ONLINE; cpu_data[cpu].core = cpu_logical_map(cpu) % loongson_sysconf.cores_per_package; @@ -318,6 +321,9 @@ static int loongson3_cpu_disable(void) if (io_master(cpu)) return -EBUSY; +#ifdef CONFIG_NUMA + numa_remove_cpu(cpu); +#endif set_cpu_online(cpu, false); calculate_cpu_foreign_map(); local_irq_save(flags); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 438aba59ea78..e41c9685af36 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -86,6 +86,7 @@ int __ref page_is_ram(unsigned long pfn) return memblock_is_memory(addr) && !memblock_is_reserved(addr); } +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -110,6 +111,7 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. 
*/ mem_init_print_info(NULL); } +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __ref free_initmem(void) { @@ -132,6 +134,17 @@ int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) return ret; } +#ifdef CONFIG_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + int nid; + + nid = pa_to_nid(start); + return nid; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 12268fb4b55a..b5db16c9bade 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -249,15 +249,18 @@ static void output_pgtable_bits_defines(void) pr_debug("\n"); } -void setup_tlb_handler(void) -{ - static int run_once = 0; +#ifdef CONFIG_NUMA +static unsigned long pcpu_handlers[NR_CPUS]; +#endif +extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; +void setup_tlb_handler(int cpu) +{ setup_ptwalker(); output_pgtable_bits_defines(); /* The tlb handlers are generated only once */ - if (!run_once) { + if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); @@ -267,15 +270,35 @@ void setup_tlb_handler(void) set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); - run_once++; } +#ifdef CONFIG_NUMA + else { + void *addr; + struct page *page; + const int vec_sz = sizeof(exception_handlers); + + if (pcpu_handlers[cpu]) + return; + + page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz)); + if (!page) + return; + + addr = page_address(page); + pcpu_handlers[cpu] = virt_to_phys(addr); + memcpy((void *)addr, (void *)eentry, vec_sz); + local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY); + csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); + } +#endif } -void tlb_init(void) +void tlb_init(int cpu) { write_csr_pagesize(PS_DEFAULT_SIZE); write_csr_stlbpgsize(PS_DEFAULT_SIZE); write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE); - setup_tlb_handler(); + setup_tlb_handler(cpu); local_flush_tlb_all(); } diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index f692017fec46..0aba5e9efa2a 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -28,8 +29,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) { struct pci_config_window *cfg = bridge->bus->sysdata; struct acpi_device *adev = to_acpi_device(cfg->parent); + struct device *bus_dev = &bridge->bus->dev; ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, pa_to_nid(cfg->res.start)); return 0; } -- Gitee From de572e072067eab9dd0004077bf2fb94c82dabb4 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 13 Jun 2022 18:54:12 +0800 Subject: [PATCH 038/241] LoongArch: vmlinux.lds.S: Add missing ELF_DETAILS Commit c604abc3f6e ("vmlinux.lds.h: Split ELF_DETAILS from STABS_DEBUG") split ELF_DETAILS from STABS_DEBUG, resulting in missing ELF_DETAILS information in LoongArch architecture, so add it. 
Change-Id: Id068824164b69eb09f4be90e60799b930f1bde77 Fixes: c604abc3f6e ("vmlinux.lds.h: Split ELF_DETAILS from STABS_DEBUG") Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 6348a28aac5a..ac13bc28754f 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -96,6 +96,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + ELF_DETAILS /* These must appear regardless of . */ .gptab.sdata : { -- Gitee From 1b617a9cf77897b7274b43ad356f5bfb3e93a214 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 22 Jun 2022 16:48:02 +0800 Subject: [PATCH 039/241] LoongArch: Fix sleeping in atomic context in setup_tlb_handler() Since setup_tlb_handler() is executed in atomic context, we should use GFP_ATOMIC instead of GFP_KERNEL to alloc pages. Otherwise we will get a "sleeping in atomic context" error: [ 0.013118] BUG: sleeping function called from invalid context at mm/page_alloc.c:5158 [ 0.013126] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1 [ 0.013131] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.19-rc3+ #1008 1a223086d14d07967cc427f15d52139422271360 [ 0.013136] Hardware name: Loongson Loongson-3A5000-7A1000-1w-V0.1-CRB/Loongson-LS3A5000-7A1000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V2.0.04082-beta7 04/27 [ 0.013140] Stack : 90000000015fc990 9000000100493c18 9000000000df3370 9000000100490000 [ 0.013151] 9000000100493b50 0000000000000000 9000000100493b58 9000000001417ef0 [ 0.013160] 900000000199e54e 0000000000000040 9000000100493c18 90000000015f7a98 [ 0.013168] ffffffffffffffff 6de72f8b42179d1e 9000000100403b80 90000000015f7890 [ 0.013176] 0000000000000001 00000000fffff175 9000000000eb9860 9000000001530b4b [ 0.013184] 9000000000e99e60 0000000000000013 0000000006ecc000 0000000000000001 [ 0.013193] 90000000015f7a98 9000000001417ef0 0000000000000004 0000000000000000 [ 0.013201] 0000000000000cc0 0000000000000000 0000000000000001 90000000015fc990 [ 0.013209] 9000000000217e74 9000000001603b6b 9000000000208640 0000000000000000 [ 0.013217] 00000000000000b0 0000000000000004 0000000000000000 0000000000070000 [ 0.013225] ... 
[ 0.013229] Call Trace: [ 0.013230] [<9000000000208640>] show_stack+0x4c/0x14c [ 0.013240] [<9000000000df3370>] dump_stack_lvl+0x70/0xac [ 0.013246] [<9000000000270c8c>] ___might_sleep+0x104/0x124 [ 0.013253] [<9000000000477e84>] __alloc_pages+0x240/0x464 [ 0.013260] [<9000000000214214>] setup_tlb_handler+0x104/0x1e8 [ 0.013265] [<9000000000214324>] tlb_init+0x2c/0x3c [ 0.013270] [<9000000000208b74>] per_cpu_trap_init+0xec/0x108 [ 0.013275] [<9000000000202850>] cpu_probe+0x400/0x8a4 [ 0.013279] [<900000000020d160>] start_secondary+0x5c/0x3d4 Change-Id: Ia5313aac3577abea8c055f13b40958a2c39f8617 Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index b5db16c9bade..278be6228646 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -280,7 +280,7 @@ void setup_tlb_handler(int cpu) if (pcpu_handlers[cpu]) return; - page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz)); + page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz)); if (!page) return; -- Gitee From 6d071fa470b9076381d86171ec1416c384a119d6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 23 Jun 2022 16:11:54 +0800 Subject: [PATCH 040/241] LoongArch: Fix EENTRY/MERRENTRY setting in setup_tlb_handler() setup_tlb_handler() is expected to set per-cpu exception handlers, but it only sets TLBRENTRY successfully because of copy & paste errors, so fix it. Change-Id: I997fd577aa53fd6c2eb90c664fea2623197e7999 Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 278be6228646..7c027531ea70 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -285,10 +285,11 @@ void setup_tlb_handler(int cpu) return; addr = page_address(page); - pcpu_handlers[cpu] = virt_to_phys(addr); + pcpu_handlers[cpu] = (unsigned long)addr; memcpy((void *)addr, (void *)eentry, vec_sz); local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz); - csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); + csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); } #endif -- Gitee From 988d68adbc45ea18ba4de960413690f0ed766eb1 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 18 Jun 2022 12:50:31 +0800 Subject: [PATCH 041/241] LoongArch: Fix wrong fpu version According to the configuration information accessible by the CPUCFG instruction in the LoongArch Reference Manual [1], FP_ver is stored in bits [5:3] of CPUCFG2. The current code to get the FPU version is therefore wrong; use CPUCFG2_FPVERS to fix it.
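For illustration, a minimal C sketch of the masking fix described above; the mask value is inferred from the quoted field, bits [5:3] of CPUCFG2, and the names are hypothetical stand-ins for the real kernel macros:

	/* FP_ver is a three-bit field in CPUCFG2[5:3]. */
	#define SKETCH_CPUCFG2_FPVERS	(0x7U << 3)

	static inline unsigned int sketch_fpu_version(unsigned int cpucfg2)
	{
		/* old code, (cpucfg2 >> 3) & 0x3, silently dropped bit 5 */
		return (cpucfg2 & SKETCH_CPUCFG2_FPVERS) >> 3;
	}

Shifting first and masking with 0x3 keeps only two of the field's three bits, which is exactly the bug the patch below corrects.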
[1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html Change-Id: If309939e6b2f5c26bece927790c864a651e1d331 Fixes: 628c3bb40e9a ("LoongArch: Add boot and setup routines") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index ee078ee003ce..30e73b6f6014 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -263,7 +263,7 @@ void cpu_probe(void) c->cputype = CPU_UNKNOWN; c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0); - c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3; + c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3; c->fpu_csr0 = FPU_CSR_RN; c->fpu_mask = FPU_CSR_RSVD; -- Gitee From 0e0c56951d424ed7f41db8df5062932643e047c6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 27 Jun 2022 14:57:35 +0800 Subject: [PATCH 042/241] LoongArch: Fix section mismatch warning init_numa_memory() is annotated __init and not used by any module, thus don't export it. Remove the unneeded EXPORT_SYMBOL for init_numa_memory() to fix the following section mismatch warning: MODPOST vmlinux.symvers WARNING: modpost: vmlinux.o(___ksymtab+init_numa_memory+0x0): Section mismatch in reference from the variable __ksymtab_init_numa_memory to the function .init.text:init_numa_memory() The symbol init_numa_memory is exported and annotated __init Fix this by removing the __init annotation of init_numa_memory or drop the export. This was observed when building Linux 5.19-rc4. Change-Id: I8650d400dc196d509a3cadea89116d1557e952ff Fixes: d4b6f1562a3c ("LoongArch: Add Non-Uniform Memory Access (NUMA) support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/loongson64/numa.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c index 9f951d32f137..d875aeded754 100644 --- a/arch/loongarch/loongson64/numa.c +++ b/arch/loongarch/loongson64/numa.c @@ -438,7 +438,6 @@ int __init init_numa_memory(void) return 0; } -EXPORT_SYMBOL(init_numa_memory); #endif -- Gitee From 8094c2cc374ae1c0962ffc5bcf8fc528a941ecfa Mon Sep 17 00:00:00 2001 From: Qi Hu Date: Wed, 6 Jul 2022 19:29:37 +0800 Subject: [PATCH 043/241] LoongArch: Remove obsolete mentions of vcsr The `vcsr` only exists in the old hardware design; it isn't used in any shipped hardware from Loongson-3A5000 on. Both scalar FP and LSX/LASX instructions use the `fcsr` as their control and status registers now. For example, the RM control bit in fcsr0 is shared by FP, LSX and LASX instructions. Particularly, fcsr16 to fcsr31 are reserved for LSX/LASX now; access to these registers has no visible effect if LSX/LASX is enabled, and will cause SXD/ASXD exceptions if LSX/LASX is not enabled. So, mentions of vcsr are obsolete in the first place (it was just used for debugging); let's remove them.
Change-Id: Id42d1a61de0a6819c160d13fe5c604f9ddb00d08 Reviewed-by: WANG Xuerui Signed-off-by: Qi Hu Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/fpregdef.h | 1 - arch/loongarch/include/asm/processor.h | 2 -- arch/loongarch/kernel/asm-offsets.c | 1 - arch/loongarch/kernel/fpu.S | 10 ---------- 4 files changed, 14 deletions(-) diff --git a/arch/loongarch/include/asm/fpregdef.h b/arch/loongarch/include/asm/fpregdef.h index caf528ae6428..a7324cdcd755 100644 --- a/arch/loongarch/include/asm/fpregdef.h +++ b/arch/loongarch/include/asm/fpregdef.h @@ -52,6 +52,5 @@ #define fcsr1 $r1 #define fcsr2 $r2 #define fcsr3 $r3 -#define vcsr16 $r16 #endif /* _ASM_FPREGDEF_H */ diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 708af5acd506..d59db0f18c9f 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -80,7 +80,6 @@ BUILD_FPR_ACCESS(64) struct loongarch_fpu { unsigned int fcsr; - unsigned int vcsr; uint64_t fcc; /* 8x8 */ union fpureg fpr[NUM_FPU_REGS]; }; @@ -161,7 +160,6 @@ struct thread_struct { */ \ .fpu = { \ .fcsr = 0, \ - .vcsr = 0, \ .fcc = 0, \ .fpr = {{{0,},},}, \ }, \ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index afe584bdfd88..1945ff69c232 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -166,7 +166,6 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); - OFFSET(THREAD_VCSR, loongarch_fpu, vcsr); BLANK(); } diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 1ecf418bcefc..a7240e22e0c7 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -149,16 +149,6 @@ movgr2fcsr fcsr0, \tmp0 .endm - .macro sc_save_vcsr base, tmp0 - movfcsr2gr \tmp0, vcsr16 - EX st.w \tmp0, \base, 0 - .endm - - .macro sc_restore_vcsr base, tmp0 - EX ld.w \tmp0, \base, 0 - movgr2fcsr vcsr16, \tmp0 - .endm - /* * Save a thread's fp context. */ -- Gitee From 73dd9d7b2f0b768330fd3e11687aa79afbc2aec3 Mon Sep 17 00:00:00 2001 From: Jun Yi Date: Thu, 21 Jul 2022 19:10:49 +0800 Subject: [PATCH 044/241] LoongArch: Remove useless header compiler.h The content of LoongArch's compiler.h is trivial, and some of it is not used anywhere, so inline the needed definitions and remove the header.
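To see what inlining the constraint amounts to at a use site, here is a minimal sketch of the kernel's ll/sc retry pattern with "ZC" written literally instead of going through GCC_OFF_SMALL_ASM(); the function is hypothetical and mirrors the pattern visible in the diffs below:

	static inline void sketch_atomic_add(int i, int *p)
	{
		int temp;

		__asm__ __volatile__(
		"1:	ll.w	%0, %1		\n"
		"	add.w	%0, %0, %2	\n"
		"	sc.w	%0, %1		\n"
		"	beqz	%0, 1b		\n"
		: "=&r" (temp), "+ZC" (*p)
		: "r" (i)
		: "memory");
	}

Since "ZC" was the only expansion GCC_OFF_SMALL_ASM() ever produced, spelling it out loses nothing and removes one layer of indirection.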
Change-Id: I72ee7e80c3eae22c0bcf65d7d1f026fb3f9a063d Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 - arch/loongarch/include/asm/atomic.h | 13 ++++--------- arch/loongarch/include/asm/compiler.h | 19 ------------------- arch/loongarch/include/asm/futex.h | 5 ++--- arch/loongarch/include/asm/irqflags.h | 1 - arch/loongarch/include/asm/local.h | 1 - arch/loongarch/kernel/cmpxchg.c | 8 ++++---- arch/loongarch/kernel/entry.S | 1 - arch/loongarch/kernel/reset.c | 1 - arch/loongarch/lib/delay.c | 1 - 10 files changed, 10 insertions(+), 41 deletions(-) delete mode 100644 arch/loongarch/include/asm/compiler.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 27b32771dfab..436e318e09a3 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -66,7 +66,6 @@ config LOONGARCH select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL - select HAVE_ARCH_COMPILER_H select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index 8f510c90c1e1..8927561c9a8d 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -17,7 +17,6 @@ #include #include -#include #include #include @@ -172,8 +171,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " beq $zero, %1, 1b \n" "2: \n" __WEAK_LLSC_MB - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) + : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "I" (-i)); } else { __asm__ __volatile__( @@ -185,8 +183,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " beq $zero, %1, 1b \n" "2: \n" __WEAK_LLSC_MB - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) + : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "r" (i)); } @@ -336,8 +333,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " beq %1, $zero, 1b \n" "2: \n" __WEAK_LLSC_MB - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) + : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "I" (-i)); } else { __asm__ __volatile__( @@ -349,8 +345,7 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " beq %1, $zero, 1b \n" "2: \n" __WEAK_LLSC_MB - : "=&r" (result), "=&r" (temp), - "+" GCC_OFF_SMALL_ASM() (v->counter) + : "=&r" (result), "=&r" (temp), "+ZC" (v->counter) : "r" (i)); } diff --git a/arch/loongarch/include/asm/compiler.h b/arch/loongarch/include/asm/compiler.h deleted file mode 100644 index 2dc4d84b2a32..000000000000 --- a/arch/loongarch/include/asm/compiler.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020 Loongson Technology Co., Ltd. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
- */ -#ifndef _ASM_COMPILER_H -#define _ASM_COMPILER_H - -#define GCC_OFF_SMALL_ASM() "ZC" - -#define LOONGARCH_ISA_LEVEL "loongarch" -#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch" -#define LOONGARCH_ISA_LEVEL_RAW loongarch -#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW - -#endif /* _ASM_COMPILER_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index 04e2893cf77d..d45f0b36dd7c 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ @@ -95,8 +94,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv " "__UA_ADDR "\t1b, 4b \n" " "__UA_ADDR "\t2b, 4b \n" " .previous \n" - : "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr) - : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval), + : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) + : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory", "t0"); diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h index 8811bdf884e7..048d5a30ac29 100644 --- a/arch/loongarch/include/asm/irqflags.h +++ b/arch/loongarch/include/asm/irqflags.h @@ -9,7 +9,6 @@ #include #include -#include #include static inline void arch_local_irq_enable(void) diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h index 7eeff36450d1..769c809ea771 100644 --- a/arch/loongarch/include/asm/local.h +++ b/arch/loongarch/include/asm/local.h @@ -13,7 +13,6 @@ #include #include #include -#include typedef struct { diff --git a/arch/loongarch/kernel/cmpxchg.c b/arch/loongarch/kernel/cmpxchg.c index fc0bbb231ccc..e58c4c69a0bf 100644 --- a/arch/loongarch/kernel/cmpxchg.c +++ b/arch/loongarch/kernel/cmpxchg.c @@ -45,8 +45,8 @@ unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int s " or %1, %1, %5 \n" " sc.w %1, %2 \n" " beqz %1, 1b \n" - : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) - : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (mask), "Jr" (val << shift) + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (val << shift) : "memory"); return (old32 & mask) >> shift; @@ -96,8 +96,8 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, "2: \n" __WEAK_LLSC_MB "3: \n" - : "=&r" (old32), "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*ptr32) - : GCC_OFF_SMALL_ASM() (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) + : "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32) + : "ZC" (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new) : "memory"); return (old32 & mask) >> shift; diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 1803840381d3..69d927a138d1 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index d22de0669b4b..c98dbfafe1b3 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -10,7 +10,6 @@ #include #include -#include #include #include #include diff --git a/arch/loongarch/lib/delay.c b/arch/loongarch/lib/delay.c index 4a3c209bb335..208f274b9dba 100644 --- a/arch/loongarch/lib/delay.c +++ b/arch/loongarch/lib/delay.c @@ -7,7 +7,6 @@ #include #include -#include #include void __delay(unsigned long cycles) -- Gitee From 9fc38a80406f27531170293e9c117a3c59010263 Mon Sep 17 
00:00:00 2001 From: Bibo Mao Date: Wed, 20 Jul 2022 15:21:51 +0800 Subject: [PATCH 045/241] LoongArch: Remove clock setting during cpu hotplug stage On physical machine we can save power by disabling clock of hot removed cpu. However as different platforms require different methods to configure clocks, the code is platform-specific, and probably belongs to firmware/pmu or cpu regulator, rather than generic arch/loongarch code. Also, there is no such register on QEMU virt machine since the clock/frequency regulation is not emulated. This patch removes the hard-coded clock register accesses in generic LoongArch cpu hotplug flow. Change-Id: Icfe8eaa431f191a71c28e6f2c326b872e614f5c0 Reviewed-by: WANG Xuerui Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/loongson64/smp.c | 94 ++------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 4 insertions(+), 91 deletions(-) diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 9ebd87a54435..d8762e8ed215 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -344,62 +344,18 @@ static void loongson3_cpu_die(unsigned int cpu) mb(); } -/* To shutdown a core in Loongson 3, the target core should go to XKPRANGE - * and flush all L1 entries at first. Then, another core (usually Core 0) - * can safely disable the clock of the target core. loongson3_play_dead() - * is called via XKPRANGE (uncached and unmmaped) */ -static void loongson3_play_dead(int *state_addr) +void play_dead(void) { - register int val; register long cpuid, core, node, count; register void *addr, *base; register void (*init_fn)(void); - __asm__ __volatile__( - " li.d %[addr], 0x8000000000000000\n" - "1: cacop 0x8, %[addr], 0 \n" /* flush L1 ICache */ - " cacop 0x8, %[addr], 1 \n" - " cacop 0x8, %[addr], 2 \n" - " cacop 0x8, %[addr], 3 \n" - " cacop 0x9, %[addr], 0 \n" /* flush L1 DCache */ - " cacop 0x9, %[addr], 1 \n" - " cacop 0x9, %[addr], 2 \n" - " cacop 0x9, %[addr], 3 \n" - " addi.w %[sets], %[sets], -1 \n" - " addi.d %[addr], %[addr], 0x40 \n" - " bnez %[sets], 1b \n" - " li.d %[addr], 0x8000000000000000\n" - "2: cacop 0xa, %[addr], 0 \n" /* flush L1 VCache */ - " cacop 0xa, %[addr], 1 \n" - " cacop 0xa, %[addr], 2 \n" - " cacop 0xa, %[addr], 3 \n" - " cacop 0xa, %[addr], 4 \n" - " cacop 0xa, %[addr], 5 \n" - " cacop 0xa, %[addr], 6 \n" - " cacop 0xa, %[addr], 7 \n" - " cacop 0xa, %[addr], 8 \n" - " cacop 0xa, %[addr], 9 \n" - " cacop 0xa, %[addr], 10 \n" - " cacop 0xa, %[addr], 11 \n" - " cacop 0xa, %[addr], 12 \n" - " cacop 0xa, %[addr], 13 \n" - " cacop 0xa, %[addr], 14 \n" - " cacop 0xa, %[addr], 15 \n" - " addi.w %[vsets], %[vsets], -1 \n" - " addi.d %[addr], %[addr], 0x40 \n" - " bnez %[vsets], 2b \n" - " li.w %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */ - " st.w %[val], %[state_addr], 0 \n" - " dbar 0 \n" - " cacop 0x11, %[state_addr], 0 \n" /* flush entry of *state_addr */ - : [addr] "=&r" (addr), [val] "=&r" (val) - : [state_addr] "r" (state_addr), - [sets] "r" (cpu_data[smp_processor_id()].dcache.sets), - [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets)); - + idle_task_exit(); local_irq_enable(); change_csr_ecfg(ECFG0_IM, ECFGF_IPI); + __this_cpu_write(cpu_state, CPU_DEAD); + __smp_mb(); __asm__ __volatile__( " idle 0 \n" " csrrd %[cpuid], 0x20 \n" @@ -424,48 +380,6 @@ static void loongson3_play_dead(int *state_addr) unreachable(); } -void play_dead(void) -{ - int *state_addr; - unsigned int cpu = smp_processor_id(); - void (*play_dead_uncached)(int *); - - 
idle_task_exit(); - play_dead_uncached = (void *)TO_UNCACHE(__pa((unsigned long)loongson3_play_dead)); - state_addr = &per_cpu(cpu_state, cpu); - mb(); - play_dead_uncached(state_addr); -} - -static int loongson3_disable_clock(unsigned int cpu) -{ - uint64_t core_id = cpu_data[cpu].core; - uint64_t package_id = cpu_data[cpu].package; - - LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3)); - - return 0; -} - -static int loongson3_enable_clock(unsigned int cpu) -{ - uint64_t core_id = cpu_data[cpu].core; - uint64_t package_id = cpu_data[cpu].package; - - LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3); - - return 0; -} - -static int register_loongson3_notifier(void) -{ - return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE, - "loongarch/loongson:prepare", - loongson3_enable_clock, - loongson3_disable_clock); -} -early_initcall(register_loongson3_notifier); - #endif const struct plat_smp_ops loongson3_smp_ops = { diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 793f36684a79..16122da6f950 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -94,7 +94,6 @@ enum cpuhp_state { CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, - CPUHP_LOONGARCH_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, CPUHP_BRINGUP_CPU, -- Gitee From 83f52952079218ae36acf1d66347d1ab58d40a43 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 20 Jul 2022 15:21:52 +0800 Subject: [PATCH 046/241] LoongArch: Remove unused variables There are some variables that are never used or referenced; this patch removes these variables and makes the code cleaner. Change-Id: Ic98c4d0368510184f5a1ea14c0820ae5fb525938 Reviewed-by: WANG Xuerui Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- .../loongarch/include/asm/mach-loongson64/loongson.h | 12 ------------ arch/loongarch/loongson64/env.c | 10 +--------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/arch/loongarch/include/asm/mach-loongson64/loongson.h b/arch/loongarch/include/asm/mach-loongson64/loongson.h index 559e0ce970fa..115f20d8ff16 100644 --- a/arch/loongarch/include/asm/mach-loongson64/loongson.h +++ b/arch/loongarch/include/asm/mach-loongson64/loongson.h @@ -39,18 +39,6 @@ extern const struct plat_smp_ops loongson3_smp_ops; #define MAX_PACKAGES 16 -/* Chip Config registor of each physical cpu package */ -extern u64 loongson_chipcfg[MAX_PACKAGES]; -#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id])) - -/* Chip Temperature registor of each physical cpu package */ -extern u64 loongson_chiptemp[MAX_PACKAGES]; -#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id])) - -/* Freq Control register of each physical cpu package */ -extern u64 loongson_freqctrl[MAX_PACKAGES]; -#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id])) - #define xconf_readl(addr) readl(addr) #define xconf_readq(addr) readq(addr) diff --git a/arch/loongarch/loongson64/env.c b/arch/loongarch/loongson64/env.c index cd1629c6ed4e..d35ac838308c 100644 --- a/arch/loongarch/loongson64/env.c +++ b/arch/loongarch/loongson64/env.c @@ -14,14 +14,10 @@ #include u64 efi_system_table; +unsigned long long smp_group[MAX_PACKAGES]; struct loongson_system_configuration loongson_sysconf; EXPORT_SYMBOL(loongson_sysconf); -u64 loongson_chipcfg[MAX_PACKAGES]; -u64 loongson_chiptemp[MAX_PACKAGES]; -u64 loongson_freqctrl[MAX_PACKAGES]; -unsigned long long smp_group[MAX_PACKAGES]; - static void __init register_addrs_set(u64 *registers, const u64
addr, int num) { u64 i; @@ -50,11 +46,7 @@ void __init init_environ(void) efi_memmap_init_early(&data); memblock_reserve(data.phys_map & PAGE_MASK, PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK))); - register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16); - register_addrs_set(loongson_chipcfg, TO_UNCACHE(0x1fe00180), 16); - register_addrs_set(loongson_chiptemp, TO_UNCACHE(0x1fe0019c), 16); - register_addrs_set(loongson_freqctrl, TO_UNCACHE(0x1fe001d0), 16); } static int __init init_cpu_fullname(void) -- Gitee From 73dd9d7b2f0b768330fd3e11687aa79afbc2aec3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 13 Jul 2022 18:00:41 +0800 Subject: [PATCH 047/241] LoongArch: Fix shared cache size calculation The current calculation of shared cache size is from the node (die) scope, but we want 'lscpu' to show the shared cache size of the whole package for multi-die chips (e.g., Loongson-3C5000L, which contains 4 dies in one package). So fix it by multiplying nodes_per_package. Change-Id: Ib499e5c589ab80a413e8df7b58e01c0a09e0a051 Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cacheinfo.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c index e4e8489b9dea..a23663ed75f7 100644 --- a/arch/loongarch/kernel/cacheinfo.c +++ b/arch/loongarch/kernel/cacheinfo.c @@ -5,7 +5,8 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited */ #include -#include +#include +#include /* Populates leaf and increments to next leaf */ #define populate_cache(cache, leaf, c_level, c_type) \ @@ -17,6 +18,8 @@ do { \ leaf->ways_of_associativity = c->cache.ways; \ leaf->size = c->cache.linesz * c->cache.sets * \ c->cache.ways; \ + if (leaf->level > 2) \ + leaf->size *= nodes_per_package; \ leaf++; \ } while (0) @@ -95,11 +98,15 @@ static void cache_cpumap_setup(unsigned int cpu) int populate_cache_leaves(unsigned int cpu) { - int level=1; + int level = 1, nodes_per_package = 1; struct cpuinfo_loongarch *c = &current_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; + if (loongson_sysconf.nr_nodes > 1) + nodes_per_package = loongson_sysconf.cores_per_package + / loongson_sysconf.cores_per_node; + if (c->icache.waysize) { populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA); populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST); -- Gitee From 765cb2580cb49441095e07c847aaa7f5bc28c1e9 Mon Sep 17 00:00:00 2001 From: Qi Hu Date: Thu, 14 Jul 2022 14:25:50 +0800 Subject: [PATCH 048/241] LoongArch: Fix missing fcsr in ptrace's fpr_set In file ptrace.c, function fpr_set does not copy fcsr data from ubuf to kbuf. That's the reason why fcsr cannot be modified by ptrace. This patch fixes this problem and allows users to modify the fcsr via ptrace.
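Once the fix below is applied, the fcsr can be written from user space through the fpr regset. A hedged user-space sketch follows, assuming the regset layout implied by the offsets in the patch (32 eight-byte FPRs, then an eight-byte fcc, then a four-byte fcsr); the struct and function names are hypothetical:

	#include <elf.h>
	#include <stdint.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	struct sketch_fpregs {
		uint64_t fpr[32];	/* offset 0 */
		uint64_t fcc;		/* offset 256 (fcc_start) */
		uint32_t fcsr;		/* offset 264 (fcsr_start) */
	};

	static int sketch_set_fcsr(pid_t pid, uint32_t new_fcsr)
	{
		struct sketch_fpregs fp;
		struct iovec iov = { .iov_base = &fp, .iov_len = sizeof(fp) };

		if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRFPREG, &iov) < 0)
			return -1;
		fp.fcsr = new_fcsr;	/* the field fpr_set() used to drop */
		return ptrace(PTRACE_SETREGSET, pid, (void *)NT_PRFPREG, &iov);
	}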
Change-Id: Ie670d583346d21e7e2b45c643dc96fad7288d941 Co-developed-by: Xu Li Signed-off-by: Qi Hu Signed-off-by: Huacai Chen --- arch/loongarch/kernel/ptrace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 0abbf43b7311..6d893db98d1d 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -187,7 +187,7 @@ static int fpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t); - const int fcc_end = fcc_start + sizeof(u64); + const int fcsr_start = fcc_start + sizeof(u64); int err; BUG_ON(count % sizeof(elf_fpreg_t)); @@ -203,10 +203,12 @@ static int fpr_set(struct task_struct *target, if (err) return err; - if (count > 0) - err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.fcc, - fcc_start, fcc_end); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcc, fcc_start, + fcc_start + sizeof(u64)); + err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.fcsr, fcsr_start, + fcsr_start + sizeof(u32)); return err; } -- Gitee From 63bf1ba2444031cbb42de9f4fdd8b8641d1fb9cc Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 21 Jul 2022 17:53:01 +0800 Subject: [PATCH 049/241] LoongArch: Fix wrong "ROM Size" of boardinfo We can see the "ROM Size" is different in the following outputs: [root@linux loongson]# cat /sys/firmware/loongson/boardinfo BIOS Information Vendor : Loongson Version : vUDK2018-LoongArch-V2.0.pre-beta8 ROM Size : 63 KB Release Date : 06/15/2022 Board Information Manufacturer : Loongson Board Name : Loongson-LS3A5000-7A1000-1w-A2101 Family : LOONGSON64 [root@linux loongson]# dmidecode | head -11 ... Handle 0x0000, DMI type 0, 26 bytes BIOS Information Vendor: Loongson Version: vUDK2018-LoongArch-V2.0.pre-beta8 Release Date: 06/15/2022 ROM Size: 4 MB According to "BIOS Information (Type 0) structure" in the SMBIOS Reference Specification [1], it shows 64K * (n+1) is the size of the physical device containing the BIOS if the size is less than 16M. Additionally, we can see the related code in dmidecode [2]: u64 s = { .l = (code1 + 1) << 6 }; So the output of dmidecode is correct while the output of boardinfo is wrong; fix it. By the way, at present there is no need to handle sizes of 16M or greater on LoongArch, because the ROM is usually 4M or 8M, which is enough.
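The decoding rule the commit quotes is easy to misread, so here is a minimal sketch of it in C; the function name is hypothetical:

	/* SMBIOS Type 0 "BIOS ROM Size": for sizes below 16M the physical
	 * device size is 64K * (n + 1). */
	static unsigned int sketch_rom_size_kb(unsigned char code)
	{
		return (code + 1) << 6;	/* (n + 1) * 64, in KB */
	}

Feeding in the raw value 63 from the output above yields 4096 KB, i.e. the 4 MB that dmidecode prints, instead of the bogus "63 KB".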
[1] https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.6.0.pdf [2] https://git.savannah.nongnu.org/cgit/dmidecode.git/tree/dmidecode.c#n347 Change-Id: Idbe1323723b6e28b951460237437e3efb52d5bd8 Fixes: 628c3bb40e9a ("LoongArch: Add boot and setup routines") Reviewed-by: WANG Xuerui Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/loongson64/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 6221d687d3ae..66f972db2c6b 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -76,7 +76,7 @@ static void __init parse_bios_table(const struct dmi_header *dm) char *dmi_data = (char *)dm; bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET); - b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET); + b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6; if (bios_extern & LOONGSON_EFI_ENABLE) set_bit(EFI_BOOT, &efi.flags); -- Gitee From ddb92eab9cd94ea67897fdba86d355b5630d99c3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 12 Jul 2022 12:25:57 +0800 Subject: [PATCH 050/241] LoongArch: cpuinfo: Fix a warning for CONFIG_CPUMASK_OFFSTACK When CONFIG_CPUMASK_OFFSTACK and CONFIG_DEBUG_PER_CPU_MAPS is selected, cpu_max_bits_warn() generates a runtime warning similar as below while we show /proc/cpuinfo. Fix this by using nr_cpu_ids (the runtime limit) instead of NR_CPUS to iterate CPUs. [ 3.052463] ------------[ cut here ]------------ [ 3.059679] WARNING: CPU: 3 PID: 1 at include/linux/cpumask.h:108 show_cpuinfo+0x5e8/0x5f0 [ 3.070072] Modules linked in: efivarfs autofs4 [ 3.076257] CPU: 0 PID: 1 Comm: systemd Not tainted 5.19-rc5+ #1052 [ 3.084034] Hardware name: Loongson Loongson-3A5000-7A1000-1w-V0.1-CRB/Loongson-LS3A5000-7A1000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V2.0.04082-beta7 04/27 [ 3.099465] Stack : 9000000100157b08 9000000000f18530 9000000000cf846c 9000000100154000 [ 3.109127] 9000000100157a50 0000000000000000 9000000100157a58 9000000000ef7430 [ 3.118774] 90000001001578e8 0000000000000040 0000000000000020 ffffffffffffffff [ 3.128412] 0000000000aaaaaa 1ab25f00eec96a37 900000010021de80 900000000101c890 [ 3.138056] 0000000000000000 0000000000000000 0000000000000000 0000000000aaaaaa [ 3.147711] ffff8000339dc220 0000000000000001 0000000006ab4000 0000000000000000 [ 3.157364] 900000000101c998 0000000000000004 9000000000ef7430 0000000000000000 [ 3.167012] 0000000000000009 000000000000006c 0000000000000000 0000000000000000 [ 3.176641] 9000000000d3de08 9000000001639390 90000000002086d8 00007ffff0080286 [ 3.186260] 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c [ 3.195868] ... 
[ 3.199917] Call Trace: [ 3.203941] [<90000000002086d8>] show_stack+0x38/0x14c [ 3.210666] [<9000000000cf846c>] dump_stack_lvl+0x60/0x88 [ 3.217625] [<900000000023d268>] __warn+0xd0/0x100 [ 3.223958] [<9000000000cf3c90>] warn_slowpath_fmt+0x7c/0xcc [ 3.231150] [<9000000000210220>] show_cpuinfo+0x5e8/0x5f0 [ 3.238080] [<90000000004f578c>] seq_read_iter+0x354/0x4b4 [ 3.245098] [<90000000004c2e90>] new_sync_read+0x17c/0x1c4 [ 3.252114] [<90000000004c5174>] vfs_read+0x138/0x1d0 [ 3.258694] [<90000000004c55f8>] ksys_read+0x70/0x100 [ 3.265265] [<9000000000cfde9c>] do_syscall+0x7c/0x94 [ 3.271820] [<9000000000202fe4>] handle_syscall+0xc4/0x160 [ 3.281824] ---[ end trace 8b484262b4b8c24c ]--- Change-Id: I33aba8710cc7979a872fba1af4a740d40e08c763 Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index e0b5f3b031b1..b12a1f21f864 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -106,7 +106,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) { unsigned long i = *pos; - return i < NR_CPUS ? (void *) (i + 1) : NULL; + return i < nr_cpu_ids ? (void *) (i + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) -- Gitee From 26f5f8cb18fb3ce02e3b7544c3d16f40bd3ab2b9 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Sat, 6 Aug 2022 15:19:33 +0800 Subject: [PATCH 051/241] LoongArch: Fix unsigned comparison with less than zero The return value from the call to get_timer_irq() is int, which can be a negative error code. However, the return value is being assigned to an unsigned int variable 'irq', so make 'irq' an int. Eliminate the following coccicheck warning: ./arch/loongarch/kernel/time.c:146:5-8: WARNING: Unsigned expression compared with zero: irq < 0 Change-Id: I1c776b755167555dae9afbf248979d2d348f6fd8 Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 06ad7fe7e6e1..eab3ce17781e 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -130,7 +130,7 @@ void sync_counter(void) int constant_clockevent_init(void) { - unsigned int irq; + int irq; unsigned int cpu = smp_processor_id(); unsigned long min_delta = 0x600; unsigned long max_delta = (1UL << 48) - 1; -- Gitee From d39c25f626cfd92d1cacbbac0422109d2acb4dba Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Sat, 6 Aug 2022 15:19:32 +0800 Subject: [PATCH 052/241] LoongArch: Requires __force attributes for any casts This fixes a warning emitted when running "make C=2": arch/loongarch/kernel/ptrace.c: note: in included file (through include/linux/uaccess.h, include/linux/sched/task.h, include/linux/sched/signal.h, include/linux/ptrace.h, include/linux/audit.h): ./arch/loongarch/include/asm/uaccess.h:232:32: warning: incorrect type in argument 2 (different address spaces) ./arch/loongarch/include/asm/uaccess.h:232:32: expected void const *from ./arch/loongarch/include/asm/uaccess.h:232:32: got void const [noderef] __user *from Change-Id: Id7c197d0c6db2669e61be2961916487b894a6aba Reported-by: kernel test robot Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index
3ecd73641922..9cdb48d4a475 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -338,13 +338,13 @@ extern unsigned long __copy_user(void *to, const void *from, __kernel_size_t n); static inline unsigned long __must_check raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - return __copy_user(to, from, n); + return __copy_user(to, (__force const void *)from, n); } static inline unsigned long __must_check raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - return __copy_user(to, from, n); + return __copy_user((__force void *)to, from, n); } #define INLINE_COPY_FROM_USER -- Gitee From 258a9323d5ea75e98a9a8488c33b03ec77658ffc Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 11 Aug 2022 20:52:12 +0800 Subject: [PATCH 053/241] LoongArch: Jump to the link address before enable PG The kernel entry point of both the boot-cpu and the nonboot-cpus may be a physical address from the BootLoader (in DA mode or identity-mapping PG mode). So we should jump to the link address before PG is enabled (DA is disabled at the same time) and just after DMW is configured. Change-Id: Ic7420f983353f6ad44e3f2b79a15df77dddf8dd0 Signed-off-by: Huacai Chen --- arch/loongarch/kernel/head.S | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 2c120cd619b5..5c957a80d359 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -23,6 +23,12 @@ SYM_CODE_START(kernel_entry) # kernel entry point csrwr t0, LOONGARCH_CSR_DMWIN0 li.d t0, CSR_DMW1_INIT # CA, PLV0, 0x9000 xxxx xxxx xxxx csrwr t0, LOONGARCH_CSR_DMWIN1 + + /* We might not get launched at the address the kernel is linked to, so we jump there. */ + la.abs t0, 0f + jirl zero, t0, 0 +0: /* Enable PG */ li.w t0, 0xb0 # PLV=0, IE=0, PG=1 csrwr t0, LOONGARCH_CSR_CRMD @@ -31,11 +37,6 @@ SYM_CODE_START(kernel_entry) # kernel entry point li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 csrwr t0, LOONGARCH_CSR_EUEN - /* We might not get launched at the address the kernel is linked to, so we jump there. */ - la.abs t0, 0f - jirl zero, t0, 0 -0: la t0, __bss_start # clear .bss st.d zero, t0, 0 la t1, __bss_stop - LONGSIZE @@ -78,6 +79,11 @@ SYM_CODE_START(smpboot_entry) csrwr t0, LOONGARCH_CSR_DMWIN0 li.d t0, CSR_DMW1_INIT # CA, PLV0 csrwr t0, LOONGARCH_CSR_DMWIN1 + + la.abs t0, 0f + jirl zero, t0, 0 +0: + /* Enable PG */ li.w t0, 0xb0 # PLV=0, IE=0, PG=1 csrwr t0, LOONGARCH_CSR_CRMD li.w t0, 0x04 # PLV=0, PIE=1, PWE=0 @@ -89,9 +95,6 @@ SYM_CODE_START(smpboot_entry) ld.d sp, t0, CPU_BOOT_STACK ld.d tp, t0, CPU_BOOT_TINFO - la.abs t0, 0f - jirl zero, t0, 0 -0: bl start_secondary SYM_CODE_END(smpboot_entry) -- Gitee From 2ded7f13a3cf4c875a5fb6be1f294895952ba104 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 22 Aug 2022 19:27:11 +0800 Subject: [PATCH 054/241] LoongArch: Cleanup headers to avoid circular dependency When GENERIC_IOREMAP is enabled, there is a circular dependency that causes build errors. The root cause is that pgtable.h shouldn't include io.h, but pgtable.h needs some macros defined in io.h. So clean up those macros and remove the unnecessary inclusions, as other architectures do.
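A minimal sketch of why the cycle disappears, using only the PHYSADDR()/TO_PHYS_MASK definitions visible in this patch; the helper names are hypothetical:

	/* addrspace.h provides pure address arithmetic, no I/O machinery: */
	#define SKETCH_PHYSADDR(a)	((unsigned long)(a) & TO_PHYS_MASK)

	/* pgtable helpers can then avoid io.h entirely: */
	#define sketch_pmd_phys(pmd)	SKETCH_PHYSADDR(pmd_val(pmd))

Previously pmd_phys() and friends were written in terms of virt_to_phys(), which pulled io.h into pgtable.h and closed the circular include chain.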
Change-Id: I6751d04d65532d640ece82d3e3174e7ae263c535 Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/addrspace.h | 16 ++++++++++++++++ arch/loongarch/include/asm/io.h | 19 ------------------- arch/loongarch/include/asm/page.h | 2 +- arch/loongarch/include/asm/pgtable-64.h | 6 +++--- arch/loongarch/include/asm/pgtable.h | 1 - arch/loongarch/mm/mmap.c | 11 ++--------- 6 files changed, 22 insertions(+), 33 deletions(-) diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index 8a32896d392b..f121e76d7e3f 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -104,4 +104,20 @@ extern unsigned long vm_map_base; */ #define PHYSADDR(a) ((_ACAST64_(a)) & TO_PHYS_MASK) +/* + * On LoongArch, the I/O ports mapping is as follows: + * + * | .... | + * |-----------------------| + * | pci io ports(16K~32M) | + * |-----------------------| + * | isa io ports(0 ~16K) | + * PCI_IOBASE ->|-----------------------| + * | .... | + */ +#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) +#define PCI_IOSIZE SZ_32M +#define ISA_IOSIZE SZ_16K +#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) + #endif /* _ASM_ADDRSPACE_H */ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 79c6022f5747..dd82d5a11dd5 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -7,34 +7,15 @@ #define ARCH_HAS_IOREMAP_WC -#include #include #include #include -#include -#include #include #include #include #include -/* - * On LoongArch, I/O ports mappring is following: - * - * | .... | - * |-----------------------| - * | pci io ports(64K~32M) | - * |-----------------------| - * | isa io ports(0 ~16K) | - * PCI_IOBASE ->|-----------------------| - * | .... | - */ -#define PCI_IOBASE ((void __iomem *)(vm_map_base + (2 * PAGE_SIZE))) -#define PCI_IOSIZE SZ_32M -#define ISA_IOSIZE SZ_16K -#define IO_SPACE_LIMIT (PCI_IOSIZE - 1) - /* * Change "struct page" to physical address.
*/ diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index df8ce999c93e..64bb53444908 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -94,7 +94,7 @@ static inline int pfn_valid(unsigned long pfn) #endif -#define virt_to_pfn(kaddr) PFN_DOWN(virt_to_phys((void *)(kaddr))) +#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) extern int __virt_addr_valid(volatile void *kaddr); diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h index adb05170dc2b..d8e7b01c8127 100644 --- a/arch/loongarch/include/asm/pgtable-64.h +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -130,7 +130,7 @@ static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) *p4d = p4dval; } -#define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) +#define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) #define p4d_page(p4d) (pfn_to_page(p4d_phys(p4d) >> PAGE_SHIFT)) #endif @@ -173,7 +173,7 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) -#define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) +#define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) #endif @@ -206,7 +206,7 @@ static inline void pmd_clear(pmd_t *pmdp) #define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) -#define pmd_phys(pmd) virt_to_phys((void *)pmd_val(pmd)) +#define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT)) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index d80cfb4cd17f..25c9359e2e60 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -7,7 +7,6 @@ #include #include -#include #include #include diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index 744f06902daa..a3a609f589a9 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -2,16 +2,9 @@ /* * Copyright (C) 2020 Loongson Technology Corporation Limited */ -#include -#include -#include +#include #include #include -#include -#include -#include -#include -#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); @@ -120,6 +113,6 @@ int __virt_addr_valid(volatile void *kaddr) if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; - return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); + return pfn_valid(PFN_DOWN(PHYSADDR(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); -- Gitee From fae2d727df2656a1ccefb6c8bc84bbb5665b9845 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 24 Aug 2022 17:31:10 +0200 Subject: [PATCH 055/241] LoongArch: Avoid orphan input sections Ensure that all input sections are listed explicitly in the linker script, and issue a warning otherwise. This ensures that the binary image matches the PE/COFF and other image metadata exactly, which is important for things like code signing. 
Change-Id: I1464c5a0f80f7cf7936521d8b4823814bcbf6499 Signed-off-by: Ard Biesheuvel Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/kernel/vmlinux.lds.S | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 436e318e09a3..778cd955401d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -41,6 +41,7 @@ config LOONGARCH select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select ARCH_WANT_LD_ORPHAN_WARN select BUILDTIME_TABLE_SORT select COMMON_CLK select GENERIC_ATOMIC64 if !64BIT diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index ac13bc28754f..5f9cbc43a617 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -76,6 +76,8 @@ SECTIONS PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT) #endif + .rela.dyn : ALIGN(8) { *(.rela.dyn) *(.rela*) } + /* * Align to 64K in attempt to eliminate holes before the * .bss..swapper_pg_dir section at the start of .bss. This -- Gitee From a4d76d19830e2efd1074fbcc3ccdb4daeef80814 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 31 Aug 2022 14:22:43 +0800 Subject: [PATCH 056/241] LoongArch: Improve dump_tlb() output messages 1, Use nr/nx to replace ri/xi; 2, Add 0x prefix for hexadecimal data. Change-Id: Ie077967b5d283d987ae0697b4eeb8f2f0b9103af Signed-off-by: Huacai Chen --- arch/loongarch/lib/dump_tlb.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c index d6ef7d3c6709..3b8c77bcdd9a 100644 --- a/arch/loongarch/lib/dump_tlb.c +++ b/arch/loongarch/lib/dump_tlb.c @@ -14,11 +14,11 @@ void dump_tlb_regs(void) { const int field = 2 * sizeof(unsigned long); - pr_info("Index : %0x\n", read_csr_tlbidx()); - pr_info("PageSize : %0x\n", read_csr_pagesize()); - pr_info("EntryHi : %0*llx\n", field, read_csr_entryhi()); - pr_info("EntryLo0 : %0*llx\n", field, read_csr_entrylo0()); - pr_info("EntryLo1 : %0*llx\n", field, read_csr_entrylo1()); + pr_info("Index : 0x%0x\n", read_csr_tlbidx()); + pr_info("PageSize : 0x%0x\n", read_csr_pagesize()); + pr_info("EntryHi : 0x%0*llx\n", field, read_csr_entryhi()); + pr_info("EntryLo0 : 0x%0*llx\n", field, read_csr_entrylo0()); + pr_info("EntryLo1 : 0x%0*llx\n", field, read_csr_entrylo1()); } static void dump_tlb(int first, int last) @@ -29,8 +29,8 @@ static void dump_tlb(int first, int last) unsigned int s_index, s_asid; unsigned int pagesize, c0, c1, i; unsigned long asidmask = cpu_asid_mask(¤t_cpu_data); - int pwidth = 11; - int vwidth = 11; + int pwidth = 16; + int vwidth = 16; int asidwidth = DIV_ROUND_UP(ilog2(asidmask) + 1, 4); s_entryhi = read_csr_entryhi(); @@ -60,22 +60,22 @@ static void dump_tlb(int first, int last) /* * Only print entries in use */ - printk("Index: %2d pgsize=%x ", i, (1 << pagesize)); + printk("Index: %4d pgsize=0x%x ", i, (1 << pagesize)); c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; - pr_cont("va=%0*lx asid=%0*lx", + pr_cont("va=0x%0*lx asid=0x%0*lx", vwidth, (entryhi & ~0x1fffUL), asidwidth, asid & asidmask); /* NR/NX are in awkward places, so mask them off separately */ pa = entrylo0 & ~(ENTRYLO_NR | ENTRYLO_NX); pa = pa & PAGE_MASK; pr_cont("\n\t["); - pr_cont("ri=%d xi=%d ", + pr_cont("nr=%d nx=%d ", (entrylo0 & ENTRYLO_NR) ? 1 : 0, (entrylo0 & ENTRYLO_NX) ? 
1 : 0); - pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld] [", + pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld] [", pwidth, pa, c0, (entrylo0 & ENTRYLO_D) ? 1 : 0, (entrylo0 & ENTRYLO_V) ? 1 : 0, @@ -84,10 +84,10 @@ static void dump_tlb(int first, int last) /* NR/NX are in awkward places, so mask them off separately */ pa = entrylo1 & ~(ENTRYLO_NR | ENTRYLO_NX); pa = pa & PAGE_MASK; - pr_cont("ri=%d xi=%d ", + pr_cont("nr=%d nx=%d ", (entrylo1 & ENTRYLO_NR) ? 1 : 0, (entrylo1 & ENTRYLO_NX) ? 1 : 0); - pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n", + pr_cont("pa=0x%0*llx c=%d d=%d v=%d g=%d plv=%lld]\n", pwidth, pa, c1, (entrylo1 & ENTRYLO_D) ? 1 : 0, (entrylo1 & ENTRYLO_V) ? 1 : 0, -- Gitee From cd114fd9047e8c07945a7dec3371ef52dada1fca Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 2 Sep 2022 22:33:42 +0800 Subject: [PATCH 057/241] LoongArch: Fix section mismatch due to acpi_os_ioremap() Now acpi_os_ioremap() is marked with __init because it calls memblock_is_memory() which is also marked with __init in the !ARCH_KEEP_MEMBLOCK case. However, acpi_os_ioremap() is called by ordinary functions such as acpi_os_{read, write}_memory() and causes section mismatch warnings: WARNING: modpost: vmlinux.o: section mismatch in reference: acpi_os_read_memory (section: .text) -> acpi_os_ioremap (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: acpi_os_write_memory (section: .text) -> acpi_os_ioremap (section: .init.text) Fix these warnings by selecting ARCH_KEEP_MEMBLOCK unconditionally and removing the __init modifier of acpi_os_ioremap(). This can also give a chance to track "memory" and "reserved" memblocks after early boot. Change-Id: Ibdc6c741c91e8472273e0c50d740616cc406df1f Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/acpi.h | 2 +- arch/loongarch/kernel/acpi.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 778cd955401d..c71d0003d769 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -34,6 +34,7 @@ config LOONGARCH select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION + select ARCH_KEEP_MEMBLOCK select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_USE_BUILTIN_BSWAP diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 978ac2419d70..b6cf884c6da5 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -19,7 +19,7 @@ extern int acpi_pci_disabled; extern int acpi_noirq; #define acpi_os_ioremap acpi_os_ioremap -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); static inline void disable_acpi(void) { diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 58d52f2b6271..dda9f9f969f5 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -116,7 +116,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) early_memunmap(map, size); } -void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { if (!memblock_is_memory(phys)) return ioremap(phys, size); -- Gitee From 859ddc3fec5ef0e6e5a30d4cbb5a3a948c99a75e Mon Sep 17 00:00:00 2001 From: Yupeng Li Date: Wed, 31 Aug 2022
13:40:17 +0800 Subject: [PATCH 058/241] LoongArch: Fix arch_remove_memory() undefined build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kernel build fails when CONFIG_MEMORY_HOTREMOVE is not selected, because arch_remove_memory() is needed by mm/memory_hotplug.c but is left undefined. The build errors look like: LD vmlinux.o MODPOST vmlinux.symvers MODINFO modules.builtin.modinfo GEN modules.builtin LD .tmp_vmlinux.kallsyms1 loongarch64-linux-gnu-ld: mm/memory_hotplug.o: in function `.L242': memory_hotplug.c:(.ref.text+0x930): undefined reference to `arch_remove_memory' make: *** [Makefile:1169:vmlinux] Error 1 Remove the CONFIG_MEMORY_HOTREMOVE requirement and rearrange the file to follow the definitions of other architectures. Change-Id: I3c8fd9db6d5ca0ea312aee7c3c45daa84ca0096f Signed-off-by: Yupeng Li Signed-off-by: Caicai Signed-off-by: Huacai Chen --- arch/loongarch/mm/init.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index e41c9685af36..144b4949ee53 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -134,18 +134,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) return ret; } -#ifdef CONFIG_NUMA -int memory_add_physaddr_to_nid(u64 start) -{ - int nid; - - nid = pa_to_nid(start); - return nid; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif - -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -158,6 +146,16 @@ void arch_remove_memory(int nid, u64 start, page += vmem_altmap_offset(altmap); __remove_pages(start_pfn, nr_pages, altmap); } + +#ifdef CONFIG_NUMA +int memory_add_physaddr_to_nid(u64 start) +{ + int nid; + + nid = pa_to_nid(start); + return nid; +} +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif #endif -- Gitee From eefbf9927dbf0e443c922d8d9115d4886169071e Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Fri, 26 Aug 2022 07:29:03 +0000 Subject: [PATCH 059/241] LoongArch: mm: Remove the unneeded result variable Return the value pa_to_nid() directly instead of storing it in another redundant variable. Change-Id: I415fe68525f0e7722681bc39333a8c485338016e Reported-by: Zeal Robot Signed-off-by: ye xingchen Signed-off-by: Huacai Chen --- arch/loongarch/mm/init.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 144b4949ee53..2c31ad9c6722 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -150,10 +150,7 @@ void arch_remove_memory(int nid, u64 start, #ifdef CONFIG_NUMA int memory_add_physaddr_to_nid(u64 start) { - int nid; - - nid = pa_to_nid(start); - return nid; + return pa_to_nid(start); } EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif -- Gitee From e06ef6455a5c512e2b452848d8a01d980b9526e4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 26 Sep 2022 22:25:29 +0800 Subject: [PATCH 060/241] LoongArch: Align the address of kernel_entry to 4KB Align the address of kernel_entry to 4KB, to avoid an early TLB miss exception in case the entry code crosses a page boundary.
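For reference, the .align directive used here takes a power-of-two exponent, so the change amounts to (sketch):

	__REF
		.align 12	/* 2^12 = 4096 bytes: kernel_entry starts on a 4KB page boundary */
	SYM_CODE_START(kernel_entry)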
Change-Id: Ic6e7698e2c0d0e4b872b89d5609e260b42255b07 Signed-off-by: Huacai Chen --- arch/loongarch/kernel/head.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 5c957a80d359..a968835b298d 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -16,6 +16,8 @@ SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE) __REF + .align 12 + SYM_CODE_START(kernel_entry) # kernel entry point /* Config direct window and set PG */ -- Gitee From 1ac09dffdd0efdc37681ae357fbe4bea5ac5e2be Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 26 Sep 2022 22:33:39 +0800 Subject: [PATCH 061/241] LoongArch: Fix and cleanup csr_era handling in do_ri() We don't emulate reserved instructions and just send a signal to the current process now. So we don't need to call compute_return_era() to add 4 (point to the next instruction) to csr_era in pt_regs. RA/ERA's backup/restore is cleaned up as well. Change-Id: I0903a0fdf8b9ca133f445e8381c0a74a19b05034 Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 334a682fb1b0..0697ff186b40 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -489,11 +489,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs) asmlinkage void noinstr do_ri(struct pt_regs *regs) { - int status = -1; + int status = SIGILL; unsigned int opcode = 0; unsigned int __user *era = (unsigned int __user *)exception_era(regs); - unsigned long old_era = regs->csr_era; - unsigned long old_ra = regs->regs[1]; irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); @@ -505,22 +503,12 @@ asmlinkage void noinstr do_ri(struct pt_regs *regs) die_if_kernel("Reserved instruction in kernel code", regs); - if (unlikely(compute_return_era(regs) < 0)) - goto out; - if (unlikely(get_user(opcode, era) < 0)) { status = SIGSEGV; current->thread.error_code = 1; } - if (status < 0) - status = SIGILL; - - if (unlikely(status > 0)) { - regs->csr_era = old_era; /* Undo skip-over. */ - regs->regs[1] = old_ra; - force_sig(status); - } + force_sig(status); out: local_irq_disable(); -- Gitee From 7e73cb7732de1294e90457bf1a0112677c2dc51d Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 29 Sep 2022 19:23:18 +0800 Subject: [PATCH 062/241] LoongArch: update local TLB if PTE entry exists Currently, the implementation of update_mmu_tlb() is empty if __HAVE_ARCH_UPDATE_MMU_TLB is not defined. Then if two threads concurrently fault at the same page, the second thread that did not win the race will give up and do nothing. In the LoongArch architecture, this second thread will trigger another fault, and only updates its local TLB. Instead of triggering another fault, it's better to implement update_mmu_tlb() to directly update the local TLB of the second thread. Just do it. 
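For context, a simplified sketch of the generic fault path that benefits (mm/memory.c, details elided): the thread that lost the race now refreshes its local TLB instead of just bailing out and faulting again:

	if (!pte_none(*vmf->pte)) {
		/* another thread already installed the PTE */
		update_mmu_tlb(vma, vmf->address, vmf->pte);
		goto release;
	}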
Change-Id: Ic245558702cb2f70cba34c8a159260176821e13a Link: https://lkml.kernel.org/r/20220929112318.32393-3-zhengqi.arch@bytedance.com Signed-off-by: Qi Zheng Suggested-by: Bibo Mao Acked-by: Huacai Chen Cc: Chris Zankel Cc: David Hildenbrand Cc: Max Filippov Cc: Muchun Song Signed-off-by: Andrew Morton --- arch/loongarch/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 25c9359e2e60..68ea6961d154 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -179,6 +179,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, __update_tlb(vma, address, ptep); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { -- Gitee From c312690178ece6cb9793a2ff497fd36c37c976e3 Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Thu, 29 Sep 2022 16:51:05 +0800 Subject: [PATCH 063/241] LoongArch: Fix cpu name after CPU-hotplug Don't overwrite the SMBIOS-provided CPU name on coming back from CPU-hotplug (including S3/S4) if it is already initialized. Change-Id: I29b2e3914f02c1a46471b734f90a96548b7dd96b Reviewed-by: WANG Xuerui Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cpu-probe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 30e73b6f6014..6a4ff8116f1c 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -187,7 +187,9 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]); uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]); - __cpu_full_name[cpu] = cpu_full_name; + if (!__cpu_full_name[cpu]) + __cpu_full_name[cpu] = cpu_full_name; + *vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR); *cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME); -- Gitee From 7909ab785bf5ec5bdddbefe5aed59bc6e906d8ba Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 12 Oct 2022 09:15:02 +0800 Subject: [PATCH 064/241] LoongArch: Flush TLB earlier at initialization Move local_flush_tlb_all() earlier (just after setup_ptwalker() and before page allocation). This can avoid stale TLB entries misguiding the later page allocation. Without this patch the second kernel of kexec/kdump fails to boot SMP. BTW, move output_pgtable_bits_defines() into tlb_init() since it has nothing to do with tlb handler setup.
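The intended ordering is, roughly (see the diff below):

	setup_ptwalker();
	local_flush_tlb_all();	/* drop stale entries, e.g. from a kexec'ed previous kernel */
	/* only now allocate/copy the per-CPU exception and TLB handlers */

so that no page allocated afterwards can be shadowed by a stale translation.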
Change-Id: I4509549279ff950e695d7fee16331fb517e5136b Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 7c027531ea70..dc0c9bb6f252 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -257,7 +257,7 @@ extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; void setup_tlb_handler(int cpu) { setup_ptwalker(); - output_pgtable_bits_defines(); + local_flush_tlb_all(); /* The tlb handlers are generated only once */ if (cpu == 0) { @@ -300,6 +300,7 @@ void tlb_init(int cpu) write_csr_pagesize(PS_DEFAULT_SIZE); write_csr_stlbpgsize(PS_DEFAULT_SIZE); write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE); + setup_tlb_handler(cpu); - local_flush_tlb_all(); + output_pgtable_bits_defines(); } -- Gitee From c3fbf9b300e91f52778c561187769e95bda65fbe Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 8 Oct 2022 22:25:31 +0800 Subject: [PATCH 065/241] LoongArch: Mark __xchg() and __cmpxchg() as __always_inline Commit ac7c3e4ff401 ("compiler: enable CONFIG_OPTIMIZE_INLINING forcibly") allows compiler to uninline functions marked as 'inline'. In case of __xchg()/__cmpxchg() this would cause to reference BUILD_BUG(), which is an error case for catching bugs and will not happen for correct code, if __xchg()/__cmpxchg() is inlined. This bug can be produced with CONFIG_DEBUG_SECTION_MISMATCH enabled, and the solution is similar to below commits: 46f1619500d0225 ("MIPS: include: Mark __xchg as __always_inline"), 88356d09904bc60 ("MIPS: include: Mark __cmpxchg as __always_inline"). Change-Id: I1d605ca661623e43679a54914d93d6c5c352f7f0 Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cmpxchg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h index 70ebf9ca41b7..ee79e10d8b21 100644 --- a/arch/loongarch/include/asm/cmpxchg.h +++ b/arch/loongarch/include/asm/cmpxchg.h @@ -24,8 +24,8 @@ extern unsigned long __xchg_small(volatile void *ptr, unsigned long x, unsigned int size); -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline unsigned long +__xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -77,8 +77,8 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size); -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { case 1: -- Gitee From 4effed90fcb297633e3e2563f8b9db13772cf185 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Thu, 27 Oct 2022 19:11:49 +0800 Subject: [PATCH 066/241] LoongArch: Remove unused kernel stack padding The current LoongArch kernel stack is padded as if obeying the MIPS o32 calling convention (32 bytes), signifying the port's MIPS lineage but no longer making sense. Remove the padding for clarity. 
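As a worked example (assuming 16KB kernel stacks, i.e. _THREAD_SIZE = 0x4000), the initial pt_regs of a task now sits at

	task_stack_page(tsk) + 0x4000 - sizeof(struct pt_regs)

instead of 32 bytes lower, and head.S no longer reserves the extra 4 * SZREG argument save area below it.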
Change-Id: I61139b21f09ce1371ffb3e414473ee10abd9e39b Reviewed-by: WANG Xuerui Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/processor.h | 2 +- arch/loongarch/include/asm/ptrace.h | 2 +- arch/loongarch/kernel/head.S | 3 +-- arch/loongarch/kernel/process.c | 2 +- arch/loongarch/kernel/switch.S | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index d59db0f18c9f..f0814b552f23 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -186,7 +186,7 @@ static inline void flush_thread(void) unsigned long get_wchan(struct task_struct *p); #define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \ - THREAD_SIZE - 32 - sizeof(struct pt_regs)) + THREAD_SIZE - sizeof(struct pt_regs)) #define task_pt_regs(tsk) ((struct pt_regs *)__KSTK_TOS(tsk)) #define KSTK_EIP(tsk) (task_pt_regs(tsk)->csr_era) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[3]) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index 5dd5cae4e52b..0a55aaad38e0 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -137,7 +137,7 @@ static inline void die_if_kernel(const char *str, struct pt_regs *regs) #define current_pt_regs() \ ({ \ unsigned long sp = (unsigned long)__builtin_frame_address(0); \ - (struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1 - 32) - 1; \ + (struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1) - 1; \ }) /* Helpers for working with the user stack pointer */ diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index a968835b298d..e10edaab1a69 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -61,10 +61,9 @@ SYM_CODE_START(kernel_entry) # kernel entry point la tp, init_thread_union /* Set the SP after an empty pt_regs. */ - PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) + PTR_LI sp, (_THREAD_SIZE - PT_SIZE) PTR_ADD sp, sp, tp set_saved_sp sp, t0, t1 - PTR_ADDI sp, sp, -4 * SZREG # init stack pointer bl start_kernel diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index def9edad44fc..816922df9183 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long childksp; struct pt_regs *childregs, *regs = current_pt_regs(); - childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; + childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE; /* set up new TSS. 
*/ childregs = (struct pt_regs *) childksp - 1; diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index bf7002209600..3445cc59d730 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -29,7 +29,7 @@ SYM_FUNC_START(__switch_to) move tp, a2 cpu_restore_nonscratch a1 - li.w t0, _THREAD_SIZE - 32 + li.w t0, _THREAD_SIZE PTR_ADD t0, t0, tp set_saved_sp t0, t1, t2 -- Gitee From 86c4a1ad6123dfdba8174350dcf564f39f8a70df Mon Sep 17 00:00:00 2001 From: Yushan Zhou Date: Tue, 18 Oct 2022 17:47:42 +0800 Subject: [PATCH 067/241] LoongArch: Use flexible-array member instead of zero-length array Eliminate the following coccicheck warning: ./arch/loongarch/include/asm/ptrace.h:32:15-21: WARNING use flexible-array member instead Change-Id: Iaeb13019442c19f7cae84087dcd91f2ac1a77017 Reviewed-by: WANG Xuerui Signed-off-by: Yushan Zhou Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index 0a55aaad38e0..b394d9a74157 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -29,7 +29,7 @@ struct pt_regs { unsigned long csr_euen; unsigned long csr_ecfg; unsigned long csr_estat; - unsigned long __last[0]; + unsigned long __last[]; } __aligned(8); static inline int regs_irqs_disabled(struct pt_regs *regs) -- Gitee From e928f7cd531484d2ae15301c3566a505250a5ad0 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 15 Nov 2022 18:02:29 +0800 Subject: [PATCH 068/241] LoongArch: Makefile: Use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E Fix this up by changing the LoongArch Makefile to use "grep -E" instead. Change-Id: I4bd434989146098172c64b05b65761d4a41e110f Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 5cfcba7e8161..6b0394d87da5 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -71,7 +71,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif -- Gitee From 3f52a01408492df9135b89be719bd1cf46b6500b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 7 Nov 2022 14:25:41 +0800 Subject: [PATCH 069/241] LoongArch: Combine acpi_boot_table_init() and acpi_boot_init() Combine acpi_boot_table_init() and acpi_boot_init() since they are very simple, and we don't need to check the return value of acpi_boot_init(). 
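The combined function keeps the same flow, roughly (full version in the diff below):

	void __init acpi_boot_table_init(void)
	{
		if (acpi_disabled)
			return;
		if (acpi_table_init()) {
			disable_acpi();
			return;
		}
		/* ... what acpi_boot_init() used to do ... */
	}

so platform_init() only needs the single acpi_boot_table_init() call.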
Change-Id: I88c4f2e3bf9c1c44daa01e9cd6a85c7ec6097e11 Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 31 ++++++++++--------------------- arch/loongarch/loongson64/init.c | 1 - 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index dda9f9f969f5..6c85a7560ffb 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -124,23 +124,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -void __init acpi_boot_table_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. - */ - if (acpi_table_init()) { - disable_acpi(); - return; - } -} - static int set_processor_mask(u32 id, u32 flags) { @@ -348,13 +331,21 @@ static void __init acpi_process_madt(void) } } -int __init acpi_boot_init(void) +void __init acpi_boot_table_init(void) { /* * If acpi_disabled, bail out */ if (acpi_disabled) - return -1; + return; + + /* + * Initialize the ACPI boot-time table parser. + */ + if (acpi_table_init()) { + disable_acpi(); + return; + } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -365,8 +356,6 @@ int __init acpi_boot_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); - - return 0; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 66f972db2c6b..13778d3dd1b6 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -120,7 +120,6 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); - acpi_boot_init(); #endif #ifdef CONFIG_NUMA -- Gitee From 8edc2c5634d25c83da140755c3cd433a0a9cff7f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 18 Nov 2022 12:33:46 +0800 Subject: [PATCH 070/241] LoongArch: Clear FPU/SIMD thread info flags for kernel thread If a kernel thread is created by a user thread, it may carry FPU/SIMD thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). 
Then it will be considered an FPU owner, and the kernel will try to save its FPU/SIMD context, causing errors such as: [ 41.518931] do_fpu invoked from kernel context![#1]: [ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 [ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 [ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 [ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 [ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc [ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 [ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 [ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 [ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 [ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 [ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 [ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 [ 41.619160] CSR crmd: 000000b0 [ 41.619163] CSR prmd: 00000000 [ 41.622359] CSR euen: 00000000 [ 41.625558] CSR ecfg: 00071c1c [ 41.628756] CSR estat: 000f0000 [ 41.635239] ExcCode : f (SubCode 0) [ 41.638783] PrId : 0014c010 (Loongson-64bit) [ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs [ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) [ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 [ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 [ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 [ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 [ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 [ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 [ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 [ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 [ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 [ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 41.742745] ... [ 41.745252] Call Trace: [ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 [ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 [ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 [ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 [ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 [ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c This can be easily triggered by ltp testcase syscalls/io_uring02 and it can also be easily fixed by clearing the FPU/SIMD thread info flags for kernel threads in copy_thread().
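For context, a simplified sketch of why the stale flags are fatal (not the exact scheduler code): on a context switch, any task with TIF_USEDFPU set is treated as an FPU owner and its context is saved, but a kernel thread runs with the FPU disabled (CSR.EUEN.FPE=0), so the save raises the do_fpu exception seen above:

	if (test_tsk_thread_flag(prev, TIF_USEDFPU))
		_save_fp(&prev->thread.fpu);	/* faults: FPU access is off in kernel context */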
Change-Id: Ib8dd21b605e995e81cb404ecdb7675decbc9f8cd Cc: stable@vger.kernel.org Reported-by: Qi Hu Signed-off-by: Huacai Chen --- arch/loongarch/kernel/process.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 816922df9183..ed0aebfe95d4 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -141,7 +141,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; } /* user thread */ @@ -159,14 +159,15 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, */ childregs->csr_euen = 0; + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); - if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; } -- Gitee From 31f2d0525e9ea3c26a4de6c48632ef2ee2d7e175 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 16 Nov 2022 22:27:17 +0800 Subject: [PATCH 071/241] LoongArch: Set _PAGE_DIRTY only if _PAGE_WRITE is set in {pmd,pte}_mkdirty() Now {pmd,pte}_mkdirty() sets the _PAGE_DIRTY bit unconditionally, which causes random segmentation faults after commit 0ccf7f168e17bb7e ("mm/thp: carry over dirty bit when thp splits on pmd"). The reason is: at fork(), the parent process uses pmd_wrprotect() to clear the huge page's _PAGE_WRITE and _PAGE_DIRTY (for COW); then pte_mkdirty() sets _PAGE_DIRTY as well as _PAGE_MODIFIED while splitting dirty huge pages; once _PAGE_DIRTY is set, there will be no TLB modify exception, so the COW mechanism fails; and in the end memory corruption occurs between parent and child processes. So, we should set _PAGE_DIRTY only when _PAGE_WRITE is set in {pmd,pte}_mkdirty(). Change-Id: Ib5365da09c30af9a0b5597d1c2883890f1e19c77 Cc: stable@vger.kernel.org Cc: Peter Xu Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 68ea6961d154..553f4afdc79e 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -116,7 +116,9 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pte_val(pte) |= _PAGE_MODIFIED; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -240,7 +242,9 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pmd_val(pmd) |= _PAGE_MODIFIED; + if (pmd_val(pmd) & _PAGE_WRITE) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } -- Gitee From cdb83d4ee870204d8f5b19d4e6bd20beec97570c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 20 Nov 2022 09:11:16 +0800 Subject: [PATCH 072/241] LoongArch: Set _PAGE_DIRTY only if _PAGE_MODIFIED is set in {pmd,pte}_mkwrite() Set _PAGE_DIRTY only if _PAGE_MODIFIED is set in {pmd,pte}_mkwrite(). Otherwise, _PAGE_DIRTY silences the TLB modify exception and leaves no chance to mark a pmd/pte dirty (_PAGE_MODIFIED) in software.
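Taken together with the previous patch, the intended invariants can be summarized as (sketch): _PAGE_MODIFIED is the software dirty bit, _PAGE_DIRTY the hardware one, and

	mkdirty: set _PAGE_MODIFIED; set _PAGE_DIRTY only if _PAGE_WRITE is set
	mkwrite: set _PAGE_WRITE;    set _PAGE_DIRTY only if _PAGE_MODIFIED is set

so _PAGE_DIRTY is only ever set when both _PAGE_WRITE and _PAGE_MODIFIED hold, and the first write to any other writable page still takes a TLB modify exception.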
Change-Id: Ie5fc6a47b002817a0e006b290ea531b39a7e69a8 Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 553f4afdc79e..45ef7f09fcdd 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -124,7 +124,9 @@ static inline pte_t pte_mkdirty(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_MODIFIED) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -219,7 +221,9 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + pmd_val(pmd) |= _PAGE_WRITE; + if (pmd_val(pmd) & _PAGE_MODIFIED) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } -- Gitee From a91fc72ffcfccab4c033044b417e4da29736ed70 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 22 Dec 2022 15:07:08 +0800 Subject: [PATCH 073/241] LoongArch: Add HWCAP_LOONGARCH_CPUCFG to elf_hwcap HWCAP_LOONGARCH_CPUCFG is missing in elf_hwcap, so add it for glibc's later use. Change-Id: I65b2980fa24e48915473be9d6e4081b532cf42d4 Cc: stable@vger.kernel.org Reported-by: Yinyu Cai Signed-off-by: Huacai Chen --- arch/loongarch/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 6a4ff8116f1c..b249cbb807e7 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -94,7 +94,7 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; - elf_hwcap |= HWCAP_LOONGARCH_CRC32; + elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32; config = read_cpucfg(LOONGARCH_CPUCFG1); if (config & CPUCFG1_UAL) { -- Gitee From 749edaec3d786e9501d9926772ff0673bddd0d1e Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Wed, 1 Feb 2023 17:29:04 +0800 Subject: [PATCH 074/241] LoongArch: Fix Chinese comma in cpu.h Fix Chinese comma introduced by accident in cpu.h. Change-Id: I124c1bee19a6c73c9b03e1d5b29b3ed2020a27b2 Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 63d7094610be..443d8ee2f187 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -36,7 +36,7 @@ #define PRID_SERIES_LA132 0x8000 /* Loongson 32bit */ #define PRID_SERIES_LA264 0xa000 /* Loongson 64bit, 2-issue */ -#define PRID_SERIES_LA364 0xb000 /* Loongson 64bit,3-issue */ +#define PRID_SERIES_LA364 0xb000 /* Loongson 64bit, 3-issue */ #define PRID_SERIES_LA464 0xc000 /* Loongson 64bit, 4-issue */ #define PRID_SERIES_LA664 0xd000 /* Loongson 64bit, 6-issue */ -- Gitee From 0ff31dfada49f5cb1eececf9a540df8729a57507 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 18 Jun 2021 17:20:56 +0200 Subject: [PATCH 075/241] ACPI: arm64: Move DMA setup operations out of IORT Extract generic DMA setup code out of IORT, so it can be reused by VIOT. Keep it in drivers/acpi/arm64 for now, since it could break x86 platforms that haven't run this code so far, if they have invalid tables. 
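After this refactor the call flow is roughly (sketch):

	acpi_dma_configure_id()
	  -> acpi_arch_dma_setup()		/* generic arm64 code, drivers/acpi/arm64/dma.c */
	       -> acpi_dma_get_range()		/* _DMA object, ACPI core */
	       -> iort_dma_get_ranges()		/* IORT fallback on -ENODEV */

leaving only the IORT table walk itself in iort.c.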
Change-Id: Ia964a43f95e9c5564b88d1d56d517d56894680c3 Reviewed-by: Eric Auger Signed-off-by: Jean-Philippe Brucker Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/20210618152059.1194210-2-jean-philippe@linaro.org Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/dma.c | 50 ++++++++++++++++++++++++++++++++++ drivers/acpi/arm64/iort.c | 54 ++++++------------------------------- drivers/acpi/scan.c | 2 +- include/linux/acpi.h | 3 +++ include/linux/acpi_iort.h | 6 ++--- 6 files changed, 66 insertions(+), 50 deletions(-) create mode 100644 drivers/acpi/arm64/dma.c diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 6ff50f4ed947..66acbe77f46e 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o +obj-y += dma.o diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c new file mode 100644 index 000000000000..f16739ad3cc0 --- /dev/null +++ b/drivers/acpi/arm64/dma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include + +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +{ + int ret; + u64 end, mask; + u64 dmaaddr = 0, size = 0, offset = 0; + + /* + * If @dev is expected to be DMA-capable then the bus code that created + * it should have initialised its dma_mask pointer by this point. For + * now, we'll continue the legacy behaviour of coercing it to the + * coherent mask if not, but we'll no longer do so quietly. + */ + if (!dev->dma_mask) { + dev_warn(dev, "DMA mask not set\n"); + dev->dma_mask = &dev->coherent_dma_mask; + } + + if (dev->coherent_dma_mask) + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + else + size = 1ULL << 32; + + ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + if (ret == -ENODEV) + ret = iort_dma_get_ranges(dev, &size); + if (!ret) { + /* + * Limit coherent and dma mask based on size retrieved from + * firmware. + */ + end = dmaaddr + size - 1; + mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->bus_dma_limit = end; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); + } + + *dma_addr = dmaaddr; + *dma_size = size; + + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); +} diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 554943be2698..5539779d78cd 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1143,56 +1143,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size) } /** - * iort_dma_setup() - Set-up device DMA parameters. + * iort_dma_get_ranges() - Look up DMA addressing limit for the device + * @dev: device to lookup + * @size: DMA range size result pointer * - * @dev: device to configure - * @dma_addr: device DMA address result pointer - * @dma_size: DMA range size result pointer + * Return: 0 on success, an error otherwise. */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +int iort_dma_get_ranges(struct device *dev, u64 *size) { - u64 end, mask, dmaaddr = 0, size = 0, offset = 0; - int ret; - - /* - * If @dev is expected to be DMA-capable then the bus code that created - * it should have initialised its dma_mask pointer by this point. 
For - * now, we'll continue the legacy behaviour of coercing it to the - * coherent mask if not, but we'll no longer do so quietly. - */ - if (!dev->dma_mask) { - dev_warn(dev, "DMA mask not set\n"); - dev->dma_mask = &dev->coherent_dma_mask; - } - - if (dev->coherent_dma_mask) - size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); + if (dev_is_pci(dev)) + return rc_dma_get_range(dev, size); else - size = 1ULL << 32; - - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); - if (ret == -ENODEV) - ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size) - : nc_dma_get_range(dev, &size); - - if (!ret) { - /* - * Limit coherent and dma mask based on size retrieved from - * firmware. - */ - end = dmaaddr + size - 1; - mask = DMA_BIT_MASK(ilog2(end) + 1); - dev->bus_dma_limit = end; - dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); - *dev->dma_mask = min(*dev->dma_mask, mask); - } - - *dma_addr = dmaaddr; - *dma_size = size; - - ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); - - dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); + return nc_dma_get_range(dev, size); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 67a5ee2fedfd..5062830c81f8 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1503,7 +1503,7 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, return 0; } - iort_dma_setup(dev, &dma_addr, &size); + acpi_arch_dma_setup(dev, &dma_addr, &size); iommu = iort_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a3afd20e523a..e8fdccad3ef9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -260,9 +260,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); +void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } +static inline void +acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 136dba94c646..5870243835cb 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -35,7 +35,7 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id, void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ -void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); +int iort_dma_get_ranges(struct device *dev, u64 *size); const struct iommu_ops *iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); @@ -49,8 +49,8 @@ static inline struct irq_domain *iort_get_device_domain( { return NULL; } static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ -static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, - u64 *size) { } +static inline int iort_dma_get_ranges(struct device *dev, u64 *size) +{ return -ENODEV; } static inline const struct iommu_ops *iort_iommu_configure_id( struct device *dev, const u32 *id_in) { return NULL; } -- Gitee From dad969f081232fe85ed5dd8eba1d4abfbea7bafb Mon Sep 
17 00:00:00 2001 From: Jianmin Lv Date: Sun, 11 Sep 2022 17:06:34 +0800 Subject: [PATCH 076/241] ACPI: scan: Support multiple DMA windows with different offsets In DT systems configurations, of_dma_get_range() returns struct bus_dma_region DMA regions; they are used to set-up devices DMA windows with different offset available for translation between DMA address and CPU address. In ACPI systems configuration, acpi_dma_get_range() does not return DMA regions yet and that precludes setting up the dev->dma_range_map pointer and therefore DMA regions with multiple offsets. Update acpi_dma_get_range() to return struct bus_dma_region DMA regions like of_dma_get_range() does. After updating acpi_dma_get_range(), acpi_arch_dma_setup() is changed for ARM64, where the original dma_addr and size are removed as these arguments are now redundant, and pass 0 and U64_MAX for dma_base and size of arch_setup_dma_ops; this is a simplification consistent with what other ACPI architectures also pass to iommu_setup_dma_ops(). Change-Id: I299ce28dafbc6516448f6a04f98aa3d09cf1e2d5 Reviewed-by: Robin Murphy Signed-off-by: Jianmin Lv Reviewed-by: Lorenzo Pieralisi Signed-off-by: Rafael J. Wysocki --- drivers/acpi/arm64/dma.c | 28 ++++++++++++--------- drivers/acpi/scan.c | 53 +++++++++++++++++----------------------- include/acpi/acpi_bus.h | 3 +-- include/linux/acpi.h | 7 +++--- 4 files changed, 44 insertions(+), 47 deletions(-) diff --git a/drivers/acpi/arm64/dma.c b/drivers/acpi/arm64/dma.c index f16739ad3cc0..93d796531af3 100644 --- a/drivers/acpi/arm64/dma.c +++ b/drivers/acpi/arm64/dma.c @@ -4,11 +4,12 @@ #include #include -void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) +void acpi_arch_dma_setup(struct device *dev) { int ret; u64 end, mask; - u64 dmaaddr = 0, size = 0, offset = 0; + u64 size = 0; + const struct bus_dma_region *map = NULL; /* * If @dev is expected to be DMA-capable then the bus code that created @@ -26,7 +27,19 @@ void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) else size = 1ULL << 32; - ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size); + ret = acpi_dma_get_range(dev, &map); + if (!ret && map) { + const struct bus_dma_region *r = map; + + for (end = 0; r->size; r++) { + if (r->dma_start + r->size - 1 > end) + end = r->dma_start + r->size - 1; + } + + size = end + 1; + dev->dma_range_map = map; + } + if (ret == -ENODEV) ret = iort_dma_get_ranges(dev, &size); if (!ret) { @@ -34,17 +47,10 @@ void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) * Limit coherent and dma mask based on size retrieved from * firmware. */ - end = dmaaddr + size - 1; + end = size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); *dev->dma_mask = min(*dev->dma_mask, mask); } - - *dma_addr = dmaaddr; - *dma_size = size; - - ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); - - dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 5062830c81f8..6c08fb354773 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" @@ -1411,25 +1412,21 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) * acpi_dma_get_range() - Get device DMA parameters. 
* * @dev: device to configure - * @dma_addr: pointer device DMA address result - * @offset: pointer to the DMA offset result - * @size: pointer to DMA range size result + * @map: pointer to DMA ranges result * - * Evaluate DMA regions and return respectively DMA region start, offset - * and size in dma_addr, offset and size on parsing success; it does not - * update the passed in values on failure. + * Evaluate DMA regions and return pointer to DMA regions on + * parsing success; it does not update the passed in values on failure. * * Return 0 on success, < 0 on failure. */ -int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, - u64 *size) +int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { struct acpi_device *adev; LIST_HEAD(list); struct resource_entry *rentry; int ret; struct device *dma_dev = dev; - u64 len, dma_start = U64_MAX, dma_end = 0, dma_offset = 0; + struct bus_dma_region *r; /* * Walk the device tree chasing an ACPI companion with a _DMA @@ -1454,31 +1451,28 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, ret = acpi_dev_get_dma_resources(adev, &list); if (ret > 0) { + r = kcalloc(ret + 1, sizeof(*r), GFP_KERNEL); + if (!r) { + ret = -ENOMEM; + goto out; + } + list_for_each_entry(rentry, &list, node) { - if (dma_offset && rentry->offset != dma_offset) { + if (rentry->res->start >= rentry->res->end) { + kfree(r); ret = -EINVAL; - dev_warn(dma_dev, "Can't handle multiple windows with different offsets\n"); + dev_dbg(dma_dev, "Invalid DMA regions configuration\n"); goto out; } - dma_offset = rentry->offset; - /* Take lower and upper limits */ - if (rentry->res->start < dma_start) - dma_start = rentry->res->start; - if (rentry->res->end > dma_end) - dma_end = rentry->res->end; - } - - if (dma_start >= dma_end) { - ret = -EINVAL; - dev_dbg(dma_dev, "Invalid DMA regions configuration\n"); - goto out; + r->cpu_start = rentry->res->start; + r->dma_start = rentry->res->start - rentry->offset; + r->size = resource_size(rentry->res); + r->offset = rentry->offset; + r++; } - *dma_addr = dma_start - dma_offset; - len = dma_end - dma_start; - *size = max(len, len + 1); - *offset = dma_offset; + *map = r; } out: acpi_dev_free_resource_list(&list); @@ -1496,20 +1490,19 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id) { const struct iommu_ops *iommu; - u64 dma_addr = 0, size = 0; if (attr == DEV_DMA_NOT_SUPPORTED) { set_dma_ops(dev, &dma_dummy_ops); return 0; } - acpi_arch_dma_setup(dev, &dma_addr, &size); + acpi_arch_dma_setup(dev); iommu = iort_iommu_configure_id(dev, input_id); if (PTR_ERR(iommu) == -EPROBE_DEFER) return -EPROBE_DEFER; - arch_setup_dma_ops(dev, dma_addr, size, + arch_setup_dma_ops(dev, 0, U64_MAX, iommu, attr == DEV_DMA_COHERENT); return 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0f5366792d22..e528ca6641d6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -587,8 +587,7 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); -int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset, - u64 *size); +int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); static inline int acpi_dma_configure(struct device *dev, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e8fdccad3ef9..48645d15292b 100644 
--- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -260,12 +260,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); -void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size); +void acpi_arch_dma_setup(struct device *dev); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } static inline void -acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { } +acpi_arch_dma_setup(struct device *dev) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); @@ -913,8 +913,7 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline int acpi_dma_get_range(struct device *dev, u64 *dma_addr, - u64 *offset, u64 *size) +static inline int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) { return -ENODEV; } -- Gitee From 37767d4930ae8169c7275c8dd8848b44b87b1494 Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Sun, 11 Sep 2022 17:06:35 +0800 Subject: [PATCH 077/241] LoongArch: Use acpi_arch_dma_setup() and remove ARCH_HAS_PHYS_TO_DMA Use _DMA defined in the ACPI spec for translation between DMA address and CPU address, and implement acpi_arch_dma_setup for initializing dev->dma_range_map, where acpi_dma_get_range is called for parsing _DMA. E.g. if we have two DMA ranges:

cpu address      dma address    size          offset
0x200080000000   0x2080000000   0x400000000   0x1fe000000000
0x400080000000   0x4080000000   0x400000000   0x3fc000000000

then _DMA for PCI devices should be declared in the host bridge as follows:

Name (_DMA, ResourceTemplate() {
	QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed,
		NonCacheable, ReadWrite,
		0x0, 0x4080000000, 0x447fffffff, 0x3fc000000000,
		0x400000000, , , )
	QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed,
		NonCacheable, ReadWrite,
		0x0, 0x2080000000, 0x247fffffff, 0x1fe000000000,
		0x400000000, , , )
})

Change-Id: I397d36429ea4a66ff1c0514c56cdb45a3eb7c376 Acked-by: Huacai Chen Signed-off-by: Jianmin Lv Signed-off-by: Rafael J.
Wysocki --- arch/loongarch/Kconfig | 1 - arch/loongarch/kernel/setup.c | 2 +- arch/loongarch/loongson64/dma.c | 47 +++++++++++++-------------------- include/linux/acpi.h | 9 ++++--- 4 files changed, 25 insertions(+), 34 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index c71d0003d769..7663c7e023dc 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -173,7 +173,6 @@ choice config CPU_LOONGSON64 bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 - select ARCH_HAS_PHYS_TO_DMA select CPU_SUPPORTS_64BIT_KERNEL select GPIOLIB select SWIOTLB diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 8a5c34039c52..9dbbd288472a 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -148,7 +148,7 @@ static void __init arch_mem_init(char **cmdline_p) sparse_init(); memblock_set_bottom_up(true); - plat_swiotlb_setup(); + swiotlb_init(1); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/loongson64/dma.c b/arch/loongarch/loongson64/dma.c index 1743f3088de2..0f9040535cbb 100644 --- a/arch/loongarch/loongson64/dma.c +++ b/arch/loongarch/loongson64/dma.c @@ -7,40 +7,29 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited * */ -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include +void acpi_arch_dma_setup(struct device *dev) +{ + int ret; + u64 mask, end = 0; + const struct bus_dma_region *map = NULL; -static int node_id_offset; + ret = acpi_dma_get_range(dev, &map); + if (!ret && map) { + const struct bus_dma_region *r = map; -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - /* We extract 4bit node id (bit 44~47) from Loongson-3's - * 48bit address space and embed it into 40bit */ - long nid = (paddr >> 44) & 0xf; - return ((nid << 44) ^ paddr) | (nid << node_id_offset); -} + for (end = 0; r->size; r++) { + if (r->dma_start + r->size - 1 > end) + end = r->dma_start + r->size - 1; + } -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - /* We extract 4bit node id (bit 44~47) from Loongson-3's - * 48bit address space and embed it into 40bit */ - long nid = (daddr >> node_id_offset) & 0xf; - return ((nid << node_id_offset) ^ daddr) | (nid << 44); -} + mask = DMA_BIT_MASK(ilog2(end) + 1); + dev->bus_dma_limit = end; + dev->dma_range_map = map; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); + } -void __init plat_swiotlb_setup(void) -{ - swiotlb_init(1); - node_id_offset = ((readl(LS7A_DMA_CFG) & LS7A_DMA_NODE_MASK) >> LS7A_DMA_NODE_SHF) + 36; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 48645d15292b..01e44fde11a0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -258,14 +258,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) { } void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa); +#if defined(CONFIG_ARM64) || defined(CONFIG_LOONGARCH) +void acpi_arch_dma_setup(struct device *dev); +#else +static inline void acpi_arch_dma_setup(struct device *dev) { } +#endif + #ifdef CONFIG_ARM64 void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa); -void acpi_arch_dma_setup(struct device *dev); #else static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } -static inline void -acpi_arch_dma_setup(struct device *dev) { } #endif int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma); -- 
Gitee From c4ba0097df3dc722c3d7c97f9d0ad133a147634c Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 30 Aug 2022 18:48:02 +0800 Subject: [PATCH 078/241] LoongArch: Add Kconfig option AS_HAS_EXPLICIT_RELOCS GNU as >= 2.40 and GCC >= 13 will support using explicit relocation hints in the assembly code, instead of la.* macros. The usage of explicit relocation hints can improve code generation so it's enabled by default by GCC >= 13. Introduce a Kconfig option AS_HAS_EXPLICIT_RELOCS as the switch for "use explicit relocation hints or not". Change-Id: I9ed68f9c6719f5d3218d5142d1b22f9e0e98e02c Tested-by: WANG Xuerui Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 7663c7e023dc..ea4f9b507d3d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -151,6 +151,9 @@ config SCHED_OMIT_FRAME_POINTER bool default y +config AS_HAS_EXPLICIT_RELOCS + def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x)) + config SYS_SUPPORTS_HOTPLUG_CPU bool -- Gitee From f02c45d4ff4d184e07aa34ff7f2777c0f3f4eecc Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 30 Aug 2022 18:48:03 +0800 Subject: [PATCH 079/241] LoongArch: Adjust symbol addressing for AS_HAS_EXPLICIT_RELOCS If explicit relocation hints are used by the toolchain, -Wa,-mla-* options will be useless for the C code. So only use them for the !CONFIG_AS_HAS_EXPLICIT_RELOCS case. Replace "la" with "la.pcrel" in head.S to keep the semantic consistent with new and old toolchains for the low level startup code. For per-CPU variables, the "address" of the symbol is actually an offset from $r21. The value is near the loading address of main kernel image, but far from the loading address of modules. So we use the model("extreme") attribute to tell the compiler that a PC-relative addressing with 32-bit offset is not sufficient for local per-CPU variables. The behavior with different assemblers and compilers is summarized in the following table:

   AS has           CC has
explicit relocs  explicit relocs *  Behavior
==============================================================
No               No                 Use la.* macros.
                                    No change from Linux 6.0.
--------------------------------------------------------------
No               Yes                Disable explicit relocs.
                                    No change from Linux 6.0.
--------------------------------------------------------------
Yes              No                 Not supported.
--------------------------------------------------------------
Yes              Yes                Enable explicit relocs.
                                    No -Wa,-mla* options used.
==============================================================

*: We assume CC must have the model attribute if it has explicit relocs. Both features are added in the GCC 13 development cycle, so any GCC release >= 13 should be OK. Using early GCC 13 development snapshots may produce modules with unsupported relocations.
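As an illustration (a sketch, not taken from this patch): with explicit relocs enabled, a PC-relative symbol address that the la.pcrel macro used to hide is emitted as a relocation-annotated instruction pair,

	pcalau12i	$t0, %pc_hi20(sym)
	addi.d		$t0, $t0, %pc_lo12(sym)

which gives the assembler and linker full visibility instead of an opaque macro expansion.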
Change-Id: I6e256949411295c816044eea147b069fc84bfaf6 Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f09482a Link: https://gcc.gnu.org/r13-1834 Link: https://gcc.gnu.org/r13-2199 Tested-by: WANG Xuerui Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 18 ++++++++++++++++++ arch/loongarch/include/asm/percpu.h | 9 +++++++++ arch/loongarch/kernel/head.S | 12 ++++++------ arch/loongarch/kernel/vmlinux.lds.S | 4 ++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 6b0394d87da5..2553b83396c2 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -34,10 +34,28 @@ all-y := vmlinux cflags-y += -G0 -pipe -msoft-float LDFLAGS_vmlinux += -G0 -static -n -nostdlib + +# When the assembler supports explicit relocation hint, we must use it. +# GCC may have -mexplicit-relocs off by default if it was built with an old +# assembler, so we force it via an option. +# +# When the assembler does not supports explicit relocation hint, we can't use +# it. Disable it if the compiler supports it. +# +# If you've seen "unknown reloc hint" message building the kernel and you are +# now wondering why "-mexplicit-relocs" is not wrapped with cc-option: the +# combination of a "new" assembler and "old" compiler is not supported. Either +# upgrade the compiler or downgrade the assembler. +ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS +cflags-y += -mexplicit-relocs +KBUILD_CFLAGS_KERNEL += -mdirect-extern-access +else +cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_CFLAGS_KERNEL += -Wa,-mla-global-with-pcrel KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs +endif cflags-y += -ffreestanding diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 166778c3db05..fd0556726418 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -7,6 +7,15 @@ #include +/* + * The "address" (in fact, offset from $r21) of a per-CPU variable is close to + * the loading address of main kernel image, but far from where the modules are + * loaded. Tell the compiler this fact when using explicit relocs. 
+ */ +#if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS) +#define PER_CPU_ATTRIBUTES __attribute__((model("extreme"))) +#endif + /* Use r21 for fast access */ register unsigned long __my_cpu_offset __asm__("$r21"); diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index e10edaab1a69..6e742e9fbf0b 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -39,19 +39,19 @@ SYM_CODE_START(kernel_entry) # kernel entry point li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0 csrwr t0, LOONGARCH_CSR_EUEN - la t0, __bss_start # clear .bss + la.pcrel t0, __bss_start # clear .bss st.d zero, t0, 0 - la t1, __bss_stop - LONGSIZE + la.pcrel t1, __bss_stop - LONGSIZE 1: addi.d t0, t0, LONGSIZE st.d zero, t0, 0 bne t0, t1, 1b - la t0, fw_arg0 + la.pcrel t0, fw_arg0 st.d a0, t0, 0 # firmware arguments - la t0, fw_arg1 + la.pcrel t0, fw_arg1 st.d a1, t0, 0 - la t0, fw_arg2 + la.pcrel t0, fw_arg2 st.d a2, t0, 0 /* KSave3 used for percpu base, initialized as 0 */ @@ -59,7 +59,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point /* GPR21 used for percpu base (runtime), initialized as 0 */ or u0, zero, zero - la tp, init_thread_union + la.pcrel tp, init_thread_union /* Set the SP after an empty pt_regs. */ PTR_LI sp, (_THREAD_SIZE - PT_SIZE) PTR_ADD sp, sp, tp diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 5f9cbc43a617..b7888440997c 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -43,6 +43,10 @@ SECTIONS EXCEPTION_TABLE(16) + .got : ALIGN(16) { *(.got) } + .plt : ALIGN(16) { *(.plt) } + .got.plt : ALIGN(16) { *(.got.plt) } + _sdata = .; /* Start of data section */ RO_DATA(4096) RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) -- Gitee From 5ef228b8d4927c058facc28f422ce43ddeffd1cd Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 30 Aug 2022 18:48:04 +0800 Subject: [PATCH 080/241] LoongArch: Define ELF relocation types added in ABIv2.0 These relocation types are used by GNU binutils >= 2.40 and GCC >= 13. Add their definitions so we will be able to use them in later patches. 
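To see what the new PCALA pair encodes, here is a small user-space
sketch (illustrative only, not kernel code; the addresses are made up)
of the address math behind pcalau12i + addi.d: R_LARCH_PCALA_HI20
stores the page-granular PC-relative delta, rounded with +0x800 so that
the low 12 bits, stored by R_LARCH_PCALA_LO12 and sign-extended by
addi.d, land exactly on the target:

  #include <stdint.h>
  #include <stdio.h>

  /* Sign-extend a 12-bit immediate, as addi.d does at run time. */
  static int64_t sext12(uint64_t v)
  {
	return (int64_t)(v << 52) >> 52;
  }

  int main(void)
  {
	uint64_t pc  = 0x9000000001234567ULL;	/* address of the pcalau12i */
	uint64_t sym = 0x9000000005678abcULL;	/* target symbol address */

	/* What a linker or module loader computes for the two relocs. */
	int64_t  hi20 = (int64_t)(((sym + 0x800) & ~0xfffULL) -
				  (pc & ~0xfffULL)) >> 12;
	uint64_t lo12 = sym & 0xfff;

	/* What the instruction pair computes when it executes. */
	uint64_t page = (pc & ~0xfffULL) + ((uint64_t)hi20 << 12);	/* pcalau12i */
	uint64_t addr = page + (uint64_t)sext12(lo12);			/* addi.d */

	printf("reconstructed %#llx, expected %#llx\n",
	       (unsigned long long)addr, (unsigned long long)sym);
	return 0;
  }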
Change-Id: Ib1e92eb708d899be24512b62a829b683d9dbba4c
Link: https://github.com/loongson/LoongArch-Documentation/pull/57
Tested-by: WANG Xuerui
Signed-off-by: Xi Ruoyao
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/elf.h | 37 ++++++++++++++++++++++++++++++++
 arch/loongarch/kernel/module.c   |  2 +-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index f881bd16514d..9107fa0dcca3 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -79,6 +79,43 @@
 #define R_LARCH_SUB64                  56
 #define R_LARCH_GNU_VTINHERIT          57
 #define R_LARCH_GNU_VTENTRY            58
+#define R_LARCH_B16                    64
+#define R_LARCH_B21                    65
+#define R_LARCH_B26                    66
+#define R_LARCH_ABS_HI20               67
+#define R_LARCH_ABS_LO12               68
+#define R_LARCH_ABS64_LO20             69
+#define R_LARCH_ABS64_HI12             70
+#define R_LARCH_PCALA_HI20             71
+#define R_LARCH_PCALA_LO12             72
+#define R_LARCH_PCALA64_LO20           73
+#define R_LARCH_PCALA64_HI12           74
+#define R_LARCH_GOT_PC_HI20            75
+#define R_LARCH_GOT_PC_LO12            76
+#define R_LARCH_GOT64_PC_LO20          77
+#define R_LARCH_GOT64_PC_HI12          78
+#define R_LARCH_GOT_HI20               79
+#define R_LARCH_GOT_LO12               80
+#define R_LARCH_GOT64_LO20             81
+#define R_LARCH_GOT64_HI12             82
+#define R_LARCH_TLS_LE_HI20            83
+#define R_LARCH_TLS_LE_LO12            84
+#define R_LARCH_TLS_LE64_LO20          85
+#define R_LARCH_TLS_LE64_HI12          86
+#define R_LARCH_TLS_IE_PC_HI20         87
+#define R_LARCH_TLS_IE_PC_LO12         88
+#define R_LARCH_TLS_IE64_PC_LO20       89
+#define R_LARCH_TLS_IE64_PC_HI12       90
+#define R_LARCH_TLS_IE_HI20            91
+#define R_LARCH_TLS_IE_LO12            92
+#define R_LARCH_TLS_IE64_LO20          93
+#define R_LARCH_TLS_IE64_HI12          94
+#define R_LARCH_TLS_LD_PC_HI20         95
+#define R_LARCH_TLS_LD_HI20            96
+#define R_LARCH_TLS_GD_PC_HI20         97
+#define R_LARCH_TLS_GD_HI20            98
+#define R_LARCH_32_PCREL               99
+#define R_LARCH_RELAX                  100
 
 #ifndef ELF_ARCH

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 5e8a5b7cc45c..a46f47b411a1 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -296,7 +296,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v,
 
 /* The handlers for known reloc types */
 static reloc_rela_handler reloc_rela_handlers[] = {
-	[R_LARCH_NONE ... R_LARCH_SUB64] = apply_r_larch_error,
+	[R_LARCH_NONE ... R_LARCH_RELAX] = apply_r_larch_error,
 
 	[R_LARCH_NONE] = apply_r_larch_none,
 	[R_LARCH_32] = apply_r_larch_32,
-- 
Gitee
From 8716fd2ca0301effdbb7f69e51a0f69d0b8df8ce Mon Sep 17 00:00:00 2001
From: Xi Ruoyao
Date: Tue, 30 Aug 2022 18:48:05 +0800
Subject: [PATCH 081/241] LoongArch: Support PC-relative relocations in modules

Binutils >= 2.40 uses R_LARCH_B26 instead of R_LARCH_SOP_PUSH_PLT_PCREL,
and R_LARCH_PCALA* instead of R_LARCH_SOP_PUSH_PCREL. Handle R_LARCH_B26
and R_LARCH_PCALA* in the module loader. For R_LARCH_B26, also create a
PLT entry as needed.
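The PLT decision in the new B26 handler follows from the encoding: 26
stored immediate bits plus 2 implicit low zero bits give a signed
28-bit byte offset, i.e. a +/- 128 MiB reach. A stand-alone sketch of
that range test (it mirrors the signed_imm_check(offset, 28) logic used
by the handler below; the sample offsets are arbitrary):

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Does 'val' fit in a signed immediate of 'bits' bits? */
  static bool fits_signed(int64_t val, unsigned int bits)
  {
	return val >= -(1LL << (bits - 1)) && val < (1LL << (bits - 1));
  }

  int main(void)
  {
	int64_t offsets[] = { 4, (1LL << 27) - 4, -(1LL << 27), 1LL << 27 };
	unsigned int i;

	for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
		printf("%12lld -> %s\n", (long long)offsets[i],
		       fits_signed(offsets[i], 28) ?
		       "direct B26 branch" : "needs a PLT entry");
	return 0;
  }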
Change-Id: I66af604ea28e7f7498b304b6c10d4d8b4a3663af Tested-by: WANG Xuerui Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/module-sections.c | 7 ++- arch/loongarch/kernel/module.c | 69 +++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 6d498288977d..e25c2ccf2665 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -56,9 +56,14 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) for (i = 0; i < num; i++) { type = ELF_R_TYPE(relas[i].r_info); - if (type == R_LARCH_SOP_PUSH_PLT_PCREL) { + switch (type) { + case R_LARCH_SOP_PUSH_PLT_PCREL: + case R_LARCH_B26: if (!duplicate_rela(relas, i)) (*plts)++; + break; + default: + break; /* Do nothing. */ } } } diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index a46f47b411a1..6c311896e8d7 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -281,6 +281,73 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v, } } +static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + ptrdiff_t offset = (void *)v - (void *)location; + union loongarch_instruction *insn = (union loongarch_instruction *)location; + + if (offset >= SZ_128M) + v = module_emit_plt_entry(mod, v); + + if (offset < -SZ_128M) + v = module_emit_plt_entry(mod, v); + + offset = (void *)v - (void *)location; + + if (offset & 3) { + pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n", + mod->name, (long long)offset, type); + return -ENOEXEC; + } + + if (!signed_imm_check(offset, 28)) { + pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n", + mod->name, (long long)offset, type); + return -ENOEXEC; + } + + offset >>= 2; + insn->reg0i26_format.simmediate_l = offset & 0xffff; + insn->reg0i26_format.simmediate_h = (offset >> 16) & 0x3ff; + + return 0; +} + +static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + union loongarch_instruction *insn = (union loongarch_instruction *)location; + /* Use s32 for a sign-extension deliberately. */ + s32 offset_hi20 = (void *)((v + 0x800) & ~0xfff) - + (void *)((Elf_Addr)location & ~0xfff); + Elf_Addr anchor = (((Elf_Addr)location) & ~0xfff) + offset_hi20; + ptrdiff_t offset_rem = (void *)v - (void *)anchor; + + switch (type) { + case R_LARCH_PCALA_LO12: + insn->reg2i12_format.simmediate = v & 0xfff; + break; + case R_LARCH_PCALA_HI20: + v = offset_hi20 >> 12; + insn->reg1i20_format.simmediate = v & 0xfffff; + break; + case R_LARCH_PCALA64_LO20: + v = offset_rem >> 32; + insn->reg1i20_format.simmediate = v & 0xfffff; + break; + case R_LARCH_PCALA64_HI12: + v = offset_rem >> 52; + insn->reg2i12_format.simmediate = v & 0xfff; + break; + default: + pr_err("%s: Unsupport relocation type %u\n", mod->name, type); + return -EINVAL; + } + + return 0; +} + /* * reloc_handlers_rela() - Apply a particular relocation to a module * @mod: the module to apply the reloc to @@ -310,6 +377,8 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop, [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... 
R_LARCH_SUB64] = apply_r_larch_add_sub, + [R_LARCH_B26] = apply_r_larch_b26, + [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, -- Gitee From 53ca2d1f1c95d259af6462604f7509bb71ab257c Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 30 Aug 2022 18:48:06 +0800 Subject: [PATCH 082/241] LoongArch: Support R_LARCH_GOT_PC_{LO12,HI20} in modules GCC >= 13 and GNU assembler >= 2.40 use these relocations to address external symbols, so we need to add them. Let the module loader emit GOT entries for data symbols so we would be able to handle GOT relocations. The GOT entry is just the data's symbol address. In module.lds, emit a stub .got section for a section header entry. The actual content of the section entry will be filled at runtime by module_ frob_arch_sections(). Change-Id: I31f9c7d25d6ed330e9bed3351942c864664bcbe7 Tested-by: WANG Xuerui Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/module.h | 27 ++++++++++++- arch/loongarch/include/asm/module.lds.h | 1 + arch/loongarch/kernel/module-sections.c | 54 ++++++++++++++++++++++--- arch/loongarch/kernel/module.c | 24 +++++++++++ 4 files changed, 99 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index c54d93ea2c27..bab3d67ad3df 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -17,10 +17,15 @@ struct mod_section { }; struct mod_arch_specific { + struct mod_section got; struct mod_section plt; struct mod_section plt_idx; }; +struct got_entry { + Elf_Addr symbol_addr; +}; + struct plt_entry { u32 inst_lu12iw; u32 inst_lu32id; @@ -29,10 +34,16 @@ struct plt_entry { }; struct plt_idx_entry { - unsigned long symbol_addr; + Elf_Addr symbol_addr; }; -Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val); +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val); +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val); + +static inline struct got_entry emit_got_entry(Elf_Addr val) +{ + return (struct got_entry) { val }; +} static inline struct plt_entry emit_plt_entry(unsigned long val) { @@ -77,4 +88,16 @@ static inline struct plt_entry *get_plt_entry(unsigned long val, return plt + plt_idx; } +static inline struct got_entry *get_got_entry(Elf_Addr val, + const struct mod_section *sec) +{ + struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; + int i; + + for (i = 0; i < sec->num_entries; i++) + if (got[i].symbol_addr == val) + return &got[i]; + return NULL; +} + #endif /* _ASM_MODULE_H */ diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index 31c1c0db11a3..a3d1bc0fcc72 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -2,6 +2,7 @@ /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ SECTIONS { . 
= ALIGN(4); + .got : { BYTE(0) } .plt : { BYTE(0) } .plt.idx : { BYTE(0) } } diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index e25c2ccf2665..d296a70b758f 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -7,7 +7,33 @@ #include #include -Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val) +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) +{ + struct mod_section *got_sec = &mod->arch.got; + int i = got_sec->num_entries; + struct got_entry *got = get_got_entry(val, got_sec); + + if (got) + return (Elf_Addr)got; + + /* There is no GOT entry for val yet, create a new one. */ + got = (struct got_entry *)got_sec->shdr->sh_addr; + got[i] = emit_got_entry(val); + + got_sec->num_entries++; + if (got_sec->num_entries > got_sec->max_entries) { + /* + * This may happen when the module contains a GOT_HI20 without + * a paired GOT_LO12. Such a module is broken, reject it. + */ + pr_err("%s: module contains bad GOT relocation\n", mod->name); + return 0; + } + + return (Elf_Addr)&got[i]; +} + +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) { int nr; struct mod_section *plt_sec = &mod->arch.plt; @@ -50,7 +76,8 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx) return false; } -static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) +static void count_max_entries(Elf_Rela *relas, int num, + unsigned int *plts, unsigned int *gots) { unsigned int i, type; @@ -62,6 +89,10 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) if (!duplicate_rela(relas, i)) (*plts)++; break; + case R_LARCH_GOT_PC_HI20: + if (!duplicate_rela(relas, i)) + (*gots)++; + break; default: break; /* Do nothing. */ } @@ -71,18 +102,24 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts) int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned int i, num_plts = 0; + unsigned int i, num_plts = 0, num_gots = 0; /* * Find the empty .plt sections. 
*/ for (i = 0; i < ehdr->e_shnum; i++) { - if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) + mod->arch.got.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) mod->arch.plt.shdr = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) mod->arch.plt_idx.shdr = sechdrs + i; } + if (!mod->arch.got.shdr) { + pr_err("%s: module GOT section(s) missing\n", mod->name); + return -ENOEXEC; + } if (!mod->arch.plt.shdr) { pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; @@ -105,9 +142,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (!(dst_sec->sh_flags & SHF_EXECINSTR)) continue; - count_max_entries(relas, num_rela, &num_plts); + count_max_entries(relas, num_rela, &num_plts, &num_gots); } + mod->arch.got.shdr->sh_type = SHT_NOBITS; + mod->arch.got.shdr->sh_flags = SHF_ALLOC; + mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + mod->arch.got.num_entries = 0; + mod->arch.got.max_entries = num_gots; + mod->arch.plt.shdr->sh_type = SHT_NOBITS; mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 6c311896e8d7..ddb1cd5ff9ac 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -348,6 +348,29 @@ static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, return 0; } +static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v, + s64 *rela_stack, size_t *rela_stack_top, unsigned int type) +{ + Elf_Addr got = module_emit_got_entry(mod, v); + + if (!got) + return -EINVAL; + + switch (type) { + case R_LARCH_GOT_PC_LO12: + type = R_LARCH_PCALA_LO12; + break; + case R_LARCH_GOT_PC_HI20: + type = R_LARCH_PCALA_HI20; + break; + default: + pr_err("%s: Unsupport relocation type %u\n", mod->name, type); + return -EINVAL; + } + + return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type); +} + /* * reloc_handlers_rela() - Apply a particular relocation to a module * @mod: the module to apply the reloc to @@ -379,6 +402,7 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub, [R_LARCH_B26] = apply_r_larch_b26, [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, + [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, -- Gitee From 4ac3112487fb3f75da3cfa58826bcf71336b1ac6 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 4 Oct 2022 22:02:30 +0800 Subject: [PATCH 083/241] LoongArch: mm: Refactor TLB exception handlers This patch simplifies TLB load, store and modify exception handlers: 1. Reduce instructions, such as alu/csr and memory access; 2. Execute tlb search instruction only in the fast path; 3. Return directly from the fast path for both normal and huge pages; 4. Re-tab the assembly for better vertical alignment. And fixes the concurrent modification issue of fast path for huge pages. 
This issue will occur in the following steps: CPU-1 (In TLB exception) CPU-2 (In THP splitting) 1: Load PMD entry (HUGE=1) 2: Goto huge path 3: Store PMD entry (HUGE=0) 4: Reload PMD entry (HUGE=0) 5: Fill TLB entry (PA is incorrect) This patch also slightly improves the TLB processing performance: * Normal pages: 2.15%, Huge pages: 1.70%. #include #include #include #include int main(int argc, char *argv[]) { size_t page_size; size_t mem_size; size_t off; void *base; int flags; int i; if (argc < 2) { fprintf(stderr, "%s MEM_SIZE [HUGE]\n", argv[0]); return -1; } page_size = sysconf(_SC_PAGESIZE); flags = MAP_PRIVATE | MAP_ANONYMOUS; mem_size = strtoul(argv[1], NULL, 10); if (argc > 2) flags |= MAP_HUGETLB; for (i = 0; i < 10; i++) { base = mmap(NULL, mem_size, PROT_READ, flags, -1, 0); if (base == MAP_FAILED) { fprintf(stderr, "Map memory failed!\n"); return -1; } for (off = 0; off < mem_size; off += page_size) *(volatile int *)(base + off); munmap(base, mem_size); } return 0; } Change-Id: I9f490d004d74b88e1750b27df83c60dbc92a4a1d Signed-off-by: Rui Wang Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlbex.S | 577 ++++++++++++++++++-------------------- 1 file changed, 267 insertions(+), 310 deletions(-) diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index c8a6e0595593..293b62462ac4 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -14,15 +14,20 @@ #include #endif +#define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) +#define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) +#define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) +#define PTRS_PER_PTE_BITS (PAGE_SHIFT - 3) + .macro tlb_do_page_fault, write SYM_FUNC_START(tlb_do_page_fault_\write) SAVE_ALL - csrrd a2, LOONGARCH_CSR_BADV - move a0, sp - REG_S a2, sp, PT_BVADDR - li.w a1, \write - la.abs t0, do_page_fault - jirl ra, t0, 0 + csrrd a2, LOONGARCH_CSR_BADV + move a0, sp + REG_S a2, sp, PT_BVADDR + li.w a1, \write + la.abs t0, do_page_fault + jirl ra, t0, 0 RESTORE_ALL_AND_RET SYM_FUNC_END(tlb_do_page_fault_\write) .endm @@ -33,133 +38,115 @@ SYM_FUNC_START(handle_tlb_protect) BACKUP_T0T1 SAVE_ALL - move a0, sp - move a1, zero - csrrd a2, LOONGARCH_CSR_BADV - REG_S a2, sp, PT_BVADDR - la.abs t0, do_page_fault - jirl ra, t0, 0 + move a0, sp + move a1, zero + csrrd a2, LOONGARCH_CSR_BADV + REG_S a2, sp, PT_BVADDR + la.abs t0, do_page_fault + jirl ra, t0, 0 RESTORE_ALL_AND_RET SYM_FUNC_END(handle_tlb_protect) SYM_FUNC_START(handle_tlb_load) - csrwr t0, EXCEPTION_KS0 - csrwr t1, EXCEPTION_KS1 - csrwr ra, EXCEPTION_KS2 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 /* * The vmalloc handling is not in the hotpath. 
*/ - csrrd t0, LOONGARCH_CSR_BADV - blt t0, $r0, vmalloc_load - csrrd t1, LOONGARCH_CSR_PGDL + csrrd t0, LOONGARCH_CSR_BADV + bltz t0, vmalloc_load + csrrd t1, LOONGARCH_CSR_PGDL vmalloc_done_load: /* Get PGD offset in bytes */ - srli.d t0, t0, PGDIR_SHIFT - andi t0, t0, (PTRS_PER_PGD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + bstrpick.d ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT + alsl.d t1, ra, t1, 3 #if CONFIG_PGTABLE_LEVELS > 3 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PUD_SHIFT - andi t0, t0, (PTRS_PER_PUD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT + alsl.d t1, ra, t1, 3 #endif #if CONFIG_PGTABLE_LEVELS > 2 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PMD_SHIFT - andi t0, t0, (PTRS_PER_PMD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT + alsl.d t1, ra, t1, 3 #endif - ld.d ra, t1, 0 + ld.d ra, t1, 0 /* * For huge tlb entries, pmde doesn't contain an address but * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - andi t0, ra, _PAGE_HUGE - bne t0, $r0, tlb_huge_update_load + rotri.d ra, ra, _PAGE_HUGE_SHIFT + 1 + bltz ra, tlb_huge_update_load - csrrd t0, LOONGARCH_CSR_BADV - srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) - andi t0, t0, (PTRS_PER_PTE - 1) - slli.d t0, t0, _PTE_T_LOG2 - add.d t1, ra, t0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + bstrpick.d t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT + alsl.d t1, t0, ra, _PTE_T_LOG2 #ifdef CONFIG_SMP smp_pgtable_change_load: -#endif -#ifdef CONFIG_SMP - ll.d t0, t1, 0 + ll.d t0, t1, 0 #else - ld.d t0, t1, 0 + ld.d t0, t1, 0 #endif - tlbsrch + andi ra, t0, _PAGE_PRESENT + beqz ra, nopage_tlb_load - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, 1 - beq ra, $r0, nopage_tlb_load - - ori t0, t0, _PAGE_VALID + ori t0, t0, _PAGE_VALID #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beq t0, $r0, smp_pgtable_change_load + sc.d t0, t1, 0 + beqz t0, smp_pgtable_change_load #else - st.d t0, t1, 0 + st.d t0, t1, 0 #endif - ori t1, t1, 8 - xori t1, t1, 8 - ld.d t0, t1, 0 - ld.d t1, t1, 8 - csrwr t0, LOONGARCH_CSR_TLBELO0 - csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbsrch + bstrins.d t1, zero, 3, 3 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 tlbwr -leave_load: - csrrd t0, EXCEPTION_KS0 - csrrd t1, EXCEPTION_KS1 - csrrd ra, EXCEPTION_KS2 + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 ertn + #ifdef CONFIG_64BIT vmalloc_load: - la.abs t1, swapper_pg_dir - b vmalloc_done_load + la.abs t1, swapper_pg_dir + b vmalloc_done_load #endif - /* - * This is the entry point when build_tlbchange_handler_head - * spots a huge page. - */ + /* This is the entry point of a huge page. 
*/ tlb_huge_update_load: #ifdef CONFIG_SMP - ll.d t0, t1, 0 -#else - ld.d t0, t1, 0 + ll.d ra, t1, 0 #endif - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, 1 - beq ra, $r0, nopage_tlb_load - tlbsrch + andi t0, ra, _PAGE_PRESENT + beqz t0, nopage_tlb_load - ori t0, t0, _PAGE_VALID #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beq t0, $r0, tlb_huge_update_load - ld.d t0, t1, 0 + ori t0, ra, _PAGE_VALID + sc.d t0, t1, 0 + beqz t0, tlb_huge_update_load + ori t0, ra, _PAGE_VALID #else - st.d t0, t1, 0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + ori t0, ra, _PAGE_VALID + st.d t0, t1, 0 #endif - addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX + tlbsrch + addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) + addi.d ra, t1, 0 + csrxchg ra, t1, LOONGARCH_CSR_TLBIDX tlbwr - csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX + csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the @@ -171,163 +158,146 @@ tlb_huge_update_load: * address space. */ /* Huge page: Move Global bit */ - xori t0, t0, _PAGE_HUGE - lu12i.w t1, _PAGE_HGLOBAL >> 12 - and t1, t0, t1 - srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) - or t0, t0, t1 + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 - addi.d ra, t0, 0 - csrwr t0, LOONGARCH_CSR_TLBELO0 - addi.d t0, ra, 0 + move ra, t0 + csrwr ra, LOONGARCH_CSR_TLBELO0 /* Convert to entrylo1 */ - addi.d t1, $r0, 1 - slli.d t1, t1, (HPAGE_SHIFT - 1) - add.d t0, t0, t1 - csrwr t0, LOONGARCH_CSR_TLBELO1 + addi.d t1, zero, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 /* Set huge page tlb entry size */ - addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) - addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX tlbfill - addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) - addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn + nopage_tlb_load: - dbar 0 - csrrd ra, EXCEPTION_KS2 - la.abs t0, tlb_do_page_fault_0 - jirl $r0, t0, 0 + dbar 0 + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_0 + jr t0 SYM_FUNC_END(handle_tlb_load) SYM_FUNC_START(handle_tlb_store) - csrwr t0, EXCEPTION_KS0 - csrwr t1, EXCEPTION_KS1 - csrwr ra, EXCEPTION_KS2 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 /* * The vmalloc handling is not in the hotpath. 
*/ - csrrd t0, LOONGARCH_CSR_BADV - blt t0, $r0, vmalloc_store - csrrd t1, LOONGARCH_CSR_PGDL + csrrd t0, LOONGARCH_CSR_BADV + bltz t0, vmalloc_store + csrrd t1, LOONGARCH_CSR_PGDL vmalloc_done_store: /* Get PGD offset in bytes */ - srli.d t0, t0, PGDIR_SHIFT - andi t0, t0, (PTRS_PER_PGD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 - + bstrpick.d ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT + alsl.d t1, ra, t1, 3 #if CONFIG_PGTABLE_LEVELS > 3 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PUD_SHIFT - andi t0, t0, (PTRS_PER_PUD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT + alsl.d t1, ra, t1, 3 #endif #if CONFIG_PGTABLE_LEVELS > 2 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PMD_SHIFT - andi t0, t0, (PTRS_PER_PMD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT + alsl.d t1, ra, t1, 3 #endif - ld.d ra, t1, 0 + ld.d ra, t1, 0 /* * For huge tlb entries, pmde doesn't contain an address but * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - andi t0, ra, _PAGE_HUGE - bne t0, $r0, tlb_huge_update_store + rotri.d ra, ra, _PAGE_HUGE_SHIFT + 1 + bltz ra, tlb_huge_update_store - csrrd t0, LOONGARCH_CSR_BADV - srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) - andi t0, t0, (PTRS_PER_PTE - 1) - slli.d t0, t0, _PTE_T_LOG2 - add.d t1, ra, t0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + bstrpick.d t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT + alsl.d t1, t0, ra, _PTE_T_LOG2 #ifdef CONFIG_SMP smp_pgtable_change_store: -#endif -#ifdef CONFIG_SMP - ll.d t0, t1, 0 + ll.d t0, t1, 0 #else - ld.d t0, t1, 0 + ld.d t0, t1, 0 #endif - tlbsrch + andi ra, t0, _PAGE_PRESENT | _PAGE_WRITE + xori ra, ra, _PAGE_PRESENT | _PAGE_WRITE + bnez ra, nopage_tlb_store - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - bne ra, $r0, nopage_tlb_store - - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beq t0, $r0, smp_pgtable_change_store + sc.d t0, t1, 0 + beqz t0, smp_pgtable_change_store #else - st.d t0, t1, 0 + st.d t0, t1, 0 #endif - - ori t1, t1, 8 - xori t1, t1, 8 - ld.d t0, t1, 0 - ld.d t1, t1, 8 - csrwr t0, LOONGARCH_CSR_TLBELO0 - csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbsrch + bstrins.d t1, zero, 3, 3 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 tlbwr -leave_store: - csrrd t0, EXCEPTION_KS0 - csrrd t1, EXCEPTION_KS1 - csrrd ra, EXCEPTION_KS2 + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 ertn + #ifdef CONFIG_64BIT vmalloc_store: - la.abs t1, swapper_pg_dir - b vmalloc_done_store + la.abs t1, swapper_pg_dir + b vmalloc_done_store #endif - /* - * This is the entry point when build_tlbchange_handler_head - * spots a huge page. - */ + /* This is the entry point of a huge page. 
*/ tlb_huge_update_store: #ifdef CONFIG_SMP - ll.d t0, t1, 0 -#else - ld.d t0, t1, 0 + ll.d ra, t1, 0 #endif - srli.d ra, t0, _PAGE_PRESENT_SHIFT - andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT) - bne ra, $r0, nopage_tlb_store - - tlbsrch - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + andi t0, ra, _PAGE_PRESENT | _PAGE_WRITE + xori t0, t0, _PAGE_PRESENT | _PAGE_WRITE + bnez t0, nopage_tlb_store #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beq t0, $r0, tlb_huge_update_store - ld.d t0, t1, 0 + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + sc.d t0, t1, 0 + beqz t0, tlb_huge_update_store + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #else - st.d t0, t1, 0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 #endif - addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX + tlbsrch + addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) + addi.d ra, t1, 0 + csrxchg ra, t1, LOONGARCH_CSR_TLBIDX tlbwr - csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX + csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -338,154 +308,137 @@ tlb_huge_update_store: * address space. */ /* Huge page: Move Global bit */ - xori t0, t0, _PAGE_HUGE - lu12i.w t1, _PAGE_HGLOBAL >> 12 - and t1, t0, t1 - srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) - or t0, t0, t1 + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 - addi.d ra, t0, 0 - csrwr t0, LOONGARCH_CSR_TLBELO0 - addi.d t0, ra, 0 + move ra, t0 + csrwr ra, LOONGARCH_CSR_TLBELO0 /* Convert to entrylo1 */ - addi.d t1, $r0, 1 - slli.d t1, t1, (HPAGE_SHIFT - 1) - add.d t0, t0, t1 - csrwr t0, LOONGARCH_CSR_TLBELO1 + addi.d t1, zero, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 /* Set huge page tlb entry size */ - addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) - addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX tlbfill /* Reset default page size */ - addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) - addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn + nopage_tlb_store: - dbar 0 - csrrd ra, EXCEPTION_KS2 - la.abs t0, tlb_do_page_fault_1 - jirl $r0, t0, 0 + dbar 0 + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jr t0 SYM_FUNC_END(handle_tlb_store) SYM_FUNC_START(handle_tlb_modify) - csrwr t0, EXCEPTION_KS0 - csrwr t1, EXCEPTION_KS1 - csrwr ra, EXCEPTION_KS2 + csrwr t0, EXCEPTION_KS0 + csrwr t1, EXCEPTION_KS1 + csrwr ra, EXCEPTION_KS2 /* * The vmalloc handling is not in the hotpath. 
*/ - csrrd t0, LOONGARCH_CSR_BADV - blt t0, $r0, vmalloc_modify - csrrd t1, LOONGARCH_CSR_PGDL + csrrd t0, LOONGARCH_CSR_BADV + bltz t0, vmalloc_modify + csrrd t1, LOONGARCH_CSR_PGDL vmalloc_done_modify: /* Get PGD offset in bytes */ - srli.d t0, t0, PGDIR_SHIFT - andi t0, t0, (PTRS_PER_PGD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + bstrpick.d ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT + alsl.d t1, ra, t1, 3 #if CONFIG_PGTABLE_LEVELS > 3 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PUD_SHIFT - andi t0, t0, (PTRS_PER_PUD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT + alsl.d t1, ra, t1, 3 #endif #if CONFIG_PGTABLE_LEVELS > 2 - csrrd t0, LOONGARCH_CSR_BADV - ld.d t1, t1, 0 - srli.d t0, t0, PMD_SHIFT - andi t0, t0, (PTRS_PER_PMD - 1) - slli.d t0, t0, 3 - add.d t1, t1, t0 + ld.d t1, t1, 0 + bstrpick.d ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT + alsl.d t1, ra, t1, 3 #endif - ld.d ra, t1, 0 + ld.d ra, t1, 0 /* * For huge tlb entries, pmde doesn't contain an address but * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - andi t0, ra, _PAGE_HUGE - bne t0, $r0, tlb_huge_update_modify + rotri.d ra, ra, _PAGE_HUGE_SHIFT + 1 + bltz ra, tlb_huge_update_modify - csrrd t0, LOONGARCH_CSR_BADV - srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER) - andi t0, t0, (PTRS_PER_PTE - 1) - slli.d t0, t0, _PTE_T_LOG2 - add.d t1, ra, t0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + bstrpick.d t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT + alsl.d t1, t0, ra, _PTE_T_LOG2 #ifdef CONFIG_SMP smp_pgtable_change_modify: -#endif -#ifdef CONFIG_SMP - ll.d t0, t1, 0 + ll.d t0, t1, 0 #else - ld.d t0, t1, 0 + ld.d t0, t1, 0 #endif - tlbsrch - - srli.d ra, t0, _PAGE_WRITE_SHIFT - andi ra, ra, 1 - beq ra, $r0, nopage_tlb_modify + andi ra, t0, _PAGE_WRITE + beqz ra, nopage_tlb_modify - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beq t0, $r0, smp_pgtable_change_modify + sc.d t0, t1, 0 + beqz t0, smp_pgtable_change_modify #else - st.d t0, t1, 0 + st.d t0, t1, 0 #endif - ori t1, t1, 8 - xori t1, t1, 8 - ld.d t0, t1, 0 - ld.d t1, t1, 8 - csrwr t0, LOONGARCH_CSR_TLBELO0 - csrwr t1, LOONGARCH_CSR_TLBELO1 + tlbsrch + bstrins.d t1, zero, 3, 3 + ld.d t0, t1, 0 + ld.d t1, t1, 8 + csrwr t0, LOONGARCH_CSR_TLBELO0 + csrwr t1, LOONGARCH_CSR_TLBELO1 tlbwr -leave_modify: - csrrd t0, EXCEPTION_KS0 - csrrd t1, EXCEPTION_KS1 - csrrd ra, EXCEPTION_KS2 + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 ertn + #ifdef CONFIG_64BIT vmalloc_modify: - la.abs t1, swapper_pg_dir - b vmalloc_done_modify + la.abs t1, swapper_pg_dir + b vmalloc_done_modify #endif - /* - * This is the entry point when - * build_tlbchange_handler_head spots a huge page. - */ + /* This is the entry point of a huge page. 
*/ tlb_huge_update_modify: #ifdef CONFIG_SMP - ll.d t0, t1, 0 -#else - ld.d t0, t1, 0 + ll.d ra, t1, 0 #endif - - srli.d ra, t0, _PAGE_WRITE_SHIFT - andi ra, ra, 1 - beq ra, $r0, nopage_tlb_modify - - tlbsrch - ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + andi t0, ra, _PAGE_WRITE + beqz t0, nopage_tlb_modify #ifdef CONFIG_SMP - sc.d t0, t1, 0 - beq t0, $r0, tlb_huge_update_modify - ld.d t0, t1, 0 + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + sc.d t0, t1, 0 + beqz t0, tlb_huge_update_modify + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) #else - st.d t0, t1, 0 + rotri.d ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1) + ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) + st.d t0, t1, 0 #endif /* * A huge PTE describes an area the size of the @@ -497,54 +450,58 @@ tlb_huge_update_modify: * address space. */ /* Huge page: Move Global bit */ - xori t0, t0, _PAGE_HUGE - lu12i.w t1, _PAGE_HGLOBAL >> 12 - and t1, t0, t1 - srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) - or t0, t0, t1 + xori t0, t0, _PAGE_HUGE + lu12i.w t1, _PAGE_HGLOBAL >> 12 + and t1, t0, t1 + srli.d t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT) + or t0, t0, t1 - addi.d ra, t0, 0 - csrwr t0, LOONGARCH_CSR_TLBELO0 - addi.d t0, ra, 0 + move ra, t0 + csrwr ra, LOONGARCH_CSR_TLBELO0 /* Convert to entrylo1 */ - addi.d t1, $r0, 1 - slli.d t1, t1, (HPAGE_SHIFT - 1) - add.d t0, t0, t1 - csrwr t0, LOONGARCH_CSR_TLBELO1 + addi.d t1, zero, 1 + slli.d t1, t1, (HPAGE_SHIFT - 1) + add.d t0, t0, t1 + csrwr t0, LOONGARCH_CSR_TLBELO1 /* Set huge page tlb entry size */ - addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) - addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) - csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX tlbwr /* Reset default page size */ - addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16) - addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) - csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) + addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) + csrxchg t1, t0, LOONGARCH_CSR_TLBIDX + + csrrd t0, EXCEPTION_KS0 + csrrd t1, EXCEPTION_KS1 + csrrd ra, EXCEPTION_KS2 + ertn nopage_tlb_modify: - dbar 0 - csrrd ra, EXCEPTION_KS2 - la.abs t0, tlb_do_page_fault_1 - jirl $r0, t0, 0 + dbar 0 + csrrd ra, EXCEPTION_KS2 + la.abs t0, tlb_do_page_fault_1 + jr t0 SYM_FUNC_END(handle_tlb_modify) SYM_FUNC_START(handle_tlb_refill) - csrwr t0, LOONGARCH_CSR_TLBRSAVE - csrrd t0, LOONGARCH_CSR_PGD - lddir t0, t0, 3 + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 #if CONFIG_PGTABLE_LEVELS > 3 - lddir t0, t0, 2 + lddir t0, t0, 2 #endif #if CONFIG_PGTABLE_LEVELS > 2 - lddir t0, t0, 1 + lddir t0, t0, 1 #endif - ldpte t0, 0 - ldpte t0, 1 + ldpte t0, 0 + ldpte t0, 1 tlbfill - csrrd t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_TLBRSAVE ertn SYM_FUNC_END(handle_tlb_refill) -- Gitee From 7fa257134b4bf53eb4a098e071bb279f5f6fca1a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 7 Sep 2022 14:44:36 +0800 Subject: [PATCH 084/241] LoongArch: Refactor cache probe and flush methods Current cache probe and flush methods have some drawbacks: 1, Assume there are 3 cache levels and only 3 levels; 2, Assume L1 = I + D, L2 = V, L3 = S, V is exclusive, S is inclusive. However, the fact is I + D, I + D + V, I + D + S and I + D + V + S are all valid. 
So, refactor the cache probe and flush methods to adapt more types of cache hierarchy. Change-Id: I77850e7a6a843b4aaed2ded7581bcfb8dc9f2937 Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cacheflush.h | 87 +++++---- arch/loongarch/include/asm/cacheops.h | 36 ++-- arch/loongarch/include/asm/cpu-features.h | 5 - arch/loongarch/include/asm/cpu-info.h | 21 ++- arch/loongarch/include/asm/loongarchregs.h | 33 +--- arch/loongarch/include/asm/setup.h | 2 + arch/loongarch/kernel/cacheinfo.c | 97 +++------- arch/loongarch/kernel/traps.c | 3 - arch/loongarch/mm/cache.c | 210 +++++++++++---------- arch/loongarch/pci/pci.c | 7 +- 10 files changed, 234 insertions(+), 267 deletions(-) diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h index 053cedd08d4f..27f9078b6a5b 100644 --- a/arch/loongarch/include/asm/cacheflush.h +++ b/arch/loongarch/include/asm/cacheflush.h @@ -6,10 +6,33 @@ #define _ASM_CACHEFLUSH_H #include -#include +#include #include -extern void local_flush_icache_range(unsigned long start, unsigned long end); +static inline bool cache_present(struct cache_desc *cdesc) +{ + return cdesc->flags & CACHE_PRESENT; +} + +static inline bool cache_private(struct cache_desc *cdesc) +{ + return cdesc->flags & CACHE_PRIVATE; +} + +static inline bool cache_inclusive(struct cache_desc *cdesc) +{ + return cdesc->flags & CACHE_INCLUSIVE; +} + +static inline unsigned int cpu_last_level_cache_line_size(void) +{ + int cache_present = boot_cpu_data.cache_leaves_present; + + return boot_cpu_data.cache_leaves[cache_present - 1].linesz; +} + +asmlinkage void __flush_cache_all(void); +void local_flush_icache_range(unsigned long start, unsigned long end); #define flush_icache_range local_flush_icache_range #define flush_icache_user_range local_flush_icache_range @@ -35,44 +58,30 @@ extern void local_flush_icache_range(unsigned long start, unsigned long end); : \ : "i" (op), "ZC" (*(unsigned char *)(addr))) -static inline void flush_icache_line_indexed(unsigned long addr) -{ - cache_op(Index_Invalidate_I, addr); -} - -static inline void flush_dcache_line_indexed(unsigned long addr) -{ - cache_op(Index_Writeback_Inv_D, addr); -} - -static inline void flush_vcache_line_indexed(unsigned long addr) -{ - cache_op(Index_Writeback_Inv_V, addr); -} - -static inline void flush_scache_line_indexed(unsigned long addr) -{ - cache_op(Index_Writeback_Inv_S, addr); -} - -static inline void flush_icache_line(unsigned long addr) -{ - cache_op(Hit_Invalidate_I, addr); -} - -static inline void flush_dcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_D, addr); -} - -static inline void flush_vcache_line(unsigned long addr) -{ - cache_op(Hit_Writeback_Inv_V, addr); -} - -static inline void flush_scache_line(unsigned long addr) +static inline void flush_cache_line(int leaf, unsigned long addr) { - cache_op(Hit_Writeback_Inv_S, addr); + switch (leaf) { + case Cache_LEAF0: + cache_op(Index_Writeback_Inv_LEAF0, addr); + break; + case Cache_LEAF1: + cache_op(Index_Writeback_Inv_LEAF1, addr); + break; + case Cache_LEAF2: + cache_op(Index_Writeback_Inv_LEAF2, addr); + break; + case Cache_LEAF3: + cache_op(Index_Writeback_Inv_LEAF3, addr); + break; + case Cache_LEAF4: + cache_op(Index_Writeback_Inv_LEAF4, addr); + break; + case Cache_LEAF5: + cache_op(Index_Writeback_Inv_LEAF5, addr); + break; + default: + break; + } } #include diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h index 8d7649a918fe..51ff636b8f8e 100644 --- 
a/arch/loongarch/include/asm/cacheops.h +++ b/arch/loongarch/include/asm/cacheops.h @@ -8,16 +8,18 @@ #define __ASM_CACHEOPS_H /* - * Most cache ops are split into a 2 bit field identifying the cache, and a 3 + * Most cache ops are split into a 3 bit field identifying the cache, and a 2 * bit field identifying the cache operation. */ -#define CacheOp_Cache 0x03 -#define CacheOp_Op 0x1c +#define CacheOp_Cache 0x07 +#define CacheOp_Op 0x18 -#define Cache_I 0x00 -#define Cache_D 0x01 -#define Cache_V 0x02 -#define Cache_S 0x03 +#define Cache_LEAF0 0x00 +#define Cache_LEAF1 0x01 +#define Cache_LEAF2 0x02 +#define Cache_LEAF3 0x03 +#define Cache_LEAF4 0x04 +#define Cache_LEAF5 0x05 #define Index_Invalidate 0x08 #define Index_Writeback_Inv 0x08 @@ -25,13 +27,17 @@ #define Hit_Writeback_Inv 0x10 #define CacheOp_User_Defined 0x18 -#define Index_Invalidate_I (Cache_I | Index_Invalidate) -#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv) -#define Index_Writeback_Inv_V (Cache_V | Index_Writeback_Inv) -#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv) -#define Hit_Invalidate_I (Cache_I | Hit_Invalidate) -#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_V (Cache_V | Hit_Writeback_Inv) -#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv) +#define Index_Writeback_Inv_LEAF0 (Cache_LEAF0 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF1 (Cache_LEAF1 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF2 (Cache_LEAF2 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF3 (Cache_LEAF3 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF4 (Cache_LEAF4 | Index_Writeback_Inv) +#define Index_Writeback_Inv_LEAF5 (Cache_LEAF5 | Index_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF0 (Cache_LEAF0 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF1 (Cache_LEAF1 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF2 (Cache_LEAF2 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF3 (Cache_LEAF3 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF4 (Cache_LEAF4 | Hit_Writeback_Inv) +#define Hit_Writeback_Inv_LEAF5 (Cache_LEAF5 | Hit_Writeback_Inv) #endif /* __ASM_CACHEOPS_H */ diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index dca5c43851f2..866ca1a01d81 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -15,11 +15,6 @@ #define cpu_has_loongarch32 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT) #define cpu_has_loongarch64 (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) -#define cpu_icache_line_size() cpu_data[0].icache.linesz -#define cpu_dcache_line_size() cpu_data[0].dcache.linesz -#define cpu_vcache_line_size() cpu_data[0].vcache.linesz -#define cpu_scache_line_size() cpu_data[0].scache.linesz - #ifdef CONFIG_32BIT # define cpu_has_64bits (cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT) # define cpu_vabits 31 diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index 6a025fa1ad66..d3bfed781c4f 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -10,18 +10,28 @@ #include +/* cache_desc->flags */ +enum { + CACHE_PRESENT = (1 << 0), + CACHE_PRIVATE = (1 << 1), /* core private cache */ + CACHE_INCLUSIVE = (1 << 2), /* include the inner level caches */ +}; + /* * Descriptor for a cache */ struct cache_desc { - unsigned int waysize; /* Bytes per way */ + unsigned char type; + unsigned char level; unsigned short sets; /* Number of lines per set */ 
unsigned char ways; /* Number of ways */ unsigned char linesz; /* Size of line in bytes */ - unsigned char waybit; /* Bits to select in a cache set */ unsigned char flags; /* Flags describing cache properties */ }; +#define CACHE_LEVEL_MAX 3 +#define CACHE_LEAVES_MAX 6 + struct cpuinfo_loongarch { u64 asid_cache; unsigned long asid_mask; @@ -41,11 +51,8 @@ struct cpuinfo_loongarch { int tlbsizemtlb; int tlbsizestlbsets; int tlbsizestlbways; - struct cache_desc icache; /* Primary I-cache */ - struct cache_desc dcache; /* Primary D or combined I/D cache */ - struct cache_desc vcache; /* Victim cache, between pcache and scache */ - struct cache_desc scache; /* Secondary cache */ - struct cache_desc tcache; /* Tertiary/split secondary cache */ + int cache_leaves_present; /* number of cache_leaves[] elements */ + struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ int vabits; /* Virtual Address size in bits */ diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index 1d93f34be853..0ab158785a09 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -187,36 +187,15 @@ static inline u32 read_cpucfg(u32 reg) #define CPUCFG16_L3_DINCL BIT(16) #define LOONGARCH_CPUCFG17 0x11 -#define CPUCFG17_L1I_WAYS_M GENMASK(15, 0) -#define CPUCFG17_L1I_SETS_M GENMASK(23, 16) -#define CPUCFG17_L1I_SIZE_M GENMASK(30, 24) -#define CPUCFG17_L1I_WAYS 0 -#define CPUCFG17_L1I_SETS 16 -#define CPUCFG17_L1I_SIZE 24 - #define LOONGARCH_CPUCFG18 0x12 -#define CPUCFG18_L1D_WAYS_M GENMASK(15, 0) -#define CPUCFG18_L1D_SETS_M GENMASK(23, 16) -#define CPUCFG18_L1D_SIZE_M GENMASK(30, 24) -#define CPUCFG18_L1D_WAYS 0 -#define CPUCFG18_L1D_SETS 16 -#define CPUCFG18_L1D_SIZE 24 - #define LOONGARCH_CPUCFG19 0x13 -#define CPUCFG19_L2_WAYS_M GENMASK(15, 0) -#define CPUCFG19_L2_SETS_M GENMASK(23, 16) -#define CPUCFG19_L2_SIZE_M GENMASK(30, 24) -#define CPUCFG19_L2_WAYS 0 -#define CPUCFG19_L2_SETS 16 -#define CPUCFG19_L2_SIZE 24 - #define LOONGARCH_CPUCFG20 0x14 -#define CPUCFG20_L3_WAYS_M GENMASK(15, 0) -#define CPUCFG20_L3_SETS_M GENMASK(23, 16) -#define CPUCFG20_L3_SIZE_M GENMASK(30, 24) -#define CPUCFG20_L3_WAYS 0 -#define CPUCFG20_L3_SETS 16 -#define CPUCFG20_L3_SIZE 24 +#define CPUCFG_CACHE_WAYS_M GENMASK(15, 0) +#define CPUCFG_CACHE_SETS_M GENMASK(23, 16) +#define CPUCFG_CACHE_LSIZE_M GENMASK(30, 24) +#define CPUCFG_CACHE_WAYS 0 +#define CPUCFG_CACHE_SETS 16 +#define CPUCFG_CACHE_LSIZE 24 #define LOONGARCH_CPUCFG48 0x30 #define CPUCFG48_MCSR_LCK BIT(0) diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index ebd9d210f287..6a628c3bce32 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -13,7 +13,9 @@ extern unsigned long eentry; extern unsigned long tlbrentry; +extern void tlb_init(int cpu); extern void cpu_cache_init(void); +extern void cache_error_setup(void); extern void per_cpu_trap_init(int cpu); extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len); diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c index a23663ed75f7..882cb1944a6b 100644 --- a/arch/loongarch/kernel/cacheinfo.c +++ b/arch/loongarch/kernel/cacheinfo.c @@ -8,70 +8,30 @@ #include #include -/* Populates leaf and increments to next leaf */ -#define 
populate_cache(cache, leaf, c_level, c_type) \ -do { \ - leaf->type = c_type; \ - leaf->level = c_level; \ - leaf->coherency_line_size = c->cache.linesz; \ - leaf->number_of_sets = c->cache.sets; \ - leaf->ways_of_associativity = c->cache.ways; \ - leaf->size = c->cache.linesz * c->cache.sets * \ - c->cache.ways; \ - if (leaf->level > 2) \ - leaf->size *= nodes_per_package; \ - leaf++; \ -} while (0) - int init_cache_level(unsigned int cpu) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; + int cache_present = current_cpu_data.cache_leaves_present; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - int levels = 0, leaves = 0; - - /* - * If Dcache is not set, we assume the cache structures - * are not properly initialized. - */ - if (c->dcache.waysize) - levels += 1; - else - return -ENOENT; - - - leaves += (c->icache.waysize) ? 2 : 1; - - if (c->vcache.waysize) { - levels++; - leaves++; - } - if (c->scache.waysize) { - levels++; - leaves++; - } + this_cpu_ci->num_levels = + current_cpu_data.cache_leaves[cache_present - 1].level; + this_cpu_ci->num_leaves = cache_present; - if (c->tcache.waysize) { - levels++; - leaves++; - } - - this_cpu_ci->num_levels = levels; - this_cpu_ci->num_leaves = leaves; return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { - return !((this_leaf->level == 1) || (this_leaf->level == 2)); + return (!(*(unsigned char *)(this_leaf->priv) & CACHE_PRIVATE) + && !(*(unsigned char *)(sib_leaf->priv) & CACHE_PRIVATE)); } static void cache_cpumap_setup(unsigned int cpu) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf, *sib_leaf; unsigned int index; + struct cacheinfo *this_leaf, *sib_leaf; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (index = 0; index < this_cpu_ci->num_leaves; index++) { unsigned int i; @@ -85,8 +45,10 @@ static void cache_cpumap_setup(unsigned int cpu) for_each_online_cpu(i) { struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - if (i == cpu || !sib_cpu_ci->info_list) - continue;/* skip if itself or no cacheinfo */ + if (i == cpu || !sib_cpu_ci->info_list || + (cpu_to_node(i) != cpu_to_node(cpu))) + continue; + sib_leaf = sib_cpu_ci->info_list + index; if (cache_leaves_are_shared(this_leaf, sib_leaf)) { cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map); @@ -98,31 +60,24 @@ static void cache_cpumap_setup(unsigned int cpu) int populate_cache_leaves(unsigned int cpu) { - int level = 1, nodes_per_package = 1; - struct cpuinfo_loongarch *c = ¤t_cpu_data; + int i, cache_present = current_cpu_data.cache_leaves_present; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; - - if (loongson_sysconf.nr_nodes > 1) - nodes_per_package = loongson_sysconf.cores_per_package - / loongson_sysconf.cores_per_node; - - if (c->icache.waysize) { - populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA); - populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST); - } else { - populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED); + struct cache_desc *cd, *cdesc = current_cpu_data.cache_leaves; + + for (i = 0; i < cache_present; i++) { + cd = cdesc + i; + + this_leaf->type = cd->type; + this_leaf->level = cd->level; + this_leaf->coherency_line_size = cd->linesz; + this_leaf->number_of_sets = cd->sets; + this_leaf->ways_of_associativity = cd->ways; + this_leaf->size = cd->linesz * cd->sets * cd->ways; + this_leaf->priv = &cd->flags; + this_leaf++; } - if 
(c->vcache.waysize) - populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->scache.waysize) - populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED); - - if (c->tcache.waysize) - populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED); - cache_cpumap_setup(cpu); this_cpu_ci->cpu_map_populated = true; diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 0697ff186b40..0f77bdb8c489 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -632,9 +632,6 @@ asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp) irqentry_exit(regs, state); } -extern void tlb_init(int cpu); -extern void cache_error_setup(void); - unsigned long eentry; EXPORT_SYMBOL_GPL(eentry); unsigned long tlbrentry; diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c index 9ba646c7a3f2..d68423998210 100644 --- a/arch/loongarch/mm/cache.c +++ b/arch/loongarch/mm/cache.c @@ -3,13 +3,13 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited */ #include -#include #include #include #include #include #include #include +#include #include #include @@ -17,11 +17,17 @@ #include #include #include +#include #include -#include #include #include +void cache_error_setup(void) +{ + extern char __weak except_vec_cex; + set_merr_handler(0x0, &except_vec_cex, 0x80); +} + /* * LoongArch maintains ICache/DCache coherency by hardware, * we just need "ibar" to avoid instruction hazard here. @@ -32,108 +38,120 @@ void local_flush_icache_range(unsigned long start, unsigned long end) } EXPORT_SYMBOL(local_flush_icache_range); -void cache_error_setup(void) -{ - extern char __weak except_vec_cex; - set_merr_handler(0x0, &except_vec_cex, 0x80); -} - -static unsigned long icache_size __read_mostly; -static unsigned long dcache_size __read_mostly; -static unsigned long vcache_size __read_mostly; -static unsigned long scache_size __read_mostly; - -static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", - "9-way", "10-way", "11-way", "12-way", - "13-way", "14-way", "15-way", "16-way", -}; - -static void probe_pcache(void) +static void flush_cache_leaf(unsigned int leaf) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; - - config = read_cpucfg(LOONGARCH_CPUCFG17); - lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE); - sets = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS); - ways = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1; - - c->icache.linesz = lsize; - c->icache.sets = sets; - c->icache.ways = ways; - icache_size = sets * ways * lsize; - c->icache.waysize = icache_size / c->icache.ways; - - config = read_cpucfg(LOONGARCH_CPUCFG18); - lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE); - sets = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS); - ways = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1; - - c->dcache.linesz = lsize; - c->dcache.sets = sets; - c->dcache.ways = ways; - dcache_size = sets * ways * lsize; - c->dcache.waysize = dcache_size / c->dcache.ways; - - c->options |= LOONGARCH_CPU_PREFETCH; - - pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", - icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz); - - pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", - dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz); + int i, j, nr_nodes; 
+ uint64_t addr = CSR_DMW0_BASE; + struct cache_desc *cdesc = current_cpu_data.cache_leaves + leaf; + + nr_nodes = cache_private(cdesc) ? 1 : loongson_sysconf.nr_nodes; + + do { + for (i = 0; i < cdesc->sets; i++) { + for (j = 0; j < cdesc->ways; j++) { + flush_cache_line(leaf, addr); + addr++; + } + + addr -= cdesc->ways; + addr += cdesc->linesz; + } + addr += (1ULL << NODE_ADDRSPACE_SHIFT); + } while (--nr_nodes > 0); } -static void probe_vcache(void) +asmlinkage __visible void __flush_cache_all(void) { - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; - - config = read_cpucfg(LOONGARCH_CPUCFG19); - lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE); - sets = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS); - ways = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1; - - c->vcache.linesz = lsize; - c->vcache.sets = sets; - c->vcache.ways = ways; - vcache_size = lsize * sets * ways; - c->vcache.waysize = vcache_size / c->vcache.ways; - - pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", - vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); + int leaf; + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + unsigned int cache_present = current_cpu_data.cache_leaves_present; + + leaf = cache_present - 1; + if (cache_inclusive(cdesc + leaf)) { + flush_cache_leaf(leaf); + return; + } + + for (leaf = 0; leaf < cache_present; leaf++) + flush_cache_leaf(leaf); } -static void probe_scache(void) -{ - struct cpuinfo_loongarch *c = ¤t_cpu_data; - unsigned int lsize, sets, ways; - unsigned int config; - - config = read_cpucfg(LOONGARCH_CPUCFG20); - lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE); - sets = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS); - ways = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1; - - c->scache.linesz = lsize; - c->scache.sets = sets; - c->scache.ways = ways; - /* 4 cores. 
scaches are shared */ - scache_size = lsize * sets * ways; - c->scache.waysize = scache_size / c->scache.ways; - - pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", - scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); -} +#define L1IUPRE (1 << 0) +#define L1IUUNIFY (1 << 1) +#define L1DPRE (1 << 2) + +#define LXIUPRE (1 << 0) +#define LXIUUNIFY (1 << 1) +#define LXIUPRIV (1 << 2) +#define LXIUINCL (1 << 3) +#define LXDPRE (1 << 4) +#define LXDPRIV (1 << 5) +#define LXDINCL (1 << 6) + +#define populate_cache_properties(cfg0, cdesc, level, leaf) \ +do { \ + unsigned int cfg1; \ + \ + cfg1 = read_cpucfg(LOONGARCH_CPUCFG17 + leaf); \ + if (level == 1) { \ + cdesc->flags |= CACHE_PRIVATE; \ + } else { \ + if (cfg0 & LXIUPRIV) \ + cdesc->flags |= CACHE_PRIVATE; \ + if (cfg0 & LXIUINCL) \ + cdesc->flags |= CACHE_INCLUSIVE; \ + } \ + cdesc->level = level; \ + cdesc->flags |= CACHE_PRESENT; \ + cdesc->ways = ((cfg1 & CPUCFG_CACHE_WAYS_M) >> CPUCFG_CACHE_WAYS) + 1; \ + cdesc->sets = 1 << ((cfg1 & CPUCFG_CACHE_SETS_M) >> CPUCFG_CACHE_SETS); \ + cdesc->linesz = 1 << ((cfg1 & CPUCFG_CACHE_LSIZE_M) >> CPUCFG_CACHE_LSIZE); \ + cdesc++; leaf++; \ +} while (0) void cpu_cache_init(void) { - probe_pcache(); - probe_vcache(); - probe_scache(); - + unsigned int leaf = 0, level = 1; + unsigned int config = read_cpucfg(LOONGARCH_CPUCFG16); + struct cache_desc *cdesc = current_cpu_data.cache_leaves; + + if (config & L1IUPRE) { + if (config & L1IUUNIFY) + cdesc->type = CACHE_TYPE_UNIFIED; + else + cdesc->type = CACHE_TYPE_INST; + populate_cache_properties(config, cdesc, level, leaf); + } + + if (config & L1DPRE) { + cdesc->type = CACHE_TYPE_DATA; + populate_cache_properties(config, cdesc, level, leaf); + } + + config = config >> 3; + for (level = 2; level <= CACHE_LEVEL_MAX; level++) { + if (!config) + break; + + if (config & LXIUPRE) { + if (config & LXIUUNIFY) + cdesc->type = CACHE_TYPE_UNIFIED; + else + cdesc->type = CACHE_TYPE_INST; + populate_cache_properties(config, cdesc, level, leaf); + } + + if (config & LXDPRE) { + cdesc->type = CACHE_TYPE_DATA; + populate_cache_properties(config, cdesc, level, leaf); + } + + config = config >> 7; + } + + BUG_ON(leaf > CACHE_LEAVES_MAX); + + current_cpu_data.cache_leaves_present = leaf; + current_cpu_data.options |= LOONGARCH_CPU_PREFETCH; shm_align_mask = PAGE_SIZE - 1; } diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c index f843b351e471..2707c4d1e772 100644 --- a/arch/loongarch/pci/pci.c +++ b/arch/loongarch/pci/pci.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #define PCI_DEVICE_ID_LOONGSON_HOSTBRIDGE 0x7a00 @@ -40,12 +41,10 @@ static int __init pcibios_init(void) unsigned int lsize; /* - * Set PCI cacheline size to that of the highest level in the + * Set PCI cacheline size to that of the last level in the * cache hierarchy. */ - lsize = cpu_dcache_line_size(); - lsize = cpu_vcache_line_size() ? : lsize; - lsize = cpu_scache_line_size() ? : lsize; + lsize = cpu_last_level_cache_line_size(); BUG_ON(!lsize); -- Gitee From 76b68d507c2a0480728d9d3dcb671edb37ecb176 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 27 Sep 2022 22:45:49 +0800 Subject: [PATCH 085/241] LoongArch: Support access filter to /dev/mem interface Accidental access to /dev/mem is obviously disastrous, but specific access can be used by people debugging the kernel. So select GENERIC_ LIB_DEVMEM_IS_ALLOWED, as well as define ARCH_HAS_VALID_PHYS_ADDR_RANGE and related helpers, to support access filter to /dev/mem interface. 
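As an aside (not part of the patch), the mmap filter boils down to masking
the end of the requested region against the physical address width. A
stand-alone user-space sketch of the same predicate, assuming a hypothetical
cpu_pabits of 47 (i.e. a 48-bit physical address space):

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's GENMASK_ULL(h, l). */
#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define PAGE_SHIFT 12
static const unsigned int cpu_pabits = 47; /* hypothetical value */

/* Same check as valid_mmap_phys_addr_range() in the diff below. */
static int pfn_range_ok(uint64_t pfn, size_t size)
{
	return !(((pfn << PAGE_SHIFT) + size) & ~GENMASK_ULL(cpu_pabits, 0));
}

int main(void)
{
	printf("%d\n", pfn_range_ok(0x1000, 4096));     /* in range: 1 */
	printf("%d\n", pfn_range_ok(1ULL << 40, 4096)); /* beyond 48 bits: 0 */
	return 0;
}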
Change-Id: Ic1651a9cb402572438842ba018cc54035e798e01 Signed-off-by: Weihao Li Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/io.h | 4 ++++ arch/loongarch/mm/mmap.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ea4f9b507d3d..72637c7bb8e9 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -60,6 +60,7 @@ config LOONGARCH select GENERIC_LIB_CMPDI2 select GENERIC_LIB_LSHRDI3 select GENERIC_LIB_UCMPDI2 + select GENERIC_LIB_DEVMEM_IS_ALLOWED select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index dd82d5a11dd5..a8ebe6a61205 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -107,4 +107,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #include +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + #endif /* _ASM_IO_H */ diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index a3a609f589a9..ad0bd39100ce 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -3,6 +3,8 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited */ #include +#include +#include #include #include @@ -116,3 +118,30 @@ int __virt_addr_valid(volatile void *kaddr) return pfn_valid(PFN_DOWN(PHYSADDR(kaddr))); } EXPORT_SYMBOL_GPL(__virt_addr_valid); + +/* + * You really shouldn't be using read() or write() on /dev/mem. This might go + * away in the future. + */ +int valid_phys_addr_range(phys_addr_t addr, size_t size) +{ + /* + * Check whether addr is covered by a memory region without the + * MEMBLOCK_NOMAP attribute, and whether that region covers the + * entire range. In theory, this could lead to false negatives + * if the range is covered by distinct but adjacent memory regions + * that only differ in other attributes. However, few of such + * attributes have been defined, and it is debatable whether it + * follows that /dev/mem read() calls should be able traverse + * such boundaries. + */ + return memblock_is_region_memory(addr, size) && memblock_is_map_memory(addr); +} + +/* + * Do not allow /dev/mem mappings beyond the supported physical range. + */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) +{ + return !(((pfn << PAGE_SHIFT) + size) & ~(GENMASK_ULL(cpu_pabits, 0))); +} -- Gitee From 34c026f23c2386a87db2ef94fc6558d8e42c073e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 7 Dec 2022 16:50:50 +0800 Subject: [PATCH 086/241] LoongArch: mm: Fix huge page entry update for virtual machine In virtual machine (guest mode), the tlbwr instruction can not write the last entry of MTLB, so we need to make it non-present by invtlb and then write it by tlbfill. This also simplify the whole logic. 
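For reference, INVTLB operation 5 (INVTLB_ADDR_GFALSE_AND_ASID below)
invalidates entries whose global bit is clear and whose ASID and virtual
address both match. A toy user-space model of that semantic (illustration
only; page-granular and 4KB-only for simplicity, unlike a real huge-page
entry):

#include <stdbool.h>
#include <stdint.h>

struct tlb_entry {
	bool     valid;
	bool     global;   /* G bit */
	uint16_t asid;
	uint64_t vppn;     /* virtual page number, 4KB pages assumed */
};

/* Toy model of "invtlb 5, asid, va": drop non-global entries that
 * match both the ASID and the virtual address. */
static void invtlb_op5(struct tlb_entry *tlb, int n,
		       uint16_t asid, uint64_t va)
{
	for (int i = 0; i < n; i++)
		if (tlb[i].valid && !tlb[i].global &&
		    tlb[i].asid == asid && tlb[i].vppn == (va >> 12))
			tlb[i].valid = false;
}

int main(void)
{
	struct tlb_entry e = { true, false, 7, 0x1234 };

	invtlb_op5(&e, 1, 7, 0x1234000);
	return e.valid; /* 0: the entry was invalidated */
}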
Change-Id: Ied3cb5380280724176918688ab7e97a8634ced9a Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlbex.S | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 293b62462ac4..b11776de96f7 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -14,6 +14,8 @@ #include #endif +#define INVTLB_ADDR_GFALSE_AND_ASID 5 + #define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) @@ -140,13 +142,10 @@ tlb_huge_update_load: ori t0, ra, _PAGE_VALID st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr - - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 /* * A huge PTE describes an area the size of the @@ -291,13 +290,11 @@ tlb_huge_update_store: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -440,6 +437,11 @@ tlb_huge_update_modify: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 + /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -470,7 +472,7 @@ tlb_huge_update_modify: addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX - tlbwr + tlbfill /* Reset default page size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) -- Gitee From e7b752c6928cf43fc5a4428d21cfc292c449ba6a Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 10 Oct 2022 17:32:14 +0800 Subject: [PATCH 087/241] LoongArch: Consolidate __ex_table construction Consolidate all the __ex_table constuction code with a _ASM_EXTABLE or _asm_extable helper. There should be no functional change as a result of this patch. 
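Since the helpers only paste directives into the assembly string, their
expansion is easy to inspect. A stand-alone sketch (the macro bodies are
copied from this patch, in their absolute-address form; the program merely
prints the text a "1b"/"3b" pair injects):

#include <stdio.h>

#define __ASM_EXTABLE_RAW(insn, fixup)        \
	".pushsection __ex_table, \"a\"\n"    \
	".balign 8\n"                         \
	".quad ((" insn "))\n"                \
	".quad ((" fixup "))\n"               \
	".popsection\n"

#define _ASM_EXTABLE(insn, fixup) __ASM_EXTABLE_RAW(#insn, #fixup)

int main(void)
{
	fputs(_ASM_EXTABLE(1b, 3b), stdout);
	return 0;
}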
Change-Id: Ib633b89b292abdfd1181b20a09fb4373bc851daf Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 35 ++++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 13 ++++----- arch/loongarch/include/asm/uaccess.h | 9 ++---- arch/loongarch/kernel/fpu.S | 5 ++-- arch/loongarch/lib/clear_user.S | 5 ++-- arch/loongarch/lib/copy_user.S | 5 ++-- arch/loongarch/lib/unaligned.S | 5 ++-- 7 files changed, 51 insertions(+), 26 deletions(-) create mode 100644 arch/loongarch/include/asm/asm-extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h new file mode 100644 index 000000000000..4f615bf56727 --- /dev/null +++ b/arch/loongarch/include/asm/asm-extable.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + .pushsection __ex_table, "a"; \ + .balign 8; \ + .quad (insn); \ + .quad (fixup); \ + .popsection; + + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup) + .endm + +#else /* __ASSEMBLY__ */ + +#include +#include + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + ".pushsection __ex_table, \"a\"\n" \ + ".balign 8\n" \ + ".quad ((" insn "))\n" \ + ".quad ((" fixup "))\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index d45f0b36dd7c..0342eab5c402 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -22,10 +23,8 @@ "4: li.w %0, %6 \n" \ " b 3b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 4b \n" \ - " "__UA_ADDR "\t2b, 4b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ @@ -90,10 +89,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv "4: li.d %0, %6 \n" " b 3b \n" " .previous \n" - " .section __ex_table,\"a\" \n" - " "__UA_ADDR "\t1b, 4b \n" - " "__UA_ADDR "\t2b, 4b \n" - " .previous \n" + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 9cdb48d4a475..22fb4af9225f 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #ifdef CONFIG_64BIT @@ -217,9 +218,7 @@ do { \ " or %1, $r0, $r0 \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " "__UA_ADDR "\t1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ : "m" (__m(ptr)), "i" (-EFAULT)); \ \ @@ -285,9 +284,7 @@ do { \ "3: li.w %0, %3 \n" \ " b 2b \n" \ " .previous \n" \ - " .section __ex_table,\"a\" \n" \ - " " __UA_ADDR " 1b, 3b \n" \ - " .previous \n" \ + _ASM_EXTABLE(1b, 3b) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ : "Jr" (__pu_val), "i" (-EFAULT)); \ } diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index a7240e22e0c7..86fad21241ef 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -15,6 +15,7 @@ #include #include 
#include +#include #include #include @@ -24,9 +25,7 @@ .macro EX insn, reg, src, offs .ex\@: \insn \reg, \src, \offs - .section __ex_table,"a" - PTR .ex\@, fault - .previous + _asm_extable .ex\@, fault .endm .macro sc_save_fp base diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 875a7e634220..d10e2ac1ce54 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,9 +16,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + _asm_extable \from\()b, \to\()b .endm /* diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index b8fddc2d6384..f5685d05542b 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -15,9 +16,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + _asm_extable \from\()b, \to\()b .endm /* diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S index b42c4a2edfad..09e17e1c303b 100644 --- a/arch/loongarch/lib/unaligned.S +++ b/arch/loongarch/lib/unaligned.S @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -18,9 +19,7 @@ jr ra .previous .endif - .section __ex_table, "a" - PTR \from\()b, \to\()b - .previous + _asm_extable \from\()b, \to\()b .endm /* -- Gitee From ff86376496f446fd40fee3c781644ef1a6d06146 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 10 Oct 2022 17:32:15 +0800 Subject: [PATCH 088/241] LoongArch: Switch to relative exception tables Similar to other architectures such as arm64, x86, riscv and so on, use offsets relative to the exception table entry values rather than their absolute addresses for both the exception location and the fixup. However, LoongArch label difference because it will actually produce two relocations, a pair of R_LARCH_ADD32 and R_LARCH_SUB32. Take simple code below for example: $ cat test_ex_table.S .section .text 1: nop .section __ex_table,"a" .balign 4 .long (1b - .) .previous $ loongarch64-unknown-linux-gnu-gcc -c test_ex_table.S $ loongarch64-unknown-linux-gnu-readelf -Wr test_ex_table.o Relocation section '.rela__ex_table' at offset 0x100 contains 2 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000600000032 R_LARCH_ADD32 0000000000000000 .L1^B1 + 0 0000000000000000 0000000500000037 R_LARCH_SUB32 0000000000000000 L0^A + 0 The modpost will complain the R_LARCH_SUB32 relocation, so we need to patch modpost.c to skip this relocation for .rela__ex_table section. 
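At run time the relative form is undone by adding the stored offset back to
the address of the field that holds it. A user-space sketch of the round
trip (mirroring get_ex_fixup() from this patch; it assumes code and data
live within +/-2GiB of each other, which holds for a small-code-model
binary just as it does for the kernel image):

#include <stdint.h>
#include <stdio.h>

struct exception_table_entry {
	int insn, fixup;   /* relative offsets, not absolute addresses */
};

static struct exception_table_entry ex;

static void fixup_handler(void)
{
}

/* The absolute address is the field's own address plus its content. */
static unsigned long get_ex_fixup(const struct exception_table_entry *e)
{
	return (unsigned long)&e->fixup + e->fixup;
}

int main(void)
{
	/* What ".long (fixup - .)" records at assembly time: */
	ex.fixup = (int)((uintptr_t)fixup_handler - (uintptr_t)&ex.fixup);

	printf("%d\n", get_ex_fixup(&ex) == (unsigned long)fixup_handler);
	return 0;
}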
Change-Id: Ie526947dac985f3da7473caa7a8820cc0dc7d590 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 12 ++++----- arch/loongarch/include/asm/extable.h | 26 +++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 2 +- arch/loongarch/mm/extable.c | 32 ++++++++++++++++-------- scripts/mod/modpost.c | 15 +++++++++++ scripts/sorttable.c | 1 + 6 files changed, 71 insertions(+), 17 deletions(-) create mode 100644 arch/loongarch/include/asm/extable.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 4f615bf56727..74f8bc75472a 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -6,9 +6,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ .pushsection __ex_table, "a"; \ - .balign 8; \ - .quad (insn); \ - .quad (fixup); \ + .balign 4; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ .popsection; .macro _asm_extable, insn, fixup @@ -22,9 +22,9 @@ #define __ASM_EXTABLE_RAW(insn, fixup) \ ".pushsection __ex_table, \"a\"\n" \ - ".balign 8\n" \ - ".quad ((" insn "))\n" \ - ".quad ((" fixup "))\n" \ + ".balign 4\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h new file mode 100644 index 000000000000..b571c89705d1 --- /dev/null +++ b/arch/loongarch/include/asm/extable.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_EXTABLE_H +#define _ASM_LOONGARCH_EXTABLE_H + +/* + * The exception table consists of pairs of relative offsets: the first + * is the relative offset to an instruction that is allowed to fault, + * and the second is the relative offset at which the program should + * continue. No registers are modified, so it is entirely up to the + * continuation code to figure out what to do. + * + * All the routines below use bits of fixup code that are out of line + * with the main instruction path. This means when everything is well, + * we don't even have to jump over them. Further, they do not intrude + * on our cache or tlb entries. 
+ */ + +struct exception_table_entry { + int insn, fixup; +}; + +#define ARCH_HAS_RELATIVE_EXTABLE + +bool fixup_exception(struct pt_regs *regs); + +#endif diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 22fb4af9225f..55eaca641d4a 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -12,8 +12,8 @@ #include #include #include +#include #include -#include #ifdef CONFIG_64BIT diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index f31aabb4b9c6..ce7e41f32e7d 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -3,20 +3,32 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited */ #include -#include -#include #include +#include +#include + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} -int fixup_exception(struct pt_regs *regs) +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) { - const struct exception_table_entry *fixup; + regs->csr_era = get_ex_fixup(ex); - fixup = search_exception_tables(exception_era(regs)); - if (fixup) { - regs->csr_era = fixup->fixup; + return true; +} + + +bool fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *ex; - return 1; - } + ex = search_exception_tables(exception_era(regs)); + if (!ex) + return false; - return 0; + return ex_handler_fixup(ex, regs); } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index cde1ce6f171b..aa43a8d13f53 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1852,6 +1852,14 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) return 0; } +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif + +#ifndef R_LARCH_SUB32 +#define R_LARCH_SUB32 55 +#endif + static void section_rela(const char *modname, struct elf_info *elf, Elf_Shdr *sechdr) { @@ -1888,6 +1896,13 @@ static void section_rela(const char *modname, struct elf_info *elf, r_sym = ELF_R_SYM(r.r_info); #endif r.r_addend = TO_NATIVE(rela->r_addend); + switch (elf->hdr->e_machine) { + case EM_LOONGARCH: + if (!strcmp("__ex_table", fromsec) && + ELF_R_TYPE(r.r_info) == R_LARCH_SUB32) + continue; + break; + } sym = elf->symtab_start + r_sym; /* Skip special sections */ if (is_shndx_special(sym->st_shndx)) diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 3edef1c6aaa0..2d911025ef2b 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -347,6 +347,7 @@ static int do_file(char const *const fname, void *addr) case EM_PPC: case EM_ARM: case EM_PPC64: + case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: -- Gitee From 5fca7a8d165505f67b9545f1feb391d6aeb92f02 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 10 Oct 2022 17:32:16 +0800 Subject: [PATCH 089/241] LoongArch: extable: Add `type` and `data` fields This is a LoongArch port of commit d6e2cc564775 ("arm64: extable: add `type` and `data` fields"). Subsequent patches will add specialized handlers for fixups, in addition to the simple PC fixup we have today. In preparation, this patch adds a new `type` field to struct exception_table_entry, and uses this to distinguish the fixup and other cases. A `data` field is also added so that subsequent patches can associate data specific to each exception site (e.g. register numbers). Handlers are named ex_handler_*() for consistency, following the example of x86. 
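For orientation, the widened entry is 12 bytes rather than 8, which is also
why the sorttable change below switches to 12-byte records. A compile-time
sketch of the layout (C11; field order as in the patch):

#include <assert.h>

struct exception_table_entry {
	int insn, fixup;    /* relative offsets, as before */
	short type, data;   /* new: handler kind plus per-site payload */
};

static_assert(sizeof(struct exception_table_entry) == 12,
	      "an extable entry must stay 12 bytes");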
At the same time, get_ex_fixup() is split out into a helper so that it can be used by other ex_handler_*() functions in the subsequent patches. Change-Id: I2ee735dd730fdda5adda136ccef68a5e8c06bab2 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 15 ++++++++--- arch/loongarch/include/asm/extable.h | 11 ++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 +-- arch/loongarch/mm/extable.c | 7 +++++- scripts/sorttable.c | 32 +++++++++++++++++++++++- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 74f8bc75472a..634bd770e3c4 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -2,17 +2,22 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 + #ifdef __ASSEMBLY__ -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ .pushsection __ex_table, "a"; \ .balign 4; \ .long ((insn) - .); \ .long ((fixup) - .); \ + .short (type); \ + .short (data); \ .popsection; .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup) + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm #else /* __ASSEMBLY__ */ @@ -20,15 +25,17 @@ #include #include -#define __ASM_EXTABLE_RAW(insn, fixup) \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".balign 4\n" \ ".long ((" insn ") - .)\n" \ ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup) + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h index b571c89705d1..92612b4364a1 100644 --- a/arch/loongarch/include/asm/extable.h +++ b/arch/loongarch/include/asm/extable.h @@ -17,10 +17,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index b7888440997c..fcb5493232a6 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -4,6 +4,7 @@ #include #define PAGE_SIZE _PAGE_SIZE +#define RO_EXCEPTION_TABLE_ALIGN 4 #define RUNTIME_DISCARD_EXIT /* @@ -41,8 +42,6 @@ SECTIONS } :text = 0 _etext = .; /* End of text section */ - EXCEPTION_TABLE(16) - .got : ALIGN(16) { *(.got) } .plt : ALIGN(16) { *(.plt) } .got.plt : ALIGN(16) { *(.got.plt) } diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index ce7e41f32e7d..4c6af04ee6f3 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -30,5 +30,10 @@ bool fixup_exception(struct pt_regs *regs) if (!ex) return false; - return ex_handler_fixup(ex, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + } + + BUG(); } diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 2d911025ef2b..030bf70530eb 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -231,6 +231,34 @@ static void 
sort_relative_table(char *extab_image, int image_size) } } +static void sort_relative_table_with_data(char *extab_image, int image_size) +{ + int i = 0; + + while (i < image_size) { + uint32_t *loc = (uint32_t *)(extab_image + i); + + w(r(loc) + i, loc); + w(r(loc + 1) + i + 4, loc + 1); + /* Don't touch the fixup type or data */ + + i += sizeof(uint32_t) * 3; + } + + qsort(extab_image, image_size / 12, 12, compare_relative_table); + + i = 0; + while (i < image_size) { + uint32_t *loc = (uint32_t *)(extab_image + i); + + w(r(loc) - i, loc); + w(r(loc + 1) - (i + 4), loc + 1); + /* Don't touch the fixup type or data */ + + i += sizeof(uint32_t) * 3; + } +} + static void x86_sort_relative_table(char *extab_image, int image_size) { int i = 0; @@ -342,12 +370,14 @@ static int do_file(char const *const fname, void *addr) case EM_S390: custom_sort = s390_sort_relative_table; break; + case EM_LOONGARCH: + custom_sort = sort_relative_table_with_data; + break; case EM_AARCH64: case EM_PARISC: case EM_PPC: case EM_ARM: case EM_PPC64: - case EM_LOONGARCH: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: -- Gitee From 46a21c6fb90a9dcbaa38047cb5e289fd2ae94d0f Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 10 Oct 2022 17:32:18 +0800 Subject: [PATCH 090/241] LoongArch: extable: Add a dedicated uaccess handler Inspired by commit 2e77a62cb3a6("arm64: extable: add a dedicated uaccess handler"), do similar to LoongArch to add a dedicated uaccess exception handler to update registers in exception context and subsequently return back into the function which faulted, so we remove the need for fixups specialized to each faulting instruction. Add gpr-num.h here because we need to map the same GPR names to integer constants, so that we can use this to build meta-data for the exception fixups. 
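The 10 low bits of `data` carry two 5-bit GPR numbers. A small sketch of
the encode/decode round trip (user space; the masks are copied from the
patch, and $r4/$r5, the first two argument registers, are used purely as
an example):

#include <stdio.h>

#define EX_DATA_REG_ERR_SHIFT   0
#define EX_DATA_REG_ERR         (0x1f << 0)   /* GENMASK(4, 0) */
#define EX_DATA_REG_ZERO_SHIFT  5
#define EX_DATA_REG_ZERO        (0x1f << 5)   /* GENMASK(9, 5) */

int main(void)
{
	/* Say -EFAULT lands in $r4 and the zeroed value in $r5: */
	int data = (4 << EX_DATA_REG_ERR_SHIFT) |
		   (5 << EX_DATA_REG_ZERO_SHIFT);

	printf("err reg:  %d\n",
	       (data & EX_DATA_REG_ERR) >> EX_DATA_REG_ERR_SHIFT);
	printf("zero reg: %d\n",
	       (data & EX_DATA_REG_ZERO) >> EX_DATA_REG_ZERO_SHIFT);
	return 0;
}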
The compiler treats gpr 0 as zero rather than $r0, so set it separately to .L__gpr_num_zero, otherwise the following assembly error will occurs: {standard input}: Assembler messages: {standard input}:1074: Error: invalid operands (*UND* and *ABS* sections) for `<<' {standard input}:1160: Error: invalid operands (*UND* and *ABS* sections) for `<<' make[1]: *** [scripts/Makefile.build:249: fs/fcntl.o] Error 1 Change-Id: Iedd149a5c2f1645b6644a99593b9cf1a444caf5c Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asm-extable.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/futex.h | 22 ++++++---------------- arch/loongarch/include/asm/gpr-num.h | 22 ++++++++++++++++++++++ arch/loongarch/include/asm/uaccess.h | 17 ++++------------- arch/loongarch/mm/extable.c | 22 ++++++++++++++++++++++ 5 files changed, 76 insertions(+), 29 deletions(-) create mode 100644 arch/loongarch/include/asm/gpr-num.h diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h index 634bd770e3c4..f5502cb50c6e 100644 --- a/arch/loongarch/include/asm/asm-extable.h +++ b/arch/loongarch/include/asm/asm-extable.h @@ -4,6 +4,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 +#define EX_TYPE_UACCESS_ERR_ZERO 2 #ifdef __ASSEMBLY__ @@ -24,6 +25,7 @@ #include #include +#include #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ @@ -37,6 +39,26 @@ #define _ASM_EXTABLE(insn, fixup) \ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h index 0342eab5c402..ce24a9f13f21 100644 --- a/arch/loongarch/include/asm/futex.h +++ b/arch/loongarch/include/asm/futex.h @@ -19,16 +19,11 @@ "2: sc.w $t0, %2 \n" \ " beq $t0, $zero, 1b \n" \ "3: \n" \ - " .section .fixup,\"ax\" \n" \ - "4: li.w %0, %6 \n" \ - " b 3b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \ : "=r" (ret), "=&r" (oldval), \ "=ZC" (*uaddr) \ - : "0" (0), "ZC" (*uaddr), "Jr" (oparg), \ - "i" (-EFAULT) \ + : "0" (0), "ZC" (*uaddr), "Jr" (oparg) \ : "memory", "t0"); \ } @@ -85,15 +80,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv " beq $zero, $t0, 1b \n" "3: \n" __WEAK_LLSC_MB - " .section .fixup,\"ax\" \n" - "4: li.d %0, %6 \n" - " b 3b \n" - " .previous \n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) : "+r" (ret), "=&r" (val), "=ZC" (*uaddr) - : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval), - "i" (-EFAULT) + : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval) : "memory", "t0"); *uval = val; diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h new 
file mode 100644 index 000000000000..e0941af20c7e --- /dev/null +++ b/arch/loongarch/include/asm/gpr-num.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .equ .L__gpr_num_zero, 0 + .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_$r\num, \num + .endr + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .equ .L__gpr_num_zero, 0\n" \ +" .irp num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_$r\\num, \\num\n" \ +" .endr\n" \ + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h index 55eaca641d4a..f2d837d03db9 100644 --- a/arch/loongarch/include/asm/uaccess.h +++ b/arch/loongarch/include/asm/uaccess.h @@ -213,14 +213,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %1, %2 \n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " or %1, $r0, $r0 \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \ : "+r" (__gu_err), "=r" (__gu_tmp) \ - : "m" (__m(ptr)), "i" (-EFAULT)); \ + : "m" (__m(ptr))); \ \ (val) = (__typeof__(*(ptr))) __gu_tmp; \ } @@ -280,13 +275,9 @@ do { \ __asm__ __volatile__( \ "1: " insn " %z2, %1 # __put_user_asm\n" \ "2: \n" \ - " .section .fixup,\"ax\" \n" \ - "3: li.w %0, %3 \n" \ - " b 2b \n" \ - " .previous \n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \ : "+r" (__pu_err), "=m" (__m(ptr)) \ - : "Jr" (__pu_val), "i" (-EFAULT)); \ + : "Jr" (__pu_val)); \ } #define __put_data_asm_ll32(insn, ptr) \ diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c index 4c6af04ee6f3..eead3f7bf3f4 100644 --- a/arch/loongarch/mm/extable.c +++ b/arch/loongarch/mm/extable.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -13,6 +14,13 @@ get_ex_fixup(const struct exception_table_entry *ex) return ((unsigned long)&ex->fixup + ex->fixup); } +static inline void regs_set_gpr(struct pt_regs *regs, + unsigned int offset, unsigned long val) +{ + if (offset && offset <= MAX_REG_OFFSET) + *(unsigned long *)((unsigned long)regs + offset) = val; +} + static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_regs *regs) { @@ -21,6 +29,18 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); + regs->csr_era = get_ex_fixup(ex); + + return true; +} bool fixup_exception(struct pt_regs *regs) { @@ -33,6 +53,8 @@ bool fixup_exception(struct pt_regs *regs) switch (ex->type) { case EX_TYPE_FIXUP: return ex_handler_fixup(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); } BUG(); -- Gitee From e4e3a5e190951c17bfbc886bd8affe2ba2f00dd5 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Mon, 10 Oct 2022 17:34:36 +0800 Subject: [PATCH 091/241] LoongArch: Remove the .fixup section usage Use the `.L_xxx` label to improve fixup code and then remove the .fixup section usage. 
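All of the `.L_fixup_handle_\to` labels generated by `.irp` share one
shape: adjust the remaining-byte count in a1 by a multiple of 8 and return
it in a0. A trivial C rendering of that arithmetic (illustration only;
this patch instantiates index 0 alone):

#include <stdio.h>

/* Emulates ".L_fixup_handle_N: addi.d a0, a1, N * (-8); jr ra". */
static long fixup_handle(long a1, long index)
{
	return a1 + index * (-8);
}

int main(void)
{
	printf("%ld\n", fixup_handle(64, 0));  /* 64 bytes not copied */
	return 0;
}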
Change-Id: Icca724194e9bfe544d44bb0a6fa13412e328863a Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/lib/clear_user.S | 14 +++++--------- arch/loongarch/lib/copy_user.S | 16 ++++++---------- arch/loongarch/lib/unaligned.S | 18 ++++++------------ 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index d10e2ac1ce54..93deae5000ec 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a1, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a1, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __clear_user(void *addr, size_t size) @@ -36,7 +32,7 @@ SYM_FUNC_START(__clear_user) 2: move a0, a1 jr ra - fixup_ex 1, 3, 0, 1 + _asm_extable 1b, .L_fixup_handle_0 SYM_FUNC_END(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index f5685d05542b..9388ed184dce 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -9,15 +9,11 @@ #include #include -.macro fixup_ex from, to, offset, fix -.if \fix - .section .fixup, "ax" -\to: addi.d a0, a2, \offset +.irp to, 0 +.L_fixup_handle_\to\(): + addi.d a0, a2, (\to) * (-8) jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm +.endr /* * unsigned long __copy_user(void *to, const void *from, size_t n) @@ -39,8 +35,8 @@ SYM_FUNC_START(__copy_user) 3: move a0, a2 jr ra - fixup_ex 1, 4, 0, 1 - fixup_ex 2, 4, 0, 0 + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_0 SYM_FUNC_END(__copy_user) EXPORT_SYMBOL(__copy_user) diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S index 09e17e1c303b..9177fd638f07 100644 --- a/arch/loongarch/lib/unaligned.S +++ b/arch/loongarch/lib/unaligned.S @@ -12,15 +12,9 @@ #include #include -.macro fixup_ex from, to, fix -.if \fix - .section .fixup, "ax" -\to: li.w a0, -EFAULT +.L_fixup_handle_unaligned: + li.w a0, -EFAULT jr ra - .previous -.endif - _asm_extable \from\()b, \to\()b -.endm /* * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign) @@ -57,9 +51,9 @@ SYM_FUNC_START(unaligned_read) 5: li.w a0, -EFAULT jr ra - fixup_ex 1, 6, 1 - fixup_ex 2, 6, 0 - fixup_ex 4, 6, 0 + _asm_extable 1b, .L_fixup_handle_unaligned + _asm_extable 2b, .L_fixup_handle_unaligned + _asm_extable 4b, .L_fixup_handle_unaligned SYM_FUNC_END(unaligned_read) /* @@ -86,5 +80,5 @@ SYM_FUNC_START(unaligned_write) 3: li.w a0, -EFAULT jr ra - fixup_ex 2, 4, 1 + _asm_extable 2b, .L_fixup_handle_unaligned SYM_FUNC_END(unaligned_write) -- Gitee From d485d8acc5df1c85e21d9fa7d16d7b272868e2bc Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 24 Jun 2021 12:16:11 +0800 Subject: [PATCH 092/241] LoongArch: Add sparse memory vmemmap support Change-Id: I451ff9962e66ed81bf3f78b26c53d79c6037676b Signed-off-by: Min Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/pgtable-64.h | 6 +- arch/loongarch/include/asm/sparsemem.h | 10 ++ arch/loongarch/mm/init.c | 184 +++++++++++++++++++++++- 4 files changed, 199 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 72637c7bb8e9..597997bcbd5c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -347,6 +347,7 @@ config ARCH_FLATMEM_ENABLE 
config ARCH_SPARSEMEM_ENABLE def_bool y + select SPARSEMEM_VMEMMAP_ENABLE help Say Y to support efficient handling of sparse physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h index d8e7b01c8127..fe15f72dc79a 100644 --- a/arch/loongarch/include/asm/pgtable-64.h +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -60,6 +60,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * TLB refill handlers may also map the vmalloc area into xkvrange. @@ -72,7 +73,10 @@ #define VMALLOC_START MODULES_END #define VMALLOC_END \ (vm_map_base + \ - min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE) + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) + +#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) +#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) #define pte_ERROR(e) \ printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h index 3d18cdf1b069..fd39eea7a375 100644 --- a/arch/loongarch/include/asm/sparsemem.h +++ b/arch/loongarch/include/asm/sparsemem.h @@ -11,8 +11,18 @@ #define SECTION_SIZE_BITS 29 /* 2^29 = Largest Huge Page Size */ #define MAX_PHYSMEM_BITS 48 +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_SIZE (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT))) +#endif + +#include + #endif /* CONFIG_SPARSEMEM */ +#ifndef VMEMMAP_SIZE +#define VMEMMAP_SIZE 0 /* 1, For FLATMEM; 2, For SPARSEMEM without VMEMMAP. */ +#endif + #ifdef CONFIG_MEMORY_HOTPLUG int memory_add_physaddr_to_nid(u64 addr); #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 2c31ad9c6722..6b97654eddc2 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -156,6 +156,188 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP +void __meminit arch_vmemmap_verify(pte_t *pte, int node, + unsigned long start, unsigned long end) +{ + unsigned long pfn = pte_pfn(*pte); + int actual_node = early_pfn_to_nid(pfn); + + if (node_distance(actual_node, node) > LOCAL_DISTANCE) + pr_warn("[%lx-%lx] potential offnode page_structs\n", + start, end - 1); +} + +void * __meminit arch_vmemmap_alloc_block_zero(unsigned long size, int node) +{ + void *p = vmemmap_alloc_block(size, node); + + if (!p) + return NULL; + memset(p, 0, size); + + return p; +} + +pte_t * __meminit arch_vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) { + pte_t entry; + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + set_pte_at(&init_mm, addr, pte, entry); + } + return pte; +} + +pmd_t * __meminit arch_vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) +{ + pmd_t *pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; + pmd_populate_kernel(&init_mm, pmd, p); + } + return pmd; +} + +pud_t * __meminit arch_vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) +{ + pud_t *pud = pud_offset(p4d, 
addr); + if (pud_none(*pud)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)p, (unsigned long)invalid_pte_table); +#endif + pud_populate(&init_mm, pud, p); + } + return pud; +} + +p4d_t * __meminit arch_vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)p, (unsigned long)invalid_pmd_table); +#endif + p4d_populate(&init_mm, p4d, p); + } + return p4d; +} + +pgd_t * __meminit arch_vmemmap_pgd_populate(unsigned long addr, int node) +{ + pgd_t *pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + void *p = arch_vmemmap_alloc_block_zero(PAGE_SIZE, node); + if (!p) + return NULL; + pgd_populate(&init_mm, pgd, p); + } + return pgd; +} + +int __meminit arch_vmemmap_populate_basepages(unsigned long start, + unsigned long end, int node) +{ + unsigned long addr = start; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + for (; addr < end; addr += PAGE_SIZE) { + pgd = arch_vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + p4d = arch_vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = arch_vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + pmd = arch_vmemmap_pmd_populate(pud, addr, node); + if (!pmd) + return -ENOMEM; + pte = arch_vmemmap_pte_populate(pmd, addr, node); + if (!pte) + return -ENOMEM; + arch_vmemmap_verify(pte, node, addr, addr + PAGE_SIZE); + } + + return 0; +} + +int __meminit arch_vmemmap_populate_hugepages(unsigned long start, + unsigned long end, int node) +{ + unsigned long addr = start; + unsigned long next; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + for (addr = start; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = arch_vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + p4d = arch_vmemmap_p4d_populate(pgd, addr, node); + if (!p4d) + return -ENOMEM; + pud = arch_vmemmap_pud_populate(p4d, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = NULL; + + p = arch_vmemmap_alloc_block_zero(PMD_SIZE, node); + if (p) { + pmd_t entry; + + entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL); + pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL; + set_pmd_at(&init_mm, addr, pmd, entry); + + continue; + } + } else if (pmd_val(*pmd) & _PAGE_HUGE) { + arch_vmemmap_verify((pte_t *)pmd, node, addr, next); + continue; + } + if (arch_vmemmap_populate_basepages(addr, next, node)) + return -ENOMEM; + } + + return 0; +} + +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) +{ + return arch_vmemmap_populate_hugepages(start, end, node); +} +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) +{ +} +#endif + /* * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used -- Gitee From 41da0dd7a1a2d61c2dccf472f5949b7b97c29fef Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 093/241] LoongArch: Add vector extensions support Add LoongArch's vector extensions support, which including 128bit LSX (i.e., Loongson SIMD eXtension) and 256bit LASX (i.e., Loongson Advanced SIMD eXtension). 
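One implementation note up front: the context-switch macros in this patch
emit raw instruction words, so the file still assembles with a binutils
that predates LSX/LASX. A sketch of how one such word is formed (opcode
0xb1 for vst as stated in the macro comments; the field layout is read off
lsx_save_data, and encode_vst() is an illustrative name):

#include <stdint.h>
#include <stdio.h>

/* vst vd, rj, si12: opcode in bits 31:22, byte offset in 21:10,
 * base register in 9:5, vector register in 4:0. */
static uint32_t encode_vst(unsigned int vd, unsigned int rj, int si12)
{
	return (0xb1u << 22) | ((si12 & 0xfff) << 10) | (rj << 5) | vd;
}

int main(void)
{
	/* e.g. "vst $vr1, $r12, 16" as a raw word: */
	printf("0x%08x\n", encode_vst(1, 12, 16));
	return 0;
}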
Change-Id: Ieb6d9ce16b354f98d6f0d4083f2c3a72364642c5 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 32 + arch/loongarch/include/asm/asmmacro.h | 624 +++++++++++++++++++ arch/loongarch/include/asm/fpu.h | 185 +++++- arch/loongarch/include/uapi/asm/ptrace.h | 16 +- arch/loongarch/include/uapi/asm/sigcontext.h | 2 - arch/loongarch/kernel/cpu-probe.c | 12 + arch/loongarch/kernel/fpu.S | 282 +++++++++ arch/loongarch/kernel/process.c | 10 +- arch/loongarch/kernel/ptrace.c | 110 ++++ arch/loongarch/kernel/signal-common.h | 8 + arch/loongarch/kernel/signal.c | 316 +++++++++- arch/loongarch/kernel/traps.c | 84 ++- 12 files changed, 1666 insertions(+), 15 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 597997bcbd5c..9b72f6d1cbc3 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -178,6 +178,8 @@ config CPU_LOONGSON64 bool "Loongson 64-bit CPU" depends on SYS_HAS_CPU_LOONGSON64 select CPU_SUPPORTS_64BIT_KERNEL + select CPU_SUPPORTS_LSX + select CPU_SUPPORTS_LASX select GPIOLIB select SWIOTLB select ARCH_SUPPORTS_ATOMIC_RMW @@ -338,6 +340,36 @@ config CPU_HAS_FPU bool default y +config CPU_HAS_LSX + bool "Support for the Loongson SIMD Extension" + depends on CPU_SUPPORTS_LSX + depends on 64BIT + help + Loongson SIMD Extension (LSX) introduces 128 bit wide vector registers + and a set of SIMD instructions to operate on them. When this option + is enabled the kernel will support allocating & switching LSX + vector register contexts. If you know that your kernel will only be + running on CPUs which do not support LSX or that your userland will + not be making use of it then you may wish to say N here to reduce + the size & complexity of your kernel. + + If unsure, say Y. + +config CPU_HAS_LASX + bool "Support for the Loongson Advanced SIMD Extension" + depends on CPU_SUPPORTS_LASX + depends on 64BIT && CPU_HAS_LSX + help + Loongson Advanced SIMD Extension is 256 bit wide SIMD extension. + + If unsure, say Y. 
+ +config CPU_SUPPORTS_LSX + bool + +config CPU_SUPPORTS_LASX + bool + config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index 40414d605ed6..4ef3b9dfa80f 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -121,6 +121,212 @@ .endif .endm + .macro parse_vr var vr + \var = -1 + .ifc \vr, $vr0 + \var = 0 + .endif + .ifc \vr, $vr1 + \var = 1 + .endif + .ifc \vr, $vr2 + \var = 2 + .endif + .ifc \vr, $vr3 + \var = 3 + .endif + .ifc \vr, $vr4 + \var = 4 + .endif + .ifc \vr, $vr5 + \var = 5 + .endif + .ifc \vr, $vr6 + \var = 6 + .endif + .ifc \vr, $vr7 + \var = 7 + .endif + .ifc \vr, $vr8 + \var = 8 + .endif + .ifc \vr, $vr9 + \var = 9 + .endif + .ifc \vr, $vr10 + \var = 10 + .endif + .ifc \vr, $vr11 + \var = 11 + .endif + .ifc \vr, $vr12 + \var = 12 + .endif + .ifc \vr, $vr13 + \var = 13 + .endif + .ifc \vr, $vr14 + \var = 14 + .endif + .ifc \vr, $vr15 + \var = 15 + .endif + .ifc \vr, $vr16 + \var = 16 + .endif + .ifc \vr, $vr17 + \var = 17 + .endif + .ifc \vr, $vr18 + \var = 18 + .endif + .ifc \vr, $vr19 + \var = 19 + .endif + .ifc \vr, $vr20 + \var = 20 + .endif + .ifc \vr, $vr21 + \var = 21 + .endif + .ifc \vr, $vr22 + \var = 22 + .endif + .ifc \vr, $vr23 + \var = 23 + .endif + .ifc \vr, $vr24 + \var = 24 + .endif + .ifc \vr, $vr25 + \var = 25 + .endif + .ifc \vr, $vr26 + \var = 26 + .endif + .ifc \vr, $vr27 + \var = 27 + .endif + .ifc \vr, $vr28 + \var = 28 + .endif + .ifc \vr, $vr29 + \var = 29 + .endif + .ifc \vr, $vr30 + \var = 30 + .endif + .ifc \vr, $vr31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \r" + .endif + .endm + + .macro parse_xr var xr + \var = -1 + .ifc \xr, $xr0 + \var = 0 + .endif + .ifc \xr, $xr1 + \var = 1 + .endif + .ifc \xr, $xr2 + \var = 2 + .endif + .ifc \xr, $xr3 + \var = 3 + .endif + .ifc \xr, $xr4 + \var = 4 + .endif + .ifc \xr, $xr5 + \var = 5 + .endif + .ifc \xr, $xr6 + \var = 6 + .endif + .ifc \xr, $xr7 + \var = 7 + .endif + .ifc \xr, $xr8 + \var = 8 + .endif + .ifc \xr, $xr9 + \var = 9 + .endif + .ifc \xr, $xr10 + \var = 10 + .endif + .ifc \xr, $xr11 + \var = 11 + .endif + .ifc \xr, $xr12 + \var = 12 + .endif + .ifc \xr, $xr13 + \var = 13 + .endif + .ifc \xr, $xr14 + \var = 14 + .endif + .ifc \xr, $xr15 + \var = 15 + .endif + .ifc \xr, $xr16 + \var = 16 + .endif + .ifc \xr, $xr17 + \var = 17 + .endif + .ifc \xr, $xr18 + \var = 18 + .endif + .ifc \xr, $xr19 + \var = 19 + .endif + .ifc \xr, $xr20 + \var = 20 + .endif + .ifc \xr, $xr21 + \var = 21 + .endif + .ifc \xr, $xr22 + \var = 22 + .endif + .ifc \xr, $xr23 + \var = 23 + .endif + .ifc \xr, $xr24 + \var = 24 + .endif + .ifc \xr, $xr25 + \var = 25 + .endif + .ifc \xr, $xr26 + \var = 26 + .endif + .ifc \xr, $xr27 + \var = 27 + .endif + .ifc \xr, $xr28 + \var = 28 + .endif + .ifc \xr, $xr29 + \var = 29 + .endif + .ifc \xr, $xr30 + \var = 30 + .endif + .ifc \xr, $xr31 + \var = 31 + .endif + .iflt \var + .error "Unable to parse register name \r" + .endif + .endm + .macro fpu_save_csr thread tmp movfcsr2gr \tmp, fcsr0 stptr.w \tmp, \thread, THREAD_FCSR @@ -245,6 +451,424 @@ fld.d $f31, \tmp, THREAD_FPR31 - THREAD_FPR0 .endm + .macro lsx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* vst opcode is 0xb1 */ + .word (0xb1 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb1 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb1 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) 
| __tmp << 5 | 2) + .word (0xb1 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb1 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb1 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb1 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb1 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb1 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb1 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb1 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb1 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb1 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb1 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb1 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb1 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb1 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb1 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb1 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb1 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb1 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb1 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb1 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb1 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb1 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb1 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb1 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb1 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb1 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb1 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb1 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb1 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb0 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb0 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb0 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb0 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb0 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb0 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb0 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb0 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb0 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb0 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb0 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb0 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb0 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb0 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb0 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) 
+ .word (0xb0 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb0 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb0 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb0 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb0 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb0 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb0 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb0 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb0 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb0 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb0 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb0 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb0 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb0 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb0 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb0 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lsx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lsx_save_data \thread, \tmp0 + .endm + + .macro lsx_restore_all thread tmp0 tmp1 + lsx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lsx_save_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vpickve2gr opcode is 0xe5dfe */ + .word (0xe5dfe << 11 | 1 << 10 | __vd << 5 | __tmp) + st.d \tmp, \base, (\off+8) + .endm + + .macro lsx_save_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_save_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_save_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_save_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_save_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_save_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_save_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_save_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_save_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_save_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_save_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_save_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_save_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_save_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_save_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_save_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_save_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_save_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_save_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_save_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_save_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_save_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_save_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_save_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_save_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_save_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_save_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_save_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + 
lsx_save_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_save_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_save_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_save_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_save_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_restore_upper vd base tmp off + parse_vr __vd, \vd + parse_r __tmp, \tmp + ld.d \tmp, \base, (\off+8) + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + lsx_restore_upper $vr0, \base, \tmp, (THREAD_FPR0-THREAD_FPR0) + lsx_restore_upper $vr1, \base, \tmp, (THREAD_FPR1-THREAD_FPR0) + lsx_restore_upper $vr2, \base, \tmp, (THREAD_FPR2-THREAD_FPR0) + lsx_restore_upper $vr3, \base, \tmp, (THREAD_FPR3-THREAD_FPR0) + lsx_restore_upper $vr4, \base, \tmp, (THREAD_FPR4-THREAD_FPR0) + lsx_restore_upper $vr5, \base, \tmp, (THREAD_FPR5-THREAD_FPR0) + lsx_restore_upper $vr6, \base, \tmp, (THREAD_FPR6-THREAD_FPR0) + lsx_restore_upper $vr7, \base, \tmp, (THREAD_FPR7-THREAD_FPR0) + lsx_restore_upper $vr8, \base, \tmp, (THREAD_FPR8-THREAD_FPR0) + lsx_restore_upper $vr9, \base, \tmp, (THREAD_FPR9-THREAD_FPR0) + lsx_restore_upper $vr10, \base, \tmp, (THREAD_FPR10-THREAD_FPR0) + lsx_restore_upper $vr11, \base, \tmp, (THREAD_FPR11-THREAD_FPR0) + lsx_restore_upper $vr12, \base, \tmp, (THREAD_FPR12-THREAD_FPR0) + lsx_restore_upper $vr13, \base, \tmp, (THREAD_FPR13-THREAD_FPR0) + lsx_restore_upper $vr14, \base, \tmp, (THREAD_FPR14-THREAD_FPR0) + lsx_restore_upper $vr15, \base, \tmp, (THREAD_FPR15-THREAD_FPR0) + lsx_restore_upper $vr16, \base, \tmp, (THREAD_FPR16-THREAD_FPR0) + lsx_restore_upper $vr17, \base, \tmp, (THREAD_FPR17-THREAD_FPR0) + lsx_restore_upper $vr18, \base, \tmp, (THREAD_FPR18-THREAD_FPR0) + lsx_restore_upper $vr19, \base, \tmp, (THREAD_FPR19-THREAD_FPR0) + lsx_restore_upper $vr20, \base, \tmp, (THREAD_FPR20-THREAD_FPR0) + lsx_restore_upper $vr21, \base, \tmp, (THREAD_FPR21-THREAD_FPR0) + lsx_restore_upper $vr22, \base, \tmp, (THREAD_FPR22-THREAD_FPR0) + lsx_restore_upper $vr23, \base, \tmp, (THREAD_FPR23-THREAD_FPR0) + lsx_restore_upper $vr24, \base, \tmp, (THREAD_FPR24-THREAD_FPR0) + lsx_restore_upper $vr25, \base, \tmp, (THREAD_FPR25-THREAD_FPR0) + lsx_restore_upper $vr26, \base, \tmp, (THREAD_FPR26-THREAD_FPR0) + lsx_restore_upper $vr27, \base, \tmp, (THREAD_FPR27-THREAD_FPR0) + lsx_restore_upper $vr28, \base, \tmp, (THREAD_FPR28-THREAD_FPR0) + lsx_restore_upper $vr29, \base, \tmp, (THREAD_FPR29-THREAD_FPR0) + lsx_restore_upper $vr30, \base, \tmp, (THREAD_FPR30-THREAD_FPR0) + lsx_restore_upper $vr31, \base, \tmp, (THREAD_FPR31-THREAD_FPR0) + .endm + + .macro lsx_init_upper vd tmp + parse_vr __vd, \vd + parse_r __tmp, \tmp + /* vinsgr2vr opcode is 0xe5d7e */ + .word (0xe5d7e << 11 | 1 << 10 | __tmp << 5 | __vd) + .endm + + .macro lsx_init_all_upper tmp + not \tmp, zero + lsx_init_upper $vr0 \tmp + lsx_init_upper $vr1 \tmp + lsx_init_upper $vr2 \tmp + lsx_init_upper $vr3 \tmp + lsx_init_upper $vr4 \tmp + lsx_init_upper $vr5 \tmp + lsx_init_upper $vr6 \tmp + lsx_init_upper $vr7 \tmp + lsx_init_upper $vr8 \tmp + lsx_init_upper $vr9 \tmp + lsx_init_upper $vr10 \tmp + lsx_init_upper $vr11 \tmp + lsx_init_upper $vr12 \tmp + lsx_init_upper $vr13 \tmp + lsx_init_upper $vr14 \tmp + lsx_init_upper $vr15 \tmp + lsx_init_upper $vr16 \tmp + lsx_init_upper $vr17 \tmp + lsx_init_upper $vr18 \tmp + lsx_init_upper $vr19 \tmp + 
lsx_init_upper $vr20 \tmp + lsx_init_upper $vr21 \tmp + lsx_init_upper $vr22 \tmp + lsx_init_upper $vr23 \tmp + lsx_init_upper $vr24 \tmp + lsx_init_upper $vr25 \tmp + lsx_init_upper $vr26 \tmp + lsx_init_upper $vr27 \tmp + lsx_init_upper $vr28 \tmp + lsx_init_upper $vr29 \tmp + lsx_init_upper $vr30 \tmp + lsx_init_upper $vr31 \tmp + .endm + + .macro lasx_save_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* xvst opcode is 0xb3 */ + .word (0xb3 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb3 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb3 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb3 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb3 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb3 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb3 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | __tmp << 5 | 6) + .word (0xb3 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb3 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb3 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb3 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb3 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb3 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb3 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb3 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb3 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb3 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb3 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb3 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb3 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb3 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb3 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb3 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb3 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb3 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb3 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb3 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb3 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb3 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb3 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb3 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb3 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_restore_data thread tmp + parse_r __tmp, \tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \tmp, \thread, \tmp + /* xvld opcode is 0xb2 */ + .word (0xb2 << 22 | ((THREAD_FPR0-THREAD_FPR0) << 10) | __tmp << 5 | 0) + .word (0xb2 << 22 | ((THREAD_FPR1-THREAD_FPR0) << 10) | __tmp << 5 | 1) + .word (0xb2 << 22 | ((THREAD_FPR2-THREAD_FPR0) << 10) | __tmp << 5 | 2) + .word (0xb2 << 22 | ((THREAD_FPR3-THREAD_FPR0) << 10) | __tmp << 5 | 3) + .word (0xb2 << 22 | ((THREAD_FPR4-THREAD_FPR0) << 10) | __tmp << 5 | 4) + .word (0xb2 << 22 | ((THREAD_FPR5-THREAD_FPR0) << 10) | __tmp << 5 | 5) + .word (0xb2 << 22 | ((THREAD_FPR6-THREAD_FPR0) << 10) | 
__tmp << 5 | 6) + .word (0xb2 << 22 | ((THREAD_FPR7-THREAD_FPR0) << 10) | __tmp << 5 | 7) + .word (0xb2 << 22 | ((THREAD_FPR8-THREAD_FPR0) << 10) | __tmp << 5 | 8) + .word (0xb2 << 22 | ((THREAD_FPR9-THREAD_FPR0) << 10) | __tmp << 5 | 9) + .word (0xb2 << 22 | ((THREAD_FPR10-THREAD_FPR0) << 10) | __tmp << 5 | 10) + .word (0xb2 << 22 | ((THREAD_FPR11-THREAD_FPR0) << 10) | __tmp << 5 | 11) + .word (0xb2 << 22 | ((THREAD_FPR12-THREAD_FPR0) << 10) | __tmp << 5 | 12) + .word (0xb2 << 22 | ((THREAD_FPR13-THREAD_FPR0) << 10) | __tmp << 5 | 13) + .word (0xb2 << 22 | ((THREAD_FPR14-THREAD_FPR0) << 10) | __tmp << 5 | 14) + .word (0xb2 << 22 | ((THREAD_FPR15-THREAD_FPR0) << 10) | __tmp << 5 | 15) + .word (0xb2 << 22 | ((THREAD_FPR16-THREAD_FPR0) << 10) | __tmp << 5 | 16) + .word (0xb2 << 22 | ((THREAD_FPR17-THREAD_FPR0) << 10) | __tmp << 5 | 17) + .word (0xb2 << 22 | ((THREAD_FPR18-THREAD_FPR0) << 10) | __tmp << 5 | 18) + .word (0xb2 << 22 | ((THREAD_FPR19-THREAD_FPR0) << 10) | __tmp << 5 | 19) + .word (0xb2 << 22 | ((THREAD_FPR20-THREAD_FPR0) << 10) | __tmp << 5 | 20) + .word (0xb2 << 22 | ((THREAD_FPR21-THREAD_FPR0) << 10) | __tmp << 5 | 21) + .word (0xb2 << 22 | ((THREAD_FPR22-THREAD_FPR0) << 10) | __tmp << 5 | 22) + .word (0xb2 << 22 | ((THREAD_FPR23-THREAD_FPR0) << 10) | __tmp << 5 | 23) + .word (0xb2 << 22 | ((THREAD_FPR24-THREAD_FPR0) << 10) | __tmp << 5 | 24) + .word (0xb2 << 22 | ((THREAD_FPR25-THREAD_FPR0) << 10) | __tmp << 5 | 25) + .word (0xb2 << 22 | ((THREAD_FPR26-THREAD_FPR0) << 10) | __tmp << 5 | 26) + .word (0xb2 << 22 | ((THREAD_FPR27-THREAD_FPR0) << 10) | __tmp << 5 | 27) + .word (0xb2 << 22 | ((THREAD_FPR28-THREAD_FPR0) << 10) | __tmp << 5 | 28) + .word (0xb2 << 22 | ((THREAD_FPR29-THREAD_FPR0) << 10) | __tmp << 5 | 29) + .word (0xb2 << 22 | ((THREAD_FPR30-THREAD_FPR0) << 10) | __tmp << 5 | 30) + .word (0xb2 << 22 | ((THREAD_FPR31-THREAD_FPR0) << 10) | __tmp << 5 | 31) + .endm + + .macro lasx_save_all thread tmp0 tmp1 + fpu_save_cc \thread, \tmp0, \tmp1 + fpu_save_csr \thread, \tmp0 + lasx_save_data \thread, \tmp0 + .endm + + .macro lasx_restore_all thread tmp0 tmp1 + lasx_restore_data \thread, \tmp0 + fpu_restore_cc \thread, \tmp0, \tmp1 + fpu_restore_csr \thread, \tmp0 + .endm + + .macro lasx_save_upper xd base tmp off + /* Nothing */ + .endm + + .macro lasx_save_all_upper thread base tmp + /* Nothing */ + .endm + + .macro lasx_restore_upper xd base tmp off + parse_xr __xd, \xd + parse_xr __xt, \tmp + parse_r __base, \base + /* vld opcode is 0xb0 */ + .word (0xb0 << 22 | (\off+16) << 10 | __base << 5 | __xt) + /* xvpermi.q opcode is 0x1dfb */ + .word (0x1dfb << 18 | 0x2 << 10 | __xt << 5 | __xd) + .endm + + .macro lasx_restore_all_upper thread base tmp + li.w \tmp, THREAD_FPR0 + PTR_ADD \base, \thread, \tmp + /* Save $vr31, xvpickve2gr opcode is 0x76efe */ + .word (0x76efe << 12 | 0 << 10 | 31 << 5 | 0x11) + .word (0x76efe << 12 | 1 << 10 | 31 << 5 | 0x12) + lasx_restore_upper $xr0, \base, $xr31, (THREAD_FPR0-THREAD_FPR0) + lasx_restore_upper $xr1, \base, $xr31, (THREAD_FPR1-THREAD_FPR0) + lasx_restore_upper $xr2, \base, $xr31, (THREAD_FPR2-THREAD_FPR0) + lasx_restore_upper $xr3, \base, $xr31, (THREAD_FPR3-THREAD_FPR0) + lasx_restore_upper $xr4, \base, $xr31, (THREAD_FPR4-THREAD_FPR0) + lasx_restore_upper $xr5, \base, $xr31, (THREAD_FPR5-THREAD_FPR0) + lasx_restore_upper $xr6, \base, $xr31, (THREAD_FPR6-THREAD_FPR0) + lasx_restore_upper $xr7, \base, $xr31, (THREAD_FPR7-THREAD_FPR0) + lasx_restore_upper $xr8, \base, $xr31, (THREAD_FPR8-THREAD_FPR0) + lasx_restore_upper $xr9, 
\base, $xr31, (THREAD_FPR9-THREAD_FPR0) + lasx_restore_upper $xr10, \base, $xr31, (THREAD_FPR10-THREAD_FPR0) + lasx_restore_upper $xr11, \base, $xr31, (THREAD_FPR11-THREAD_FPR0) + lasx_restore_upper $xr12, \base, $xr31, (THREAD_FPR12-THREAD_FPR0) + lasx_restore_upper $xr13, \base, $xr31, (THREAD_FPR13-THREAD_FPR0) + lasx_restore_upper $xr14, \base, $xr31, (THREAD_FPR14-THREAD_FPR0) + lasx_restore_upper $xr15, \base, $xr31, (THREAD_FPR15-THREAD_FPR0) + lasx_restore_upper $xr16, \base, $xr31, (THREAD_FPR16-THREAD_FPR0) + lasx_restore_upper $xr17, \base, $xr31, (THREAD_FPR17-THREAD_FPR0) + lasx_restore_upper $xr18, \base, $xr31, (THREAD_FPR18-THREAD_FPR0) + lasx_restore_upper $xr19, \base, $xr31, (THREAD_FPR19-THREAD_FPR0) + lasx_restore_upper $xr20, \base, $xr31, (THREAD_FPR20-THREAD_FPR0) + lasx_restore_upper $xr21, \base, $xr31, (THREAD_FPR21-THREAD_FPR0) + lasx_restore_upper $xr22, \base, $xr31, (THREAD_FPR22-THREAD_FPR0) + lasx_restore_upper $xr23, \base, $xr31, (THREAD_FPR23-THREAD_FPR0) + lasx_restore_upper $xr24, \base, $xr31, (THREAD_FPR24-THREAD_FPR0) + lasx_restore_upper $xr25, \base, $xr31, (THREAD_FPR25-THREAD_FPR0) + lasx_restore_upper $xr26, \base, $xr31, (THREAD_FPR26-THREAD_FPR0) + lasx_restore_upper $xr27, \base, $xr31, (THREAD_FPR27-THREAD_FPR0) + lasx_restore_upper $xr28, \base, $xr31, (THREAD_FPR28-THREAD_FPR0) + lasx_restore_upper $xr29, \base, $xr31, (THREAD_FPR29-THREAD_FPR0) + lasx_restore_upper $xr30, \base, $xr31, (THREAD_FPR30-THREAD_FPR0) + lasx_restore_upper $xr31, \base, $xr31, (THREAD_FPR31-THREAD_FPR0) + /* Restore $vr31, xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 0 << 10 | 0x11 << 5 | 31) + .word (0x76ebe << 12 | 1 << 10 | 0x12 << 5 | 31) + .endm + + .macro lasx_init_upper xd tmp + parse_xr __xd, \xd + parse_r __tmp, \tmp + /* xvinsgr2vr opcode is 0x76ebe */ + .word (0x76ebe << 12 | 2 << 10 | __tmp << 5 | __xd) + .word (0x76ebe << 12 | 3 << 10 | __tmp << 5 | __xd) + .endm + + .macro lasx_init_all_upper tmp + not \tmp, zero + lasx_init_upper $xr0 \tmp + lasx_init_upper $xr1 \tmp + lasx_init_upper $xr2 \tmp + lasx_init_upper $xr3 \tmp + lasx_init_upper $xr4 \tmp + lasx_init_upper $xr5 \tmp + lasx_init_upper $xr6 \tmp + lasx_init_upper $xr7 \tmp + lasx_init_upper $xr8 \tmp + lasx_init_upper $xr9 \tmp + lasx_init_upper $xr10 \tmp + lasx_init_upper $xr11 \tmp + lasx_init_upper $xr12 \tmp + lasx_init_upper $xr13 \tmp + lasx_init_upper $xr14 \tmp + lasx_init_upper $xr15 \tmp + lasx_init_upper $xr16 \tmp + lasx_init_upper $xr17 \tmp + lasx_init_upper $xr18 \tmp + lasx_init_upper $xr19 \tmp + lasx_init_upper $xr20 \tmp + lasx_init_upper $xr21 \tmp + lasx_init_upper $xr22 \tmp + lasx_init_upper $xr23 \tmp + lasx_init_upper $xr24 \tmp + lasx_init_upper $xr25 \tmp + lasx_init_upper $xr26 \tmp + lasx_init_upper $xr27 \tmp + lasx_init_upper $xr28 \tmp + lasx_init_upper $xr29 \tmp + lasx_init_upper $xr30 \tmp + lasx_init_upper $xr31 \tmp + .endm + .macro jr dst jirl zero, \dst, 0 .endm diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index d53ab744e151..4d81e3b6e908 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -26,6 +26,26 @@ extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); +extern void _save_lsx(struct loongarch_fpu *fpu); +extern void _restore_lsx(struct loongarch_fpu *fpu); +extern void _init_lsx_upper(void); +extern void _restore_lsx_upper(struct loongarch_fpu *fpu); + +extern void _save_lasx(struct 
loongarch_fpu *fpu); +extern void _restore_lasx(struct loongarch_fpu *fpu); +extern void _init_lasx_upper(void); +extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +static inline void enable_lsx(void); +static inline void disable_lsx(void); +static inline void save_lsx(struct task_struct *t); +static inline void restore_lsx(struct task_struct *t); + +static inline void enable_lasx(void); +static inline void disable_lasx(void); +static inline void save_lasx(struct task_struct *t); +static inline void restore_lasx(struct task_struct *t); + /* * Mask the FCSR Cause bits according to the Enable bits, observing * that Unimplemented is always enabled. @@ -42,6 +62,29 @@ static inline int is_fp_enabled(void) 1 : 0; } +static inline int is_lsx_enabled(void) +{ + if (!cpu_has_lsx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LSXEN) ? + 1 : 0; +} + +static inline int is_lasx_enabled(void) +{ + if (!cpu_has_lasx) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LASXEN) ? + 1 : 0; +} + +static inline int is_simd_enabled(void) +{ + return is_lsx_enabled() | is_lasx_enabled(); +} + #define enable_fpu() \ do { \ set_csr_euen(CSR_EUEN_FPEN); \ @@ -85,9 +128,22 @@ static inline void own_fpu(int restore) static inline void lose_fpu_inatomic(int save, struct task_struct *tsk) { if (is_fpu_owner()) { - if (save) - _save_fp(&tsk->thread.fpu); - disable_fpu(); + if (!is_simd_enabled()) { + if (save) + _save_fp(&tsk->thread.fpu); + disable_fpu(); + } else { + if (save) { + if (!is_lasx_enabled()) + save_lsx(tsk); + else + save_lasx(tsk); + } + disable_fpu(); + disable_lsx(); + disable_lasx(); + clear_tsk_thread_flag(tsk, TIF_USEDSIMD); + } clear_tsk_thread_flag(tsk, TIF_USEDFPU); } KSTK_EUEN(tsk) &= ~(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); @@ -133,4 +189,127 @@ static inline union fpureg *get_fpu_regs(struct task_struct *tsk) return tsk->thread.fpu.fpr; } +static inline int is_simd_owner(void) +{ + return test_thread_flag(TIF_USEDSIMD); +} + +#ifdef CONFIG_CPU_HAS_LSX + +static inline void enable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(CSR_EUEN_LSXEN, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lsx(void) +{ + if (cpu_has_lsx) + csr_xchg32(0, CSR_EUEN_LSXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _save_lsx(&t->thread.fpu); +} + +static inline void restore_lsx(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx(&t->thread.fpu); +} + +static inline void init_lsx_upper(void) +{ + /* + * Check cpu_has_lsx only if it's a constant. This will allow the + * compiler to optimise out code for CPUs without LSX without adding + * an extra redundant check for CPUs with LSX. 
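+ * (Callers reach this through init_restore_lsx(), which only runs
+ * after LSX support has been detected, so the non-constant case needs
+ * no extra runtime check here.)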
+ */ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return; + + _init_lsx_upper(); +} + +static inline void restore_lsx_upper(struct task_struct *t) +{ + if (cpu_has_lsx) + _restore_lsx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lsx(void) {} +static inline void disable_lsx(void) {} +static inline void save_lsx(struct task_struct *t) {} +static inline void restore_lsx(struct task_struct *t) {} +static inline void init_lsx_upper(void) {} +static inline void restore_lsx_upper(struct task_struct *t) {} +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +static inline void enable_lasx(void) +{ + + if (cpu_has_lasx) + csr_xchg32(CSR_EUEN_LASXEN, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lasx(void) +{ + if (cpu_has_lasx) + csr_xchg32(0, CSR_EUEN_LASXEN, LOONGARCH_CSR_EUEN); +} + +static inline void save_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _save_lasx(&t->thread.fpu); +} + +static inline void restore_lasx(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx(&t->thread.fpu); +} + +static inline void init_lasx_upper(void) +{ + if (cpu_has_lasx) + _init_lasx_upper(); +} + +static inline void restore_lasx_upper(struct task_struct *t) +{ + if (cpu_has_lasx) + _restore_lasx_upper(&t->thread.fpu); +} + +#else +static inline void enable_lasx(void) {} +static inline void disable_lasx(void) {} +static inline void save_lasx(struct task_struct *t) {} +static inline void restore_lasx(struct task_struct *t) {} +static inline void init_lasx_upper(void) {} +static inline void restore_lasx_upper(struct task_struct *t) {} +#endif + +static inline int thread_lsx_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx) + return 0; + + return test_thread_flag(TIF_LSX_CTX_LIVE); +} + +static inline int thread_lasx_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx) + return 0; + + return test_thread_flag(TIF_LASX_CTX_LIVE); +} + #endif /* _ASM_FPU_H */ diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index 2a6efcc760c8..17250e696421 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -41,9 +41,19 @@ struct user_pt_regs { } __attribute__((aligned(8))); struct user_fp_state { - uint64_t fpr[32]; - uint64_t fcc; - uint32_t fcsr; + uint64_t fpr[32]; + uint64_t fcc; + uint32_t fcsr; +}; + +struct user_lsx_state { + /* 32 registers, 128 bits width per register. */ + uint64_t vregs[32*2]; +}; + +struct user_lasx_state { + /* 32 registers, 256 bits width per register. 
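The low 128 bits of each register overlap the corresponding LSX register.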
*/ + uint64_t vregs[32*4]; }; #define PTRACE_SYSEMU 0x1f diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index aeea923d0597..0d11a600101c 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -47,7 +47,6 @@ struct lsx_context { __u64 regs[2*32]; __u64 fcc; __u32 fcsr; - __u32 vcsr; }; /* LASX context */ @@ -57,7 +56,6 @@ struct lasx_context { __u64 regs[4*32]; __u64 fcc; __u32 fcsr; - __u32 vcsr; }; #endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index b249cbb807e7..023b43b55238 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -111,6 +111,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_FPU; elf_hwcap |= HWCAP_LOONGARCH_FPU; } +#ifdef CONFIG_CPU_HAS_LSX + if (config & CPUCFG2_LSX) { + c->options |= LOONGARCH_CPU_LSX; + elf_hwcap |= HWCAP_LOONGARCH_LSX; + } +#endif +#ifdef CONFIG_CPU_HAS_LASX + if (config & CPUCFG2_LASX) { + c->options |= LOONGARCH_CPU_LASX; + elf_hwcap |= HWCAP_LOONGARCH_LASX; + } +#endif if (config & CPUCFG2_COMPLEX) { c->options |= LOONGARCH_CPU_COMPLEX; elf_hwcap |= HWCAP_LOONGARCH_COMPLEX; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 86fad21241ef..078f2f7aae4a 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -28,6 +28,26 @@ _asm_extable .ex\@, fault .endm + .macro EX_V insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_vr __reg, \reg +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + _asm_extable .ex\@, fault + .endm + + .macro EX_XV insn, reg, src, offs + parse_v __insn, \insn + parse_v __offs, \offs + parse_r __src, \src + parse_xr __reg, \reg +.ex\@: + .word __insn << 22 | __offs << 10 | __src << 5 | __reg + _asm_extable .ex\@, fault + .endm + .macro sc_save_fp base EX fst.d $f0, \base, (0 * FPU_REG_WIDTH) EX fst.d $f1, \base, (1 * FPU_REG_WIDTH) @@ -148,6 +168,146 @@ movgr2fcsr fcsr0, \tmp0 .endm + .macro sc_save_lsx base + EX_V 0xb1 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb1 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb1 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb1 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb1 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb1 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb1 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb1 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb1 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb1 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb1 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb1 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb1 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb1 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb1 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb1 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb1 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb1 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb1 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb1 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb1 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb1 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb1 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb1 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb1 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb1 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb1 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb1 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb1 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb1 $vr29, \base, (29 * 
LSX_REG_WIDTH) + EX_V 0xb1 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb1 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_restore_lsx base + EX_V 0xb0 $vr0, \base, (0 * LSX_REG_WIDTH) + EX_V 0xb0 $vr1, \base, (1 * LSX_REG_WIDTH) + EX_V 0xb0 $vr2, \base, (2 * LSX_REG_WIDTH) + EX_V 0xb0 $vr3, \base, (3 * LSX_REG_WIDTH) + EX_V 0xb0 $vr4, \base, (4 * LSX_REG_WIDTH) + EX_V 0xb0 $vr5, \base, (5 * LSX_REG_WIDTH) + EX_V 0xb0 $vr6, \base, (6 * LSX_REG_WIDTH) + EX_V 0xb0 $vr7, \base, (7 * LSX_REG_WIDTH) + EX_V 0xb0 $vr8, \base, (8 * LSX_REG_WIDTH) + EX_V 0xb0 $vr9, \base, (9 * LSX_REG_WIDTH) + EX_V 0xb0 $vr10, \base, (10 * LSX_REG_WIDTH) + EX_V 0xb0 $vr11, \base, (11 * LSX_REG_WIDTH) + EX_V 0xb0 $vr12, \base, (12 * LSX_REG_WIDTH) + EX_V 0xb0 $vr13, \base, (13 * LSX_REG_WIDTH) + EX_V 0xb0 $vr14, \base, (14 * LSX_REG_WIDTH) + EX_V 0xb0 $vr15, \base, (15 * LSX_REG_WIDTH) + EX_V 0xb0 $vr16, \base, (16 * LSX_REG_WIDTH) + EX_V 0xb0 $vr17, \base, (17 * LSX_REG_WIDTH) + EX_V 0xb0 $vr18, \base, (18 * LSX_REG_WIDTH) + EX_V 0xb0 $vr19, \base, (19 * LSX_REG_WIDTH) + EX_V 0xb0 $vr20, \base, (20 * LSX_REG_WIDTH) + EX_V 0xb0 $vr21, \base, (21 * LSX_REG_WIDTH) + EX_V 0xb0 $vr22, \base, (22 * LSX_REG_WIDTH) + EX_V 0xb0 $vr23, \base, (23 * LSX_REG_WIDTH) + EX_V 0xb0 $vr24, \base, (24 * LSX_REG_WIDTH) + EX_V 0xb0 $vr25, \base, (25 * LSX_REG_WIDTH) + EX_V 0xb0 $vr26, \base, (26 * LSX_REG_WIDTH) + EX_V 0xb0 $vr27, \base, (27 * LSX_REG_WIDTH) + EX_V 0xb0 $vr28, \base, (28 * LSX_REG_WIDTH) + EX_V 0xb0 $vr29, \base, (29 * LSX_REG_WIDTH) + EX_V 0xb0 $vr30, \base, (30 * LSX_REG_WIDTH) + EX_V 0xb0 $vr31, \base, (31 * LSX_REG_WIDTH) + .endm + + .macro sc_save_lasx base + EX_XV 0xb3 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb3 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + + .macro sc_restore_lasx base + EX_XV 0xb2 $xr0, \base, (0 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr1, \base, (1 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr2, \base, (2 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr3, \base, (3 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr4, \base, (4 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr5, \base, (5 * LASX_REG_WIDTH) + EX_XV 0xb2 
$xr6, \base, (6 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr7, \base, (7 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr8, \base, (8 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr9, \base, (9 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr10, \base, (10 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr11, \base, (11 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr12, \base, (12 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr13, \base, (13 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr14, \base, (14 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr15, \base, (15 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr16, \base, (16 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr17, \base, (17 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr18, \base, (18 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr19, \base, (19 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr20, \base, (20 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr21, \base, (21 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr22, \base, (22 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr23, \base, (23 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr24, \base, (24 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr25, \base, (25 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr26, \base, (26 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr27, \base, (27 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr28, \base, (28 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr29, \base, (29 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr30, \base, (30 * LASX_REG_WIDTH) + EX_XV 0xb2 $xr31, \base, (31 * LASX_REG_WIDTH) + .endm + /* * Save a thread's fp context. */ @@ -169,6 +329,76 @@ SYM_FUNC_START(_restore_fp) jirl zero, ra, 0 SYM_FUNC_END(_restore_fp) +#ifdef CONFIG_CPU_HAS_LSX + +/* + * Save a thread's LSX vector context. + */ +SYM_FUNC_START(_save_lsx) + lsx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx) +EXPORT_SYMBOL(_save_lsx) + +/* + * Restore a thread's LSX vector context. + */ +SYM_FUNC_START(_restore_lsx) + lsx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx) + +SYM_FUNC_START(_save_lsx_upper) + lsx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_upper) + +SYM_FUNC_START(_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_upper) + +SYM_FUNC_START(_init_lsx_upper) + lsx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX + +/* + * Save a thread's LASX vector context. + */ +SYM_FUNC_START(_save_lasx) + lasx_save_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx) +EXPORT_SYMBOL(_save_lasx) + +/* + * Restore a thread's LASX vector context. + */ +SYM_FUNC_START(_restore_lasx) + lasx_restore_all a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx) + +SYM_FUNC_START(_save_lasx_upper) + lasx_save_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_upper) + +SYM_FUNC_START(_restore_lasx_upper) + lasx_restore_all_upper a0 t0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_upper) + +SYM_FUNC_START(_init_lasx_upper) + lasx_init_all_upper t1 + jirl zero, ra, 0 +SYM_FUNC_END(_init_lasx_upper) +#endif + /* * Load the FPU with signalling NANS. 
This bit pattern we're using has * the property that no matter whether considered as single or as double @@ -247,6 +477,58 @@ SYM_FUNC_START(_restore_fp_context) jirl zero, ra, 0 SYM_FUNC_END(_restore_fp_context) +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lsx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lsx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lsx_context) + sc_restore_lsx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lsx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_save_lasx_context) + sc_save_fcc a1, t0, t1 + sc_save_fcsr a2, t0 + sc_save_lasx a0 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_save_lasx_context) + +/* + * a0: fpregs + * a1: fcc + * a2: fcsr + */ +SYM_FUNC_START(_restore_lasx_context) + sc_restore_lasx a0 + sc_restore_fcc a1, t1, t2 + sc_restore_fcsr a2, t1 + li.w a0, 0 # success + jirl zero, ra, 0 +SYM_FUNC_END(_restore_lasx_context) + SYM_FUNC_START(fault) li.w a0, -EFAULT # failure jirl zero, ra, 0 diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index ed0aebfe95d4..dc71fe9b949c 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -98,8 +98,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) */ preempt_disable(); - if (is_fpu_owner()) - save_fp(current); + if (is_fpu_owner()) { + if (is_lasx_enabled()) + save_lasx(current); + else if (is_lsx_enabled()) + save_lsx(current); + else + save_fp(current); + } preempt_enable(); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 6d893db98d1d..9dc348fcb987 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -240,6 +240,90 @@ static int cfg_set(struct task_struct *target, return 0; } +#ifdef CONFIG_CPU_HAS_LSX + +static void copy_pad_fprs(struct task_struct *target, + const struct user_regset *regset, + struct membuf *to, unsigned int live_sz) +{ + int i, j; + unsigned long long fill = ~0ull; + unsigned int cp_sz, pad_sz; + + cp_sz = min(regset->size, live_sz); + pad_sz = regset->size - cp_sz; + WARN_ON(pad_sz % sizeof(fill)); + + for (i = 0; i < NUM_FPU_REGS; i++) { + membuf_write(to, &target->thread.fpu.fpr[i], cp_sz); + for (j = 0; j < (pad_sz / sizeof(fill)); j++) { + membuf_store(to, fill); + } + } +} + +static int simd_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + const unsigned int wr_size = NUM_FPU_REGS * regset->size; + + if (!tsk_used_math(target)) { + /* The task hasn't used FP or LSX, fill with 0xff */ + copy_pad_fprs(target, regset, &to, 0); + } else if (!test_tsk_thread_flag(target, TIF_LSX_CTX_LIVE)) { + /* Copy scalar FP context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 8); +#ifdef CONFIG_CPU_HAS_LASX + } else if (!test_tsk_thread_flag(target, TIF_LASX_CTX_LIVE)) { + /* Copy LSX 128 Bit context, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, 16); +#endif + } else if (sizeof(target->thread.fpu.fpr[0]) == regset->size) { + /* Trivially copy the vector registers */ + membuf_write(&to, &target->thread.fpu.fpr, wr_size); + } else { + /* Copy as much context as possible, fill the rest with 0xff */ + copy_pad_fprs(target, regset, &to, sizeof(target->thread.fpu.fpr[0])); + } + + return 0; +} + +static int 
simd_set(struct task_struct *target,
+		const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	const unsigned int wr_size = NUM_FPU_REGS * regset->size;
+	unsigned int cp_sz;
+	int i, err, start;
+
+	init_fp_ctx(target);
+
+	if (sizeof(target->thread.fpu.fpr[0]) == regset->size) {
+		/* Trivially copy the vector registers */
+		err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					&target->thread.fpu.fpr,
+					0, wr_size);
+	} else {
+		/* Copy as much context as possible */
+		cp_sz = min_t(unsigned int, regset->size,
+				sizeof(target->thread.fpu.fpr[0]));
+
+		i = start = err = 0;
+		for (; i < NUM_FPU_REGS; i++, start += regset->size) {
+			err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+						&target->thread.fpu.fpr[i],
+						start, start + cp_sz);
+		}
+	}
+
+	return err;
+}
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
 struct pt_regs_offset {
 	const char *name;
 	int offset;
@@ -312,6 +396,12 @@ enum loongarch_regset {
 	REGSET_GPR,
 	REGSET_FPR,
 	REGSET_CPUCFG,
+#ifdef CONFIG_CPU_HAS_LSX
+	REGSET_LSX,
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+	REGSET_LASX,
+#endif
 };
 
 static const struct user_regset loongarch64_regsets[] = {
@@ -339,6 +429,26 @@ static const struct user_regset loongarch64_regsets[] = {
 		.regset_get = cfg_get,
 		.set = cfg_set,
 	},
+#ifdef CONFIG_CPU_HAS_LSX
+	[REGSET_LSX] = {
+		.core_note_type = NT_LOONGARCH_LSX,
+		.n = NUM_FPU_REGS,
+		.size = 16,
+		.align = 16,
+		.regset_get = simd_get,
+		.set = simd_set,
+	},
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+	[REGSET_LASX] = {
+		.core_note_type = NT_LOONGARCH_LASX,
+		.n = NUM_FPU_REGS,
+		.size = 32,
+		.align = 32,
+		.regset_get = simd_get,
+		.set = simd_set,
+	},
+#endif
 };
 
 static const struct user_regset_view user_loongarch64_view = {
diff --git a/arch/loongarch/kernel/signal-common.h b/arch/loongarch/kernel/signal-common.h
index 004b540bd0b9..91e890eb3a00 100644
--- a/arch/loongarch/kernel/signal-common.h
+++ b/arch/loongarch/kernel/signal-common.h
@@ -47,5 +47,13 @@ extern asmlinkage int
 _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
 extern asmlinkage int
 _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr);
+extern asmlinkage int
+_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+extern asmlinkage int
+_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+extern asmlinkage int
+_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+extern asmlinkage int
+_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 
 #endif /* __SIGNAL_COMMON_H */
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index 5bf0c01c8312..2fd6dc72e1c2 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -85,6 +85,96 @@ static int copy_fpu_from_sigcontext(struct fpu_context __user *ctx)
 	return err;
 }
 
+static int copy_lsx_to_sigcontext(struct lsx_context __user *ctx)
+{
+	int i;
+	int err = 0;
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	for (i = 0; i < NUM_FPU_REGS; i++) {
+		err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+				&regs[2*i]);
+		err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+				&regs[2*i+1]);
+	}
+	err |= __put_user(current->thread.fpu.fcc, fcc);
+	err |= __put_user(current->thread.fpu.fcsr, fcsr);
+
+	return err;
+}
+
+static int copy_lsx_from_sigcontext(struct lsx_context __user *ctx)
+{
+	int i;
+	int err = 0;
+	u64 fpr_val;
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	for (i = 0; i < NUM_FPU_REGS; i++) {
+		err |= __get_user(fpr_val, &regs[2*i]);
+		set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+		err |= __get_user(fpr_val, &regs[2*i+1]);
+		set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val);
+	}
+	err |= __get_user(current->thread.fpu.fcc, fcc);
+	err |= __get_user(current->thread.fpu.fcsr, fcsr);
+
+	return err;
+}
+
+static int copy_lasx_to_sigcontext(struct lasx_context __user *ctx)
+{
+	int i;
+	int err = 0;
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	for (i = 0; i < NUM_FPU_REGS; i++) {
+		err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 0),
+				&regs[4*i]);
+		err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 1),
+				&regs[4*i+1]);
+		err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 2),
+				&regs[4*i+2]);
+		err |= __put_user(get_fpr64(&current->thread.fpu.fpr[i], 3),
+				&regs[4*i+3]);
+	}
+	err |= __put_user(current->thread.fpu.fcc, fcc);
+	err |= __put_user(current->thread.fpu.fcsr, fcsr);
+
+	return err;
+}
+
+static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
+{
+	int i;
+	int err = 0;
+	u64 fpr_val;
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	for (i = 0; i < NUM_FPU_REGS; i++) {
+		err |= __get_user(fpr_val, &regs[4*i]);
+		set_fpr64(&current->thread.fpu.fpr[i], 0, fpr_val);
+		err |= __get_user(fpr_val, &regs[4*i+1]);
+		set_fpr64(&current->thread.fpu.fpr[i], 1, fpr_val);
+		err |= __get_user(fpr_val, &regs[4*i+2]);
+		set_fpr64(&current->thread.fpu.fpr[i], 2, fpr_val);
+		err |= __get_user(fpr_val, &regs[4*i+3]);
+		set_fpr64(&current->thread.fpu.fpr[i], 3, fpr_val);
+	}
+	err |= __get_user(current->thread.fpu.fcc, fcc);
+	err |= __get_user(current->thread.fpu.fcsr, fcsr);
+
+	return err;
+}
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -106,6 +196,42 @@ static int restore_hw_fpu_context(struct fpu_context __user *ctx)
 	return _restore_fp_context(regs, fcc, fcsr);
 }
 
+static int save_hw_lsx_context(struct lsx_context __user *ctx)
+{
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	return _save_lsx_context(regs, fcc, fcsr);
+}
+
+static int restore_hw_lsx_context(struct lsx_context __user *ctx)
+{
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	return _restore_lsx_context(regs, fcc, fcsr);
+}
+
+static int save_hw_lasx_context(struct lasx_context __user *ctx)
+{
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	return _save_lasx_context(regs, fcc, fcsr);
+}
+
+static int restore_hw_lasx_context(struct lasx_context __user *ctx)
+{
+	uint64_t __user *regs = (uint64_t *)&ctx->regs;
+	uint64_t __user *fcc = &ctx->fcc;
+	uint32_t __user *fcsr = &ctx->fcsr;
+
+	return _restore_lasx_context(regs, fcc, fcsr);
+}
+
 /*
  * Helper routines
  */
@@ -178,6 +304,162 @@ static int protected_restore_fpu_context(struct extctx_layout *extctx)
 	return err ?: sig;
 }
 
+static int protected_save_lsx_context(struct extctx_layout *extctx)
+{
+	int err = 0;
+	struct sctx_info __user *info = extctx->lsx.addr;
+	struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
+	uint64_t __user *fcc = &lsx_ctx->fcc;
+	uint32_t __user *fcsr = &lsx_ctx->fcsr;
+
+	while (1) {
+		lock_fpu_owner();
+		if (is_lsx_enabled())
+			err = save_hw_lsx_context(lsx_ctx);
+		else {
+			if (is_fpu_owner())
+				save_fp(current);
+			err = copy_lsx_to_sigcontext(lsx_ctx);
+		}
+		unlock_fpu_owner();
+
+		err |= __put_user(LSX_CTX_MAGIC, &info->magic);
+		err |= __put_user(extctx->lsx.size, &info->size);
+
+		if (likely(!err))
+			break;
+		/* Touch the LSX context and try again */
+		err = __put_user(0, &regs[0]) |
+			__put_user(0, &regs[32*2-1]) |
+			__put_user(0, fcc) |
+			__put_user(0, fcsr);
+		if (err)
+			return err;	/* really bad sigcontext */
+	}
+
+	return err;
+}
+
+static int protected_restore_lsx_context(struct extctx_layout *extctx)
+{
+	int err = 0, sig = 0, tmp __maybe_unused;
+	struct sctx_info __user *info = extctx->lsx.addr;
+	struct lsx_context __user *lsx_ctx = (struct lsx_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *regs = (uint64_t *)&lsx_ctx->regs;
+	uint64_t __user *fcc = &lsx_ctx->fcc;
+	uint32_t __user *fcsr = &lsx_ctx->fcsr;
+
+	err = sig = fcsr_pending(fcsr);
+	if (err < 0)
+		return err;
+
+	while (1) {
+		lock_fpu_owner();
+		if (is_lsx_enabled())
+			err = restore_hw_lsx_context(lsx_ctx);
+		else {
+			err = copy_lsx_from_sigcontext(lsx_ctx);
+			if (is_fpu_owner())
+				restore_fp(current);
+		}
+		unlock_fpu_owner();
+
+		if (likely(!err))
+			break;
+		/* Touch the LSX context and try again */
+		err = __get_user(tmp, &regs[0]) |
+			__get_user(tmp, &regs[32*2-1]) |
+			__get_user(tmp, fcc) |
+			__get_user(tmp, fcsr);
+		if (err)
+			break;	/* really bad sigcontext */
+	}
+
+	return err ?: sig;
+}
+
+static int protected_save_lasx_context(struct extctx_layout *extctx)
+{
+	int err = 0;
+	struct sctx_info __user *info = extctx->lasx.addr;
+	struct lasx_context __user *lasx_ctx =
+			(struct lasx_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
+	uint64_t __user *fcc = &lasx_ctx->fcc;
+	uint32_t __user *fcsr = &lasx_ctx->fcsr;
+
+
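+	/*
+	 * Writes below can fault until the sigframe has been faulted in,
+	 * in which case the context is touched and the sequence retried.
+	 */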
+	while (1) {
+		lock_fpu_owner();
+		if (is_lasx_enabled())
+			err = save_hw_lasx_context(lasx_ctx);
+		else {
+			if (is_lsx_enabled())
+				save_lsx(current);
+			else if (is_fpu_owner())
+				save_fp(current);
+			err = copy_lasx_to_sigcontext(lasx_ctx);
+		}
+		unlock_fpu_owner();
+
+		err |= __put_user(LASX_CTX_MAGIC, &info->magic);
+		err |= __put_user(extctx->lasx.size, &info->size);
+
+		if (likely(!err))
+			break;
+		/* Touch the LASX context and try again */
+		err = __put_user(0, &regs[0]) |
+			__put_user(0, &regs[32*4-1]) |
+			__put_user(0, fcc) |
+			__put_user(0, fcsr);
+		if (err)
+			return err;	/* really bad sigcontext */
+	}
+
+	return err;
+}
+
+static int protected_restore_lasx_context(struct extctx_layout *extctx)
+{
+	int err = 0, sig = 0, tmp __maybe_unused;
+	struct sctx_info __user *info = extctx->lasx.addr;
+	struct lasx_context __user *lasx_ctx =
+			(struct lasx_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *regs = (uint64_t *)&lasx_ctx->regs;
+	uint64_t __user *fcc = &lasx_ctx->fcc;
+	uint32_t __user *fcsr = &lasx_ctx->fcsr;
+
+	err = sig = fcsr_pending(fcsr);
+	if (err < 0)
+		return err;
+
+	while (1) {
+		lock_fpu_owner();
+		if (is_lasx_enabled())
+			err = restore_hw_lasx_context(lasx_ctx);
+		else {
+			err = copy_lasx_from_sigcontext(lasx_ctx);
+			if (is_lsx_enabled())
+				restore_lsx(current);
+			else if (is_fpu_owner())
+				restore_fp(current);
+		}
+		unlock_fpu_owner();
+
+		if (likely(!err))
+			break;
+		/* Touch the LASX context and try again */
+		err = __get_user(tmp, &regs[0]) |
+			__get_user(tmp, &regs[32*4-1]) |
+			__get_user(tmp, fcc) |
+			__get_user(tmp, fcsr);
+		if (err)
+			break;	/* really bad sigcontext */
+	}
+
+	return err ?: sig;
+}
+
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 			    struct extctx_layout *extctx)
 {
@@ -191,7 +473,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	for (i = 1; i < 32; i++)
 		err |= __put_user(regs->regs[i], &sc->sc_regs[i]);
 
-	if (extctx->fpu.addr)
+	if (extctx->lasx.addr)
+		err |= protected_save_lasx_context(extctx);
+	else if (extctx->lsx.addr)
+		err |= protected_save_lsx_context(extctx);
+	else if (extctx->fpu.addr)
 		err |= protected_save_fpu_context(extctx);
 
 	/* Set the "end" magic */
@@ -244,6 +530,20 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
 			extctx->fpu.addr = info;
 			break;
 
+		case LSX_CTX_MAGIC:
+			if (size < (sizeof(struct sctx_info) +
+					sizeof(struct lsx_context)))
+				goto invalid;
+			extctx->lsx.addr = info;
+			break;
+
+		case LASX_CTX_MAGIC:
+			if (size < (sizeof(struct sctx_info) +
+					sizeof(struct lasx_context)))
+				goto invalid;
+			extctx->lasx.addr = info;
+			break;
+
 		default:
 			goto invalid;
 		}
@@ -289,7 +589,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
 	for (i = 1; i < 32; i++)
 		err |= __get_user(regs->regs[i], &sc->sc_regs[i]);
 
-	if (extctx.fpu.addr)
+	if (extctx.lasx.addr)
+		err |= protected_restore_lasx_context(&extctx);
+	else if (extctx.lsx.addr)
+		err |= protected_restore_lsx_context(&extctx);
+	else if (extctx.fpu.addr)
 		err |= protected_restore_fpu_context(&extctx);
 
 bad:
@@ -345,7 +649,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
 	extctx->size += extctx->end.size;
 
 	if (extctx->flags & SC_USED_FP) {
-		if (cpu_has_fpu)
+		if (cpu_has_lasx && thread_lasx_context_live())
+			new_sp = extframe_alloc(extctx, &extctx->lasx,
+				sizeof(struct lasx_context), LASX_CTX_ALIGN, new_sp);
+		else if (cpu_has_lsx && thread_lsx_context_live())
+			new_sp = extframe_alloc(extctx, &extctx->lsx,
+
sizeof(struct lsx_context), LSX_CTX_ALIGN, new_sp); + else if (cpu_has_fpu) new_sp = extframe_alloc(extctx, &extctx->fpu, sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp); } diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 0f77bdb8c489..85ecdc00ff0c 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -529,12 +529,67 @@ static void init_restore_fp(void) BUG_ON(!is_fp_enabled()); } +static void init_restore_lsx(void) +{ + enable_lsx(); + + if (!thread_lsx_context_live()) { + /* First time LSX context user */ + init_restore_fp(); + init_lsx_upper(); + set_thread_flag(TIF_LSX_CTX_LIVE); + } else { + if (!is_simd_owner()) { + if (is_fpu_owner()) { + restore_lsx_upper(current); + } else { + __own_fpu(); + restore_lsx(current); + } + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); +} + +static void init_restore_lasx(void) +{ + enable_lasx(); + + if (!thread_lasx_context_live()) { + /* First time LASX context user */ + init_restore_lsx(); + init_lasx_upper(); + set_thread_flag(TIF_LASX_CTX_LIVE); + } else { + if (is_fpu_owner() || is_simd_owner()) { + init_restore_lsx(); + restore_lasx_upper(current); + } else { + __own_fpu(); + enable_lsx(); + restore_lasx(current); + } + } + + set_thread_flag(TIF_USEDSIMD); + + BUG_ON(!is_fp_enabled()); + BUG_ON(!is_lsx_enabled()); + BUG_ON(!is_lasx_enabled()); +} + asmlinkage void noinstr do_fpu(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); local_irq_enable(); die_if_kernel("do_fpu invoked from kernel context!", regs); + BUG_ON(is_lsx_enabled()); + BUG_ON(is_lasx_enabled()); preempt_disable(); init_restore_fp(); @@ -547,8 +602,21 @@ asmlinkage void noinstr do_fpu(struct pt_regs *regs) asmlinkage void noinstr do_lsx(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lsx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lsx invoked from kernel context!", regs); + BUG_ON(is_lasx_enabled()); + + preempt_disable(); + init_restore_lsx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); } @@ -556,8 +624,20 @@ asmlinkage void noinstr do_lsx(struct pt_regs *regs) asmlinkage void noinstr do_lasx(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); + local_irq_enable(); - force_sig(SIGILL); + if (!cpu_has_lasx) { + force_sig(SIGILL); + goto out; + } + + die_if_kernel("do_lasx invoked from kernel context!", regs); + + preempt_disable(); + init_restore_lasx(); + preempt_enable(); + +out: local_irq_disable(); irqentry_exit(regs, state); } -- Gitee From b3dc79b5122a9fa0879af1e569654bb900d41c3a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 094/241] LoongArch: Add zboot (compressed kernel) support Change-Id: Id097c1cc161f3d8c3059f4254b1ba3a752b4254c Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 11 +++ arch/loongarch/Makefile | 42 +++++++- arch/loongarch/boot/Makefile | 54 +++++++++++ arch/loongarch/boot/compressed/Makefile | 81 ++++++++++++++++ .../boot/compressed/calc_vmlinuz_load_addr.c | 58 +++++++++++ arch/loongarch/boot/compressed/decompress.c | 97 +++++++++++++++++++ arch/loongarch/boot/compressed/dummy.c | 4 + arch/loongarch/boot/compressed/head.S | 48 +++++++++ arch/loongarch/boot/compressed/ld.script | 51 ++++++++++ arch/loongarch/boot/compressed/string.c | 46 +++++++++ arch/loongarch/tools/Makefile | 5 + arch/loongarch/tools/elf-entry.c 
| 66 +++++++++++++ 12 files changed, 562 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/boot/Makefile create mode 100644 arch/loongarch/boot/compressed/Makefile create mode 100644 arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c create mode 100644 arch/loongarch/boot/compressed/decompress.c create mode 100644 arch/loongarch/boot/compressed/dummy.c create mode 100644 arch/loongarch/boot/compressed/head.S create mode 100644 arch/loongarch/boot/compressed/ld.script create mode 100644 arch/loongarch/boot/compressed/string.c create mode 100644 arch/loongarch/tools/Makefile create mode 100644 arch/loongarch/tools/elf-entry.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 9b72f6d1cbc3..6a9cbb19aba1 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -127,6 +127,7 @@ config MACH_LOONGSON64 select SPARSE_IRQ select SYS_HAS_CPU_LOONGSON64 select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_ZBOOT select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL @@ -189,6 +190,16 @@ config CPU_LOONGSON64 endchoice +config SYS_SUPPORTS_ZBOOT + bool + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_LZ4 + select HAVE_KERNEL_LZMA + select HAVE_KERNEL_LZO + select HAVE_KERNEL_XZ + select HAVE_KERNEL_ZSTD + config SYS_HAS_CPU_LOONGSON64 bool diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 2553b83396c2..d0ab77b7bda5 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -3,6 +3,9 @@ # Author: Huacai Chen # Copyright (C) 2020 Loongson Technology Corporation Limited +archscripts: scripts_basic + $(Q)$(MAKE) $(build)=arch/loongarch/tools elf-entry + # # Select the object file format to substitute into the linker script. # @@ -31,6 +34,7 @@ cflags-y += -mabi=lp64s endif all-y := vmlinux +all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlinuz cflags-y += -G0 -pipe -msoft-float LDFLAGS_vmlinux += -G0 -static -n -nostdlib @@ -59,6 +63,15 @@ endif cflags-y += -ffreestanding +# +# Kernel compression +# +ifdef CONFIG_SYS_SUPPORTS_ZBOOT +KBUILD_IMAGE = vmlinuz +else +KBUILD_IMAGE = vmlinux +endif + # # Board-dependent options and extra files # @@ -68,13 +81,15 @@ ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif +entry-y = $(shell $(objtree)/arch/loongarch/tools/elf-entry vmlinux) drivers-$(CONFIG_PCI) += arch/loongarch/pci/ KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) PLATFORM="$(platform-y)" +bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + VMLINUX_ENTRY_ADDRESS=$(entry-y) PLATFORM="$(platform-y)" ifdef CONFIG_64BIT bootvars-y += ADDR_BITS=64 @@ -113,6 +128,10 @@ vdso_install: # boot image targets (arch/loongarch/boot/) boot-y := vmlinux.bin +# compressed boot image targets (arch/loongarch/boot/compressed/) +bootz-y := vmlinuz +bootz-y += vmlinuz.bin + all: $(all-y) # boot @@ -120,15 +139,36 @@ $(boot-y): vmlinux FORCE $(Q)$(MAKE) $(build)=arch/loongarch/boot VMLINUX=vmlinux \ $(bootvars-y) arch/loongarch/boot/$@ +ifdef CONFIG_SYS_SUPPORTS_ZBOOT +# boot/compressed +$(bootz-y): vmlinux FORCE + $(Q)$(MAKE) $(build)=arch/loongarch/boot/compressed \ + $(bootvars-y) 64bit-bfd=$(64bit-bfd) $@ +else +vmlinuz: FORCE + @echo ' CONFIG_SYS_SUPPORTS_ZBOOT is not enabled' + /bin/false +endif + CLEAN_FILES += vmlinux install: $(Q)install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) +ifdef CONFIG_SYS_SUPPORTS_ZBOOT + $(Q)install -D -m 755 vmlinuz 
$(INSTALL_PATH)/vmlinuz-$(KERNELRELEASE)
+endif
 	$(Q)install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE)
 	$(Q)install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE)
 
+archclean:
+	$(Q)$(MAKE) $(clean)=arch/loongarch/boot
+	$(Q)$(MAKE) $(clean)=arch/loongarch/boot/compressed
+	$(Q)$(MAKE) $(clean)=arch/loongarch/tools
+
 define archhelp
 	echo ' install - install kernel into $(INSTALL_PATH)'
 	echo ' vmlinux.bin - Raw binary boot image'
+	echo ' vmlinuz - Compressed boot(zboot) image'
+	echo ' vmlinuz.bin - Raw binary zboot image'
 	echo
 endef
diff --git a/arch/loongarch/boot/Makefile b/arch/loongarch/boot/Makefile
new file mode 100644
index 000000000000..b09257a75756
--- /dev/null
+++ b/arch/loongarch/boot/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Huacai Chen
+# Copyright (C) 2020 Loongson Technology Corporation Limited
+
+#
+# Drop some uninteresting sections in the kernel.
+# This is only relevant for ELF kernels but doesn't hurt a.out
+#
+drop-sections := .comment .note .options
+strip-flags := $(addprefix --remove-section=,$(drop-sections))
+
+suffix-y := bin
+suffix-$(CONFIG_KERNEL_GZIP) := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZMA) := lzma
+suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
+suffix-$(CONFIG_KERNEL_ZSTD) := zst
+
+targets += vmlinux.bin
+quiet_cmd_bin = OBJCOPY $@
+      cmd_bin = $(OBJCOPY) -O binary $(strip-flags) $(VMLINUX) $@
+$(obj)/vmlinux.bin: $(VMLINUX) FORCE
+	$(call if_changed,bin)
+
+#
+# Compressed vmlinux images
+#
+
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.bz2
+extra-y += vmlinux.bin.lzma
+extra-y += vmlinux.bin.lzo
+extra-y += vmlinux.bin.lz4
+extra-y += vmlinux.bin.zst
+
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,bzip2)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,gzip)
+
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzma)
+
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lzo)
+
+$(obj)/vmlinux.bin.lz4: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,lz4)
+
+$(obj)/vmlinux.bin.zst: $(obj)/vmlinux.bin FORCE
+	$(call if_changed,zstd22)
diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile
new file mode 100644
index 000000000000..e3ed0f54a264
--- /dev/null
+++ b/arch/loongarch/boot/compressed/Makefile
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Huacai Chen
+# Copyright (C) 2020 Loongson Technology Corporation Limited
+
+include $(srctree)/arch/loongarch/Kbuild.platforms
+
+# set the default size of the mallocing area for decompressing
+BOOT_HEAP_SIZE := 0x400000
+
+# Disable Function Tracer
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS))
+
+KBUILD_CFLAGS := $(filter-out -fstack-protector, $(KBUILD_CFLAGS))
+
+KBUILD_CFLAGS := $(KBUILD_CFLAGS) -D__KERNEL__ \
+	-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull"
+
+KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
+	-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
+	-DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
+
+# decompressor objects (linked with vmlinuz)
+vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
+
+vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+
+extra-y += ashldi3.c
+$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE
+	$(call if_changed,shipped)
+
+targets := $(notdir $(vmlinuzobjs-y))
+
+targets += vmlinux.bin
+OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O 
binary -R .comment -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +tool_$(CONFIG_KERNEL_GZIP) = gzip +tool_$(CONFIG_KERNEL_BZIP2) = bzip2 +tool_$(CONFIG_KERNEL_LZ4) = lz4 +tool_$(CONFIG_KERNEL_LZMA) = lzma +tool_$(CONFIG_KERNEL_LZO) = lzo +tool_$(CONFIG_KERNEL_XZ) = xzkern +tool_$(CONFIG_KERNEL_ZSTD) = zstd22 + +targets += vmlinux.bin.z +$(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE + $(call if_changed,$(tool_y)) + +targets += piggy.o dummy.o +OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ + --set-section-flags=.image=contents,alloc,load,readonly,data +$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE + $(call if_changed,objcopy) + +HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) + +# Calculate the load address of the compressed kernel image +hostprogs := calc_vmlinuz_load_addr + +ifneq ($(zload-y),) +VMLINUZ_LOAD_ADDRESS := $(zload-y) +else +VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ + $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) +endif +UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) + +vmlinuzobjs-y += $(obj)/piggy.o + +quiet_cmd_zld = LD $@ + cmd_zld = $(LD) $(KBUILD_LDFLAGS) -Ttext $(VMLINUZ_LOAD_ADDRESS) -T $< $(vmlinuzobjs-y) -o $@ +quiet_cmd_strip = STRIP $@ + cmd_strip = $(STRIP) -s $@ +vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr + $(call cmd,zld) + $(call cmd,strip) + +clean-files += $(objtree)/vmlinuz +clean-files += $(objtree)/vmlinuz.bin diff --git a/arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c b/arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c new file mode 100644 index 000000000000..d14f75ec8273 --- /dev/null +++ b/arch/loongarch/boot/compressed/calc_vmlinuz_load_addr.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2010 "Wu Zhangjin" + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char *argv[]) +{ + unsigned long long vmlinux_size, vmlinux_load_addr, vmlinuz_load_addr; + struct stat sb; + + if (argc != 3) { + fprintf(stderr, "Usage: %s \n", + argv[0]); + return EXIT_FAILURE; + } + + if (stat(argv[1], &sb) == -1) { + perror("stat"); + return EXIT_FAILURE; + } + + /* Convert hex characters to dec number */ + errno = 0; + if (sscanf(argv[2], "%llx", &vmlinux_load_addr) != 1) { + if (errno != 0) + perror("sscanf"); + else + fprintf(stderr, "No matching characters\n"); + + return EXIT_FAILURE; + } + + vmlinux_size = (uint64_t)sb.st_size; + vmlinuz_load_addr = vmlinux_load_addr + vmlinux_size; + + /* + * Align with 64KB: KEXEC needs load sections to be aligned to PAGE_SIZE, + * which may be as large as 64KB depending on the kernel configuration. + */ + + vmlinuz_load_addr += (SZ_64K - vmlinux_size % SZ_64K); + + printf("0x%llx\n", vmlinuz_load_addr); + + return EXIT_SUCCESS; +} diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c new file mode 100644 index 000000000000..099aad621aba --- /dev/null +++ b/arch/loongarch/boot/compressed/decompress.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+/*
+ * These two variables specify the free mem region
+ * that can be used as a temporary malloc area
+ */
+unsigned long free_mem_ptr;
+unsigned long free_mem_end_ptr;
+
+/* The linker tells us where the image is. */
+extern unsigned char __image_begin, __image_end;
+
+/* debug interfaces */
+#define puts(s) do {} while (0)
+#define puthex(val) do {} while (0)
+
+void error(char *x)
+{
+	puts("\n\n");
+	puts(x);
+	puts("\n\n -- System halted");
+
+	while (1)
+		;	/* Halt */
+}
+
+/* activate the code for the pre-boot environment */
+#define STATIC static
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+#ifdef CONFIG_KERNEL_ZSTD
+#include "../../../../lib/decompress_unzstd.c"
+#endif
+
+void decompress_kernel(unsigned long boot_heap_start)
+{
+	unsigned long zimage_start, zimage_size;
+
+	zimage_start = (unsigned long)(&__image_begin);
+	zimage_size = (unsigned long)(&__image_end) -
+			(unsigned long)(&__image_begin);
+
+	puts("zimage at: ");
+	puthex(zimage_start);
+	puts(" ");
+	puthex(zimage_size + zimage_start);
+	puts("\n");
+
+	/* This area is prepared for malloc use when decompressing */
+	free_mem_ptr = boot_heap_start;
+	free_mem_end_ptr = boot_heap_start + BOOT_HEAP_SIZE;
+
+	/* Display standard Linux/LoongArch boot prompt */
+	puts("Uncompressing Linux at load address ");
+	puthex(VMLINUX_LOAD_ADDRESS_ULL);
+	puts("\n");
+
+	/* Decompress the kernel with the selected algorithm */
+	__decompress((char *)zimage_start, zimage_size, 0, 0,
+			(void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error);
+
+	puts("Now, booting the kernel...\n");
+}
diff --git a/arch/loongarch/boot/compressed/dummy.c b/arch/loongarch/boot/compressed/dummy.c
new file mode 100644
index 000000000000..31dbf45bf99c
--- /dev/null
+++ b/arch/loongarch/boot/compressed/dummy.c
@@ -0,0 +1,4 @@
+int main(void)
+{
+	return 0;
+}
diff --git a/arch/loongarch/boot/compressed/head.S b/arch/loongarch/boot/compressed/head.S
new file mode 100644
index 000000000000..ad34d02ccf66
--- /dev/null
+++ b/arch/loongarch/boot/compressed/head.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Loongson Technology Co., Ltd. 
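+ *
+ * start: saves the firmware arguments, sets up the direct mapping
+ * windows, clears BSS, calls decompress_kernel() and finally jumps
+ * to KERNEL_ENTRY with the saved arguments restored.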
+ */ + +#include +#include +#include +#include +#include + +SYM_CODE_START(start) + /* Save boot rom start args */ + move s0, a0 + move s1, a1 + move s2, a2 + + /* Config Direct Mapping */ + li.d t0, CSR_DMW0_INIT + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT + csrwr t0, LOONGARCH_CSR_DMWIN1 + + /* Clear BSS */ + la.abs a0, _edata + la.abs a2, _end +1: st.d zero, a0, 0 + addi.d a0, a0, 8 + bne a2, a0, 1b + + la.abs a0, .heap /* heap address */ + la.abs sp, .stack + 8192 /* stack address */ + + la.pcrel ra, 2f + la.pcrel t4, decompress_kernel + jirl zero, t4, 0 +2: + move a0, s0 + move a1, s1 + move a2, s2 + PTR_LI t4, KERNEL_ENTRY + jirl zero, t4, 0 +3: + b 3b +SYM_CODE_END(start) + + .comm .heap,BOOT_HEAP_SIZE,4 + .comm .stack,4096*2,4 diff --git a/arch/loongarch/boot/compressed/ld.script b/arch/loongarch/boot/compressed/ld.script new file mode 100644 index 000000000000..2e32d8a72fdf --- /dev/null +++ b/arch/loongarch/boot/compressed/ld.script @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ld.script for compressed kernel support of LoongArch + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +OUTPUT_ARCH(loongarch) +ENTRY(start) +PHDRS { + text PT_LOAD FLAGS(7); /* RWX */ +} +SECTIONS +{ + /* Text and read-only data */ + /* . = VMLINUZ_LOAD_ADDRESS; */ + .text : { + *(.text) + *(.rodata) + }: text + /* End of text section */ + + /* Writable data */ + .data : { + *(.data) + /* Put the compressed image here */ + __image_begin = .; + *(.image) + __image_end = .; + CONSTRUCTORS + . = ALIGN(16); + } + + _edata = .; + /* End of data section */ + + /* BSS */ + .bss : { + *(.bss) + } + . = ALIGN(16); + _end = .; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.options) + *(.comment) + *(.note) + } +} diff --git a/arch/loongarch/boot/compressed/string.c b/arch/loongarch/boot/compressed/string.c new file mode 100644 index 000000000000..67e2355ae50e --- /dev/null +++ b/arch/loongarch/boot/compressed/string.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/loongarch/boot/compressed/string.c + * + * Very small subset of simple string routines + */ + +#include + +void __weak *memset(void *s, int c, size_t n) +{ + int i; + char *ss = s; + + for (i = 0; i < n; i++) + ss[i] = c; + return s; +} + +void __weak *memcpy(void *dest, const void *src, size_t n) +{ + int i; + const char *s = src; + char *d = dest; + + for (i = 0; i < n; i++) + d[i] = s[i]; + return dest; +} + +void __weak *memmove(void *dest, const void *src, size_t n) +{ + int i; + const char *s = src; + char *d = dest; + + if (d < s) { + for (i = 0; i < n; i++) + d[i] = s[i]; + } else if (d > s) { + for (i = n - 1; i >= 0; i--) + d[i] = s[i]; + } + + return dest; +} diff --git a/arch/loongarch/tools/Makefile b/arch/loongarch/tools/Makefile new file mode 100644 index 000000000000..0fc5c25937ba --- /dev/null +++ b/arch/loongarch/tools/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +hostprogs := elf-entry +PHONY += elf-entry +elf-entry: $(obj)/elf-entry + @: diff --git a/arch/loongarch/tools/elf-entry.c b/arch/loongarch/tools/elf-entry.c new file mode 100644 index 000000000000..c80721e0dee1 --- /dev/null +++ b/arch/loongarch/tools/elf-entry.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +__attribute__((noreturn)) +static void die(const char *msg) +{ + fputs(msg, stderr); + exit(EXIT_FAILURE); +} + +int main(int argc, const char *argv[]) +{ + uint64_t entry; + 
size_t nread; + FILE *file; + union { + Elf32_Ehdr ehdr32; + Elf64_Ehdr ehdr64; + } hdr; + + if (argc != 2) + die("Usage: elf-entry \n"); + + file = fopen(argv[1], "r"); + if (!file) { + perror("Unable to open input file"); + return EXIT_FAILURE; + } + + nread = fread(&hdr, 1, sizeof(hdr), file); + if (nread != sizeof(hdr)) { + fclose(file); + perror("Unable to read input file"); + return EXIT_FAILURE; + } + + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) { + fclose(file); + die("Input is not an ELF\n"); + } + + switch (hdr.ehdr32.e_ident[EI_CLASS]) { + case ELFCLASS32: + /* Sign extend to form a canonical address */ + entry = (int64_t)(int32_t)hdr.ehdr32.e_entry; + break; + + case ELFCLASS64: + entry = hdr.ehdr64.e_entry; + break; + + default: + fclose(file); + die("Invalid ELF class\n"); + } + + fclose(file); + printf("0x%016" PRIx64 "\n", entry); + + return EXIT_SUCCESS; +} -- Gitee From 458ae467c7f4f0bdf04176dad7126d908e0bb30c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 30 May 2021 13:04:34 +0800 Subject: [PATCH 095/241] LoongArch: Add STACKTRACE and TRACE_IRQFLAGS support Change-Id: Ic28268c3336b8b41fabf233d72a7a677ae707c21 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 5 + arch/loongarch/Kconfig.debug | 25 +++ arch/loongarch/include/asm/inst.h | 12 ++ arch/loongarch/include/asm/processor.h | 9 ++ arch/loongarch/include/asm/stacktrace.h | 22 +++ arch/loongarch/include/asm/switch_to.h | 8 +- arch/loongarch/include/asm/unwind.h | 43 +++++ arch/loongarch/kernel/Makefile | 4 + arch/loongarch/kernel/asm-offsets.c | 2 + arch/loongarch/kernel/process.c | 118 +++++++++++++- arch/loongarch/kernel/stacktrace.c | 163 +++++++++++++++++++ arch/loongarch/kernel/switch.S | 5 +- arch/loongarch/kernel/traps.c | 29 ++-- arch/loongarch/kernel/unwind_guess.c | 67 ++++++++ arch/loongarch/kernel/unwind_prologue.c | 198 ++++++++++++++++++++++++ 15 files changed, 694 insertions(+), 16 deletions(-) create mode 100644 arch/loongarch/include/asm/unwind.h create mode 100644 arch/loongarch/kernel/stacktrace.c create mode 100644 arch/loongarch/kernel/unwind_guess.c create mode 100644 arch/loongarch/kernel/unwind_prologue.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6a9cbb19aba1..d3cbee5599b2 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -108,6 +108,7 @@ config LOONGARCH select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_EXCEPTION_TRACE + select USER_STACKTRACE_SUPPORT menu "Machine selection" @@ -599,6 +600,10 @@ config LOCKDEP_SUPPORT bool default y +config STACKTRACE_SUPPORT + bool + default y + config TRACE_IRQFLAGS_SUPPORT bool default y diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index e69de29bb2d1..8d90f57cc019 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -0,0 +1,25 @@ +choice + prompt "Choose kernel unwinder" + default UNWINDER_PROLOGUE if KALLSYMS + help + This determines which method will be used for unwinding kernel stack + traces for panics, oopses, bugs, warnings, perf, /proc//stack, + livepatch, lockdep, and more. + +config UNWINDER_GUESS + bool "Guess unwinder" + help + This option enables the "guess" unwinder for unwinding kernel stack + traces. It scans the stack and reports every kernel text address it + finds. Some of the addresses it reports may be incorrect. + +config UNWINDER_PROLOGUE + bool "Prologue unwinder" + depends on KALLSYMS + help + This option enables the "prologue" unwinder for unwinding kernel stack + traces. 
It unwinds the stack frame based on analysis of the prologue code. Symbol
+	  information is needed, at least the address and length of each function.
+	  Some of the addresses it reports may be incorrect.
+
+endchoice
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index fc805abfbf84..05ebb222e0e7 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -82,4 +82,16 @@ void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned i
 unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign);
 unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n);
 
+static inline bool is_branch_insn(union loongarch_instruction insn)
+{
+	return insn.reg1i21_format.opcode >= beqz_op &&
+		insn.reg1i21_format.opcode <= bgeu_op;
+}
+
+static inline bool is_pc_insn(union loongarch_instruction insn)
+{
+	return insn.reg1i20_format.opcode >= pcaddi_op &&
+		insn.reg1i20_format.opcode <= pcaddu18i_op;
+}
+
 #endif /* _ASM_INST_H */
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index f0814b552f23..0ec57de98a4d 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -101,6 +101,10 @@ struct thread_struct {
 	unsigned long reg23, reg24, reg25, reg26; /* s0-s3 */
 	unsigned long reg27, reg28, reg29, reg30, reg31; /* s4-s8 */
 
+	/* __schedule() return address / call frame address */
+	unsigned long sched_ra;
+	unsigned long sched_cfa;
+
 	/* CSR registers */
 	unsigned long csr_prmd;
 	unsigned long csr_crmd;
@@ -129,6 +133,9 @@ struct thread_struct {
 	struct loongarch_fpu fpu FPU_ALIGN;
 };
 
+#define thread_saved_ra(tsk)	(tsk->thread.sched_ra)
+#define thread_saved_fp(tsk)	(tsk->thread.sched_cfa)
+
 #define INIT_THREAD  {						\
 	/*							\
 	 * Main processor registers				\
@@ -145,6 +152,8 @@ struct thread_struct {
 	.reg29			= 0,				\
 	.reg30			= 0,				\
 	.reg31			= 0,				\
+	.sched_ra		= 0,				\
+	.sched_cfa		= 0,				\
 	.csr_crmd		= 0,				\
 	.csr_prmd		= 0,				\
 	.csr_euen		= 0,				\
diff --git a/arch/loongarch/include/asm/stacktrace.h b/arch/loongarch/include/asm/stacktrace.h
index 53cbb9250c0c..4f27562d0e4b 100644
--- a/arch/loongarch/include/asm/stacktrace.h
+++ b/arch/loongarch/include/asm/stacktrace.h
@@ -10,6 +10,28 @@
 #include
 #include
 
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+	STACK_TYPE_IRQ,
+};
+
+struct stack_info {
+	enum stack_type type;
+	unsigned long begin, end, next_sp;
+};
+
+struct stack_frame {
+	unsigned long fp;
+	unsigned long ra;
+};
+
+bool in_task_stack(unsigned long stack, struct task_struct *task,
+			struct stack_info *info);
+bool in_irq_stack(unsigned long stack, struct stack_info *info);
+int get_stack_info(unsigned long stack, struct task_struct *task,
+			struct stack_info *info);
+
 #define STR_LONG_L    __stringify(LONG_L)
 #define STR_LONG_S    __stringify(LONG_S)
 #define STR_LONGSIZE  __stringify(LONGSIZE)
diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
index e2d4afefce24..0987ac30475b 100644
--- a/arch/loongarch/include/asm/switch_to.h
+++ b/arch/loongarch/include/asm/switch_to.h
@@ -15,12 +15,15 @@ struct task_struct;
  * @prev:	The task previously executed.
  * @next:	The task to begin executing.
  * @next_ti:	task_thread_info(next).
+ * @sched_ra:	__schedule return address.
+ * @sched_cfa:	__schedule call frame address.
  *
  * This function is used whilst scheduling to save the context of prev & load
  * the context of next. Returns prev.
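+ *
+ * (sched_ra/sched_cfa are stored into prev's thread_struct by switch.S,
+ * letting the unwinder later resume a sleeping task from its last
+ * __schedule() frame.)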
*/ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, - struct task_struct *next, struct thread_info *next_ti); + struct task_struct *next, struct thread_info *next_ti, + void *sched_ra, void *sched_cfa); /* * For newly created kernel threads switch_to() will return to @@ -31,7 +34,8 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, #define switch_to(prev, next, last) \ do { \ lose_fpu_inatomic(1, prev); \ - (last) = __switch_to(prev, next, task_thread_info(next)); \ + (last) = __switch_to(prev, next, task_thread_info(next), \ + __builtin_return_address(0), __builtin_frame_address(0)); \ } while (0) #endif /* _ASM_SWITCH_TO_H */ diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h new file mode 100644 index 000000000000..71b36ccf786d --- /dev/null +++ b/arch/loongarch/include/asm/unwind.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Most of this ideas comes from x86. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_UNWIND_H +#define _ASM_UNWIND_H + +#include + +#include +#include + +struct unwind_state { + struct stack_info stack_info; + struct task_struct *task; +#ifdef CONFIG_UNWINDER_PROLOGUE + unsigned long sp, pc, ra; + bool enable; + bool first; +#else /* CONFIG_UNWINDER_GUESS */ + unsigned long sp, pc; + bool first; +#endif + bool error; +}; + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs); +bool unwind_next_frame(struct unwind_state *state); +unsigned long unwind_get_return_address(struct unwind_state *state); + +static inline bool unwind_done(struct unwind_state *state) +{ + return state->stack_info.type == STACK_TYPE_UNKNOWN; +} + +static inline bool unwind_error(struct unwind_state *state) +{ + return state->error; +} +#endif /* _ASM_UNWIND_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 840c1a8a8de2..ffbec54f0c0e 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_MODULES) += module.o module-sections.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_CPU_HAS_FPU) += fpu.o @@ -21,4 +22,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o +obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 1945ff69c232..bf125304de56 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -103,6 +103,8 @@ void output_thread_defines(void) OFFSET(THREAD_REG29, task_struct, thread.reg29); OFFSET(THREAD_REG30, task_struct, thread.reg30); OFFSET(THREAD_REG31, task_struct, thread.reg31); + OFFSET(THREAD_SCHED_RA, task_struct, thread.sched_ra); + OFFSET(THREAD_SCHED_CFA, task_struct, thread.sched_cfa); OFFSET(THREAD_CSRCRMD, task_struct, thread.csr_crmd); OFFSET(THREAD_CSRPRMD, task_struct, diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index dc71fe9b949c..11ef4d8806a2 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -37,7 +37,9 @@ #include #include #include +#include #include +#include #include /* @@ -132,6 +134,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs = (struct pt_regs *) childksp - 1; /* Put the stack after the struct 
pt_regs. */ childksp = (unsigned long) childregs; + p->thread.sched_cfa = 0; p->thread.csr_euen = 0; p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD); p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD); @@ -142,6 +145,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg24 = kthread_arg; p->thread.reg03 = childksp; p->thread.reg01 = (unsigned long) ret_from_kernel_thread; + p->thread.sched_ra = (unsigned long) ret_from_kernel_thread; memset(childregs, 0, sizeof(struct pt_regs)); childregs->csr_euen = p->thread.csr_euen; childregs->csr_crmd = p->thread.csr_crmd; @@ -158,6 +162,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.reg03 = (unsigned long) childregs; p->thread.reg01 = (unsigned long) ret_from_fork; + p->thread.sched_ra = (unsigned long) ret_from_fork; /* * New tasks lose permission to use the fpu. This accelerates context @@ -177,9 +182,120 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } +bool in_task_stack(unsigned long stack, struct task_struct *task, + struct stack_info *info) +{ + unsigned long begin = (unsigned long)task_stack_page(task); + unsigned long end = begin + THREAD_SIZE; + + if (stack < begin || stack >= end) + return false; + + info->type = STACK_TYPE_TASK; + info->begin = begin; + info->end = end; + info->next_sp = 0; + + return true; +} + +bool in_irq_stack(unsigned long stack, struct stack_info *info) +{ + unsigned long nextsp; + unsigned long begin = (unsigned long)this_cpu_read(irq_stack); + unsigned long end = begin + IRQ_STACK_START; + + if (stack < begin || stack >= end) + return false; + + nextsp = *(unsigned long *)end; + if (nextsp & (SZREG - 1)) + return false; + + info->type = STACK_TYPE_IRQ; + info->begin = begin; + info->end = end; + info->next_sp = nextsp; + + return true; +} + +int get_stack_info(unsigned long stack, struct task_struct *task, + struct stack_info *info) +{ + task = task ? : current; + + if (!stack || stack & (SZREG - 1)) + goto unknown; + + if (in_task_stack(stack, task, info)) + return 0; + + if (task != current) + goto unknown; + + if (in_irq_stack(stack, info)) + return 0; + +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; +} + unsigned long get_wchan(struct task_struct *task) { - return 0; + unsigned long stack_page; + unsigned long pc; + unsigned long stack_page_end __maybe_unused; + unsigned long frame __maybe_unused; + struct unwind_state state __maybe_unused; + + if (!task || task == current || task->state == TASK_RUNNING) + return 0; + + stack_page = (unsigned long)try_get_task_stack(task); + if (!stack_page) + return 0; + + pc = thread_saved_ra(task); + +#ifdef CONFIG_UNWINDER_GUESS + /* + * Use "guess" unwinder get wchan is not reliable. + * Just calculate a simpler wchan value here. + * Do nothing here. 
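+	 * (pc keeps thread_saved_ra(task), the __schedule() return address
+	 * recorded at the last context switch.)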
+ */ +#else /* CONFIG_UNWINDER_PROLOGUE */ + stack_page_end = stack_page + THREAD_SIZE; + frame = thread_saved_fp(task); + + if (frame >= stack_page_end || frame < stack_page) + goto out; + + memset(&state, 0, sizeof(state)); + state.task = task; + state.sp = frame; + state.stack_info.type = STACK_TYPE_TASK; + state.stack_info.begin = stack_page; + state.stack_info.end = stack_page_end; + state.stack_info.next_sp = 0; + state.enable = true; + state.first = true; + state.pc = pc; + + while (in_sched_functions(pc)) { + if (!unwind_next_frame(&state)) { + pc = 0; + goto out; + } + pc = unwind_get_return_address(&state); + } + +out: +#endif /* CONFIG_UNWINDER_GUESS */ + + put_task_stack(task); + return pc; } unsigned long stack_top(void) diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c new file mode 100644 index 000000000000..f1a67285680a --- /dev/null +++ b/arch/loongarch/kernel/stacktrace.c @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Stack trace management functions + * + * Copyright (C) 2006 Atsushi Nemoto + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +typedef bool (*stack_trace_consume_fn)(struct stack_trace *trace, + unsigned long addr); + +static bool consume_entry(struct stack_trace *trace, unsigned long addr) +{ + if (trace->nr_entries >= trace->max_entries) + return false; + + if (trace->skip > 0) { + trace->skip--; + return true; + } + + trace->entries[trace->nr_entries++] = addr; + return trace->nr_entries < trace->max_entries; +} + +static bool consume_entry_nosched(struct stack_trace *trace, + unsigned long addr) +{ + if (in_sched_functions(addr)) + return true; + return consume_entry(trace, addr); +} + +static void save_context_stack(struct task_struct *tsk, + struct stack_trace *trace, + struct pt_regs *regs, + stack_trace_consume_fn fn) +{ + struct pt_regs dummyregs; + struct unwind_state state; + unsigned long addr; + + regs = &dummyregs; + + if (tsk == current) { + regs->csr_era = (unsigned long)__builtin_return_address(0); + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + } else { + regs->csr_era = thread_saved_ra(tsk); + regs->regs[3] = thread_saved_fp(tsk); + } + + regs->regs[1] = 0; + regs->regs[22] = 0; + + for (unwind_start(&state, tsk, regs); + !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || !fn(trace, addr)) + return; + } +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. 
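+ * (save_stack_trace(), save_stack_trace_regs() and save_stack_trace_tsk()
+ * below differ only in the consume callback and skip count they choose;
+ * the actual walking is done by save_context_stack() via the unwinder.)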
+ */ +void save_stack_trace(struct stack_trace *trace) +{ + stack_trace_consume_fn consume = consume_entry; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + save_context_stack(current, trace, NULL, consume); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + stack_trace_consume_fn consume = consume_entry; + + /* We don't want this function nor the caller */ + trace->skip += 7; + WARN_ON(trace->nr_entries || !trace->max_entries); + + save_context_stack(current, trace, regs, consume); +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); + +void save_stack_trace_tsk(struct task_struct *tsk, + struct stack_trace *trace) +{ + stack_trace_consume_fn consume = consume_entry_nosched; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + save_context_stack(tsk, trace, NULL, consume); +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +static int +copy_stack_frame(unsigned long fp, struct stack_frame *frame) +{ + int ret; + unsigned long err; + unsigned long __user *user_frame_tail; + + user_frame_tail = (unsigned long *)(fp - sizeof(struct stack_frame)); + if (!access_ok(user_frame_tail, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + err = (__copy_from_user_inatomic(frame, user_frame_tail, sizeof(*frame))); + if (err || (unsigned long)user_frame_tail >= frame->fp) + ret = 0; + pagefault_enable(); + + return ret; +} + +static inline void __save_stack_trace_user(struct stack_trace *trace) +{ + const struct pt_regs *regs = task_pt_regs(current); + unsigned long fp = regs->regs[22]; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->csr_era; + + while (trace->nr_entries < trace->max_entries && fp && !((unsigned long)fp & 0xf)) { + struct stack_frame frame; + + frame.fp = 0; + frame.ra = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if (!frame.ra) + break; + trace->entries[trace->nr_entries++] = + frame.ra; + fp = frame.fp; + } +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm) + __save_stack_trace_user(trace); + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index 3445cc59d730..75d63830f3c3 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -17,7 +17,8 @@ /* * task_struct *__switch_to(task_struct *prev, task_struct *next, - * struct thread_info *next_ti) + * struct thread_info *next_ti, + * void *sched_ra, void *sched_cfa) */ .align 5 SYM_FUNC_START(__switch_to) @@ -26,6 +27,8 @@ SYM_FUNC_START(__switch_to) cpu_save_nonscratch a0 stptr.d ra, a0, THREAD_REG01 + stptr.d a3, a0, THREAD_SCHED_RA + stptr.d a4, a0, THREAD_SCHED_CFA move tp, a2 cpu_restore_nonscratch a1 diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 85ecdc00ff0c..1c247b1fac1d 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "access-helper.h" @@ -64,20 +65,24 @@ extern asmlinkage void handle_vint(void); static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { - unsigned long addr; - unsigned long *sp = (unsigned long *)(regs->regs[3] & ~3); + unsigned long pc; + struct unwind_state state; + struct pt_regs *pregs = (struct pt_regs *)regs; - printk("%sCall Trace:", 
loglvl); -#ifdef CONFIG_KALLSYMS - printk("%s\n", loglvl); + if (!task) + task = current; + + unwind_start(&state, task, pregs); + +#ifdef CONFIG_UNWINDER_PROLOGUE + if (user_mode(regs)) + state.enable = false; #endif - while (!kstack_end(sp)) { - if (__get_addr(&addr, sp++, user)) { - printk("%s (Bad stack address)", loglvl); - break; - } - if (__kernel_text_address(addr)) - print_ip_sym(loglvl, addr); + + printk("%sCall Trace:\n", loglvl); + for (; !unwind_done(&state); unwind_next_frame(&state)) { + pc = unwind_get_return_address(&state); + print_ip_sym(loglvl, pc); } printk("%s\n", loglvl); } diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c new file mode 100644 index 000000000000..a3c47a9e2d8c --- /dev/null +++ b/arch/loongarch/kernel/unwind_guess.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include + +#include +#include + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + + if (state->first) + return state->pc; + + return *(unsigned long *)(state->sp); +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + unsigned long addr; + + if (unwind_done(state)) + return false; + + if (state->first) + state->first = false; + + do { + for (state->sp += sizeof(unsigned long); + state->sp < info->end; + state->sp += sizeof(unsigned long)) { + addr = *(unsigned long *)(state->sp); + + if (__kernel_text_address(addr)) + return true; + } + + state->sp = info->next_sp; + + } while (!get_stack_info(state->sp, state->task, info)); + + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + memset(state, 0, sizeof(*state)); + + state->task = task; + + state->sp = regs->regs[3]; + state->pc = regs->csr_era; + state->first = true; + + get_stack_info(state->sp, state->task, &state->stack_info); + + if (!unwind_done(state) && !__kernel_text_address(state->pc)) + unwind_next_frame(state); +} +EXPORT_SYMBOL_GPL(unwind_start); diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c new file mode 100644 index 000000000000..687644e0994f --- /dev/null +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
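+ *
+ * The prologue unwinder recovers each frame by decoding the function
+ * prologue: the "addi.d $sp, $sp, -framesize" that opens the stack frame
+ * and the "st.d $ra, $sp, offset" that spills the return address.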
+ */
+#include
+
+#include
+#include
+#include
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return 0;
+
+	if (state->enable) {
+		return state->pc;
+	} else {
+		if (state->first)
+			return state->pc;
+
+		return *(unsigned long *)(state->sp);
+	}
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static inline bool is_stack_open_ins(union loongarch_instruction *ip)
+{
+	/* addi.d $sp, $sp, -imm */
+	return ip->reg2i12_format.opcode == addid_op &&
+		ip->reg2i12_format.rj == 3 &&
+		ip->reg2i12_format.rd == 3 &&
+		ip->reg2i12_format.simmediate < 0;
+}
+
+static inline bool is_ra_save_ins(union loongarch_instruction *ip)
+{
+	/* st.d $ra, $sp, offset */
+	return ip->reg2i12_format.opcode == std_op &&
+		ip->reg2i12_format.rj == 3 &&
+		ip->reg2i12_format.rd == 1;
+}
+
+static inline bool is_branch_ins(union loongarch_instruction *ip)
+{
+	return is_branch_insn(*ip);
+}
+
+static bool unwind_by_prologue(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+	union loongarch_instruction *ip, *ip_end;
+	unsigned long frame_size = 0;
+	long frame_ra = -1;	/* must be signed: -1 means no ra-save was found */
+	unsigned long size, offset, pc = state->pc;
+
+	if (state->sp >= info->end || state->sp < info->begin)
+		return false;
+
+	if (!kallsyms_lookup_size_offset(pc, &size, &offset))
+		return false;
+
+	ip = (union loongarch_instruction *)(pc - offset);
+	ip_end = (union loongarch_instruction *)pc;
+
+	while (ip < ip_end) {
+		if (is_stack_open_ins(ip)) {
+			frame_size = -ip->reg2i12_format.simmediate;
+			ip++;
+			break;
+		}
+		ip++;
+	}
+
+	if (!frame_size) {
+		if (state->first)
+			goto first;
+
+		return false;
+	}
+
+	while (ip < ip_end) {
+		if (is_ra_save_ins(ip)) {
+			frame_ra = ip->reg2i12_format.simmediate;
+			break;
+		}
+		if (is_branch_ins(ip))
+			break;
+		ip++;
+	}
+
+	if (frame_ra < 0) {
+		if (state->first) {
+			state->sp = state->sp + frame_size;
+			goto first;
+		}
+		return false;
+	}
+
+	if (state->first)
+		state->first = false;
+
+	state->pc = *(unsigned long *)(state->sp + frame_ra);
+	state->sp = state->sp + frame_size;
+	return !!__kernel_text_address(state->pc);
+
+first:
+	state->first = false;
+	if (state->pc == state->ra)
+		return false;
+
+	state->pc = state->ra;
+
+	return !!__kernel_text_address(state->ra);
+}
+
+static bool unwind_by_guess(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+	unsigned long addr;
+
+	for (state->sp += sizeof(unsigned long);
+	     state->sp < info->end;
+	     state->sp += sizeof(unsigned long)) {
+		addr = *(unsigned long *)(state->sp);
+
+		if (__kernel_text_address(addr))
+			return true;
+	}
+
+	return false;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+	struct pt_regs *regs;
+	unsigned long pc;
+
+	if (unwind_done(state))
+		return false;
+
+	do {
+		if (state->enable) {
+			if (unwind_by_prologue(state))
+				return true;
+
+			if (info->type == STACK_TYPE_IRQ &&
+				info->end == state->sp) {
+				regs = (struct pt_regs *)info->next_sp;
+				pc = regs->csr_era;
+				if (user_mode(regs) || !__kernel_text_address(pc))
+					return false;
+
+				state->pc = pc;
+				state->sp = regs->regs[3];
+				state->ra = regs->regs[1];
+				state->first = true;
+				get_stack_info(state->sp, state->task, info);
+
+				return true;
+			}
+		} else {
+			if (state->first)
+				state->first = false;
+
+			if (unwind_by_guess(state))
+				return true;
+		}
+
+		state->sp = info->next_sp;
+
+	} while (!get_stack_info(state->sp, state->task, info));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+		  struct pt_regs *regs)
+{
+	memset(state, 0, sizeof(*state));
+
+	if (__kernel_text_address(regs->csr_era)) {
+		state->enable = true;
+	}
+
+	state->task = task;
+	state->pc = regs->csr_era;
+	state->sp = regs->regs[3];
+	state->ra = regs->regs[1];
+	state->first = true;
+
+	get_stack_info(state->sp, state->task, &state->stack_info);
+
+	if (!unwind_done(state) && !__kernel_text_address(state->pc))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(unwind_start);
--
Gitee

From fb9e7d4aa465b90293ac46c83516fc20764ab928 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 096/241] LoongArch: Add kernel hacking options support

Add some kernel hacking options support for LoongArch, including
sysrq and zboot debugging.

Change-Id: I9b69b7759b1f71adea97cd0e72f6c70083aa02aa
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig.debug                | 19 ++++++
 arch/loongarch/boot/compressed/Makefile     |  5 ++
 arch/loongarch/boot/compressed/dbg.c        | 37 ++++++++++++
 arch/loongarch/boot/compressed/decompress.c |  5 ++
 arch/loongarch/boot/compressed/uart-16550.c | 40 +++++++++++++
 arch/loongarch/kernel/Makefile              |  1 +
 arch/loongarch/kernel/sysrq.c               | 65 +++++++++++++++++++++
 7 files changed, 172 insertions(+)
 create mode 100644 arch/loongarch/boot/compressed/dbg.c
 create mode 100644 arch/loongarch/boot/compressed/uart-16550.c
 create mode 100644 arch/loongarch/kernel/sysrq.c

diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
index 8d90f57cc019..11f505e4aa05 100644
--- a/arch/loongarch/Kconfig.debug
+++ b/arch/loongarch/Kconfig.debug
@@ -23,3 +23,22 @@ config UNWINDER_PROLOGUE
 	  Some of the addresses it reports may be incorrect.
 
 endchoice
+
+config DEBUG_ZBOOT
+	bool "Enable compressed kernel support debugging"
+	depends on DEBUG_KERNEL && SYS_SUPPORTS_ZBOOT
+	default n
+	help
+	  If you want to add compressed kernel support to a new board, and the
+	  board supports a uart16550-compatible serial port, please select
+	  SYS_SUPPORTS_ZBOOT_UART16550 for your board and enable this option
+	  to debug it.
+
+	  If your board doesn't support a uart16550-compatible serial port, you
+	  can try to select SYS_SUPPORTS_ZBOOT and use other methods to
+	  debug it. For example, add new serial port support just as
+	  arch/loongarch/boot/compressed/uart-16550.c does.
+
+	  After the compressed kernel support works, please disable this option
+	  to reduce the kernel image size and speed up the booting procedure a
+	  little.
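A note on how these pieces fit together: dbg.c (added below) declares putc()
as an empty __weak stub and builds puts()/puthex() on top of it, so a board
without a 16550-style UART only has to supply a single byte-output routine to
get decompressor logging. A minimal sketch of such an override follows; the
MY_CON_TX name and address are made-up placeholders for a hypothetical
memory-mapped transmit register, not a real Loongson device:

	/* Sketch only: board-specific putc() overriding the __weak stub in dbg.c */
	#define MY_CON_TX	0x1fe002e0	/* hypothetical TX register */

	void putc(char c)
	{
		/* TO_UNCACHE() maps the MMIO address, as uart-16550.c below does */
		*(volatile unsigned char *)TO_UNCACHE(MY_CON_TX) = c;
	}

With DEBUG_ZBOOT enabled, decompress.c then prints the zimage range and the
kernel load address through these helpers during boot.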
diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile
index e3ed0f54a264..76c5b19cb0e0 100644
--- a/arch/loongarch/boot/compressed/Makefile
+++ b/arch/loongarch/boot/compressed/Makefile
@@ -23,6 +23,11 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
 # decompressor objects (linked with vmlinuz)
 vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
 
+ifdef CONFIG_DEBUG_ZBOOT
+vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/dbg.o
+vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/uart-16550.o
+endif
+
 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
 
 extra-y += ashldi3.c
diff --git a/arch/loongarch/boot/compressed/dbg.c b/arch/loongarch/boot/compressed/dbg.c
new file mode 100644
index 000000000000..329193f0b059
--- /dev/null
+++ b/arch/loongarch/boot/compressed/dbg.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LoongArch-specific debug support for pre-boot environment
+ *
+ * NOTE: putc() is board-specific. If your board has a 16550-compatible uart,
+ * please select SYS_SUPPORTS_ZBOOT_UART16550 for your machine. Otherwise, you
+ * need to implement your own putc().
+ */
+#include
+#include
+
+void __weak putc(char c)
+{
+}
+
+void puts(const char *s)
+{
+	char c;
+	while ((c = *s++) != '\0') {
+		putc(c);
+		if (c == '\n')
+			putc('\r');
+	}
+}
+
+void puthex(unsigned long long val)
+{
+
+	unsigned char buf[10];
+	int i;
+	for (i = 7; i >= 0; i--) {
+		buf[i] = "0123456789ABCDEF"[val & 0x0F];
+		val >>= 4;
+	}
+	buf[8] = '\0';
+	puts(buf);
+}
diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c
index 099aad621aba..e3076445d2f4 100644
--- a/arch/loongarch/boot/compressed/decompress.c
+++ b/arch/loongarch/boot/compressed/decompress.c
@@ -22,8 +22,13 @@ unsigned long free_mem_end_ptr;
 extern unsigned char __image_begin, __image_end;
 
 /* debug interfaces */
+#ifdef CONFIG_DEBUG_ZBOOT
+extern void puts(const char *s);
+extern void puthex(unsigned long long val);
+#else
 #define puts(s) do {} while (0)
 #define puthex(val) do {} while (0)
+#endif
 
 void error(char *x)
 {
diff --git a/arch/loongarch/boot/compressed/uart-16550.c b/arch/loongarch/boot/compressed/uart-16550.c
new file mode 100644
index 000000000000..32051e8c717d
--- /dev/null
+++ b/arch/loongarch/boot/compressed/uart-16550.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * 16550-compatible UART based serial debug support for zboot
+ */
+
+#include
+#include
+
+#include
+
+#define UART_BASE 0x1fe001e0
+#define PORT(offset) (TO_UNCACHE(UART_BASE) + (offset))
+
+#ifndef IOTYPE
+#define IOTYPE char
+#endif
+
+#ifndef PORT
+#error please define the serial port address for your own machine
+#endif
+
+static inline unsigned int serial_in(int offset)
+{
+	return *((volatile IOTYPE *)PORT(offset)) & 0xFF;
+}
+
+static inline void serial_out(int offset, int value)
+{
+	*((volatile IOTYPE *)PORT(offset)) = value & 0xFF;
+}
+
+void putc(char c)
+{
+	int timeout = 1000000;
+
+	while (((serial_in(UART_LSR) & UART_LSR_THRE) == 0) && (timeout-- > 0))
+		;
+
+	serial_out(UART_TX, c);
+}
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index ffbec54f0c0e..bc71365a0cf4 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
 obj-$(CONFIG_SMP) += smp.o
 
 obj-$(CONFIG_PROC_FS) += proc.o
+obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o
 
 obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
diff --git
a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c new file mode 100644 index 000000000000..1f449b1610ca --- /dev/null +++ b/arch/loongarch/kernel/sysrq.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch specific sysrq operations. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include + +#include +#include + +/* + * Dump TLB entries on all CPUs. + */ + +static DEFINE_SPINLOCK(show_lock); + +static void sysrq_tlbdump_single(void *dummy) +{ + unsigned long flags; + + spin_lock_irqsave(&show_lock, flags); + + pr_info("CPU%d:\n", smp_processor_id()); + dump_tlb_regs(); + pr_info("\n"); + dump_tlb_all(); + pr_info("\n"); + + spin_unlock_irqrestore(&show_lock, flags); +} + +#ifdef CONFIG_SMP +static void sysrq_tlbdump_othercpus(struct work_struct *dummy) +{ + smp_call_function(sysrq_tlbdump_single, NULL, 0); +} + +static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus); +#endif + +static void sysrq_handle_tlbdump(int key) +{ + sysrq_tlbdump_single(NULL); +#ifdef CONFIG_SMP + schedule_work(&sysrq_tlbdump); +#endif +} + +static struct sysrq_key_op sysrq_tlbdump_op = { + .handler = sysrq_handle_tlbdump, + .help_msg = "show-tlbs(x)", + .action_msg = "Show TLB entries", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static int __init loongarch_sysrq_init(void) +{ + return register_sysrq_key('x', &sysrq_tlbdump_op); +} +arch_initcall(loongarch_sysrq_init); -- Gitee From b753008d24f218d3ced5bcd34f6a7fa6e142b2f8 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 097/241] LoongArch: Add relocatable kernel support Change-Id: Icee943f38b674e9f9a0c9e74cb1ddc8e7f7a6084 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 61 +++ arch/loongarch/Makefile | 6 + arch/loongarch/Makefile.postlink | 42 ++ arch/loongarch/boot/tools/Makefile | 9 + arch/loongarch/boot/tools/relocs.c | 568 ++++++++++++++++++++++++ arch/loongarch/boot/tools/relocs.h | 59 +++ arch/loongarch/boot/tools/relocs_32.c | 18 + arch/loongarch/boot/tools/relocs_64.c | 18 + arch/loongarch/boot/tools/relocs_main.c | 85 ++++ arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/head.S | 18 + arch/loongarch/kernel/relocate.c | 293 ++++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 20 + scripts/kallsyms.c | 3 +- 14 files changed, 1201 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/Makefile.postlink create mode 100644 arch/loongarch/boot/tools/Makefile create mode 100644 arch/loongarch/boot/tools/relocs.c create mode 100644 arch/loongarch/boot/tools/relocs.h create mode 100644 arch/loongarch/boot/tools/relocs_32.c create mode 100644 arch/loongarch/boot/tools/relocs_64.c create mode 100644 arch/loongarch/boot/tools/relocs_main.c create mode 100644 arch/loongarch/kernel/relocate.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d3cbee5599b2..0d3b9dc93c72 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -132,6 +132,7 @@ config MACH_LOONGSON64 select SYS_SUPPORTS_HOTPLUG_CPU select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_RELOCATABLE select ZONE_DMA32 help This enables the support of Loongson 64-bit family of machines. These @@ -163,6 +164,11 @@ config SYS_SUPPORTS_HOTPLUG_CPU config GENERIC_CSUM def_bool y +config SYS_SUPPORTS_RELOCATABLE + bool + help + Selected if the platform supports relocating the kernel. 
+ config SYS_SUPPORTS_HUGETLBFS def_bool y @@ -411,6 +417,61 @@ config NUMA config SYS_SUPPORTS_NUMA bool +config RELOCATABLE + bool "Relocatable kernel" + depends on SYS_SUPPORTS_RELOCATABLE + help + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations make the kernel binary about 15% larger, + but are discarded at runtime + +config RELOCATION_TABLE_SIZE + hex "Relocation table size" + depends on RELOCATABLE + range 0x0 0x01000000 + default "0x00200000" + help + A table of relocation data will be appended to the kernel binary + and parsed at boot to fix up the relocated kernel. + + This option allows the amount of space reserved for the table to be + adjusted, although the default of 1Mb should be ok in most cases. + + The build will fail and a valid size suggested if this is too small. + + If unsure, leave at the default value. + +config RANDOMIZE_BASE + bool "Randomize the address of the kernel image" + depends on RELOCATABLE + help + Randomizes the physical and virtual address at which the + kernel image is loaded, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. + + Entropy is generated using any coprocessor 0 registers available. + + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET. + + If unsure, say N. + +config RANDOMIZE_BASE_MAX_OFFSET + hex "Maximum kASLR offset" if EXPERT + depends on RANDOMIZE_BASE + range 0x0 0x40000000 if 64BIT + range 0x0 0x08000000 + default "0x01000000" + help + When kASLR is active, this provides the maximum offset that will + be applied to the kernel image. It should be set according to the + amount of physical RAM available in the target system minus + PHYSICAL_START and must be a power of 2. + + This is limited by the size of KPRANGE1, 256Mb on 32-bit or 1Gb with + 64-bit. The default is 16Mb. + config NODES_SHIFT int default "6" diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index d0ab77b7bda5..15b1c394a00c 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -5,6 +5,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/loongarch/tools elf-entry + $(Q)$(MAKE) $(build)=arch/loongarch/boot/tools relocs # # Select the object file format to substitute into the linker script. @@ -61,6 +62,10 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs endif +ifeq ($(CONFIG_RELOCATABLE),y) +LDFLAGS_vmlinux += --emit-relocs +endif + cflags-y += -ffreestanding # @@ -163,6 +168,7 @@ endif archclean: $(Q)$(MAKE) $(clean)=arch/loongarch/boot $(Q)$(MAKE) $(clean)=arch/loongarch/boot/compressed + $(Q)$(MAKE) $(clean)=arch/loongarch/boot/tools $(Q)$(MAKE) $(clean)=arch/loongarch/tools define archhelp diff --git a/arch/loongarch/Makefile.postlink b/arch/loongarch/Makefile.postlink new file mode 100644 index 000000000000..46aa6050a7d1 --- /dev/null +++ b/arch/loongarch/Makefile.postlink @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0 +# =========================================================================== +# Post-link LoongArch pass +# =========================================================================== +# +# 1. 
Insert relocations into vmlinux + +PHONY := __archpost +__archpost: + +-include include/config/auto.conf +include scripts/Kbuild.include + +CMD_RELOCS = arch/loongarch/boot/tools/relocs --keep +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $@ + +quiet_cmd_strip_relocs = RSTRIP $@ + cmd_strip_relocs = \ + $(OBJCOPY) --remove-section='.rel.*' --remove-section='.rel__*' \ + --remove-section='.rela.*' --remove-section='.rela__*' $@ + +# `@true` prevents complaint when there is nothing to be done + +vmlinux: FORCE + @true +ifeq ($(CONFIG_RELOCATABLE),y) + $(call cmd,relocs) + $(call cmd,strip_relocs) +endif + +%.ko: FORCE + @true + +clean: + @true + +PHONY += FORCE clean + +FORCE: + +.PHONY: $(PHONY) diff --git a/arch/loongarch/boot/tools/Makefile b/arch/loongarch/boot/tools/Makefile new file mode 100644 index 000000000000..592e05a51a4a --- /dev/null +++ b/arch/loongarch/boot/tools/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +hostprogs += relocs +relocs-objs += relocs_32.o +relocs-objs += relocs_64.o +relocs-objs += relocs_main.o +PHONY += relocs +relocs: $(obj)/relocs + @: diff --git a/arch/loongarch/boot/tools/relocs.c b/arch/loongarch/boot/tools/relocs.c new file mode 100644 index 000000000000..e5199c33d304 --- /dev/null +++ b/arch/loongarch/boot/tools/relocs.c @@ -0,0 +1,568 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This is included from relocs_32/64.c */ + +#define ElfW(type) _ElfW(ELF_BITS, type) +#define _ElfW(bits, type) __ElfW(bits, type) +#define __ElfW(bits, type) Elf##bits##_##type + +#define Elf_Addr ElfW(Addr) +#define Elf_Ehdr ElfW(Ehdr) +#define Elf_Phdr ElfW(Phdr) +#define Elf_Shdr ElfW(Shdr) +#define Elf_Sym ElfW(Sym) + +static Elf_Ehdr ehdr; + +struct relocs { + uint32_t *offset; + unsigned long count; + unsigned long size; +}; + +static struct relocs relocs; + +struct section { + Elf_Shdr shdr; + struct section *link; + Elf_Sym *symtab; + Elf_Rel *reltab; + char *strtab; + long shdr_offset; +}; +static struct section *secs; + +static const char * const regex_sym_kernel = { +/* Symbols matching these regex's should never be relocated */ + "^(__crc_)", +}; + +static regex_t sym_regex_c; + +static int regex_skip_reloc(const char *sym_name) +{ + return !regexec(&sym_regex_c, sym_name, 0, NULL, 0); +} + +static void regex_init(void) +{ + char errbuf[128]; + int err; + + err = regcomp(&sym_regex_c, regex_sym_kernel, + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c, errbuf, sizeof(errbuf)); + die("%s", errbuf); + } +} + +static const char *rel_type(unsigned type) +{ + static const char * const type_name[] = { +#define REL_TYPE(X)[X] = #X + REL_TYPE(R_LARCH_NONE), + REL_TYPE(R_LARCH_32), + REL_TYPE(R_LARCH_64), + REL_TYPE(R_LARCH_MARK_LA), + REL_TYPE(R_LARCH_MARK_PCREL), + REL_TYPE(R_LARCH_SOP_PUSH_PCREL), + REL_TYPE(R_LARCH_SOP_PUSH_ABSOLUTE), + REL_TYPE(R_LARCH_SOP_PUSH_DUP), + REL_TYPE(R_LARCH_SOP_PUSH_PLT_PCREL), + REL_TYPE(R_LARCH_SOP_SUB), + REL_TYPE(R_LARCH_SOP_SL), + REL_TYPE(R_LARCH_SOP_SR), + REL_TYPE(R_LARCH_SOP_ADD), + REL_TYPE(R_LARCH_SOP_AND), + REL_TYPE(R_LARCH_SOP_IF_ELSE), + REL_TYPE(R_LARCH_SOP_POP_32_S_10_5), + REL_TYPE(R_LARCH_SOP_POP_32_U_10_12), + REL_TYPE(R_LARCH_SOP_POP_32_S_10_12), + REL_TYPE(R_LARCH_SOP_POP_32_S_10_16), + REL_TYPE(R_LARCH_SOP_POP_32_S_10_16_S2), + REL_TYPE(R_LARCH_SOP_POP_32_S_5_20), + REL_TYPE(R_LARCH_SOP_POP_32_S_0_5_10_16_S2), + REL_TYPE(R_LARCH_SOP_POP_32_S_0_10_10_16_S2), + REL_TYPE(R_LARCH_SOP_POP_32_U), + REL_TYPE(R_LARCH_ADD32), + REL_TYPE(R_LARCH_ADD64), + 
REL_TYPE(R_LARCH_SUB32), + REL_TYPE(R_LARCH_SUB64), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + + if (type < ARRAY_SIZE(type_name) && type_name[type]) + name = type_name[type]; + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + + sec_strtab = secs[ehdr.e_shstrndx].strtab; + if (shndx < ehdr.e_shnum) + name = sec_strtab + secs[shndx].shdr.sh_name; + else if (shndx == SHN_ABS) + name = "ABSOLUTE"; + else if (shndx == SHN_COMMON) + name = "COMMON"; + else + name = ""; + return name; +} + +static struct section *sec_lookup(const char *secname) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) + if (strcmp(secname, sec_name(i)) == 0) + return &secs[i]; + + return NULL; +} + +static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) +{ + const char *name; + + if (sym->st_name) + name = sym_strtab + sym->st_name; + else + name = sec_name(sym->st_shndx); + return name; +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + die("Cannot read ELF header: %s\n", strerror(errno)); + + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) + die("No ELF magic\n"); + + if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) + die("Not a %d bit executable\n", ELF_BITS); + + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) + die("Unsupported ELF Endianness\n"); + + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) + die("Unknown ELF version\n"); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) + die("Unsupported ELF header type\n"); + + if (ehdr.e_machine != ELF_MACHINE) + die("Not for %s\n", ELF_MACHINE_NAME); + + if (ehdr.e_version != EV_CURRENT) + die("Unknown ELF version\n"); + + if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) + die("Bad Elf header size\n"); + + if (ehdr.e_phentsize != sizeof(Elf_Phdr)) + die("Bad program header entry\n"); + + if (ehdr.e_shentsize != sizeof(Elf_Shdr)) + die("Bad section header entry\n"); + + if (ehdr.e_shstrndx >= ehdr.e_shnum) + die("String table index out of bounds\n"); +} + +static void read_shdrs(FILE *fp) +{ + int i; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) + die("Unable to allocate %d section headers\n", ehdr.e_shnum); + + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno)); + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + sec->shdr_offset = ftell(fp); + if (fread(&sec->shdr, sizeof(Elf_Shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; + } +} + +static void read_strtabs(FILE *fp) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_STRTAB) + continue; + + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) + die("malloc of %d bytes for strtab failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + } +} + +static void read_symtabs(FILE *fp) +{ + int i; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_SYMTAB) + continue; + + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) + die("malloc of %d bytes for symtab failed\n", + 
sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + } +} + +static void read_relocs(FILE *fp) +{ + static unsigned long base = 0; + int i, j; + + if (!base) { + struct section *sec = sec_lookup(".text"); + + if (!sec) + die("Could not find .text section\n"); + + base = sec->shdr.sh_addr; + } + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) + die("malloc of %d bytes for relocs failed\n", + sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != + sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + + /* Set offset into kernel image */ + rel->r_offset -= base; + } + } +} + +static void remove_relocs(FILE *fp) +{ + int i; + Elf_Shdr shdr; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + + /* Set relocation section size to 0, effectively removing it. + * This is necessary due to lack of support for relocations + * in objcopy when creating 32bit elf from 64bit elf. 
+ */ + shdr.sh_size = 0; + + if (fseek(fp, sec->shdr_offset, SEEK_SET) < 0) + die("Seek to %d failed: %s\n", + sec->shdr_offset, strerror(errno)); + + if (fwrite(&shdr, sizeof(shdr), 1, fp) != 1) + die("Cannot write ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + } +} + +static void add_reloc(struct relocs *r, uint32_t offset, unsigned type) +{ + /* Relocation representation in binary table: + * |76543210|76543210|76543210|76543210| + * | Type | offset from _text >> 2 | + */ + offset >>= 2; + if (offset > 0x00FFFFFF) + die("Kernel image exceeds maximum size for relocation!\n"); + + offset = (offset & 0x00FFFFFF) | ((type & 0xFF) << 24); + + if (r->count == r->size) { + unsigned long newsize = r->size + 50000; + void *mem = realloc(r->offset, newsize * sizeof(r->offset[0])); + + if (!mem) + die("realloc failed\n"); + + r->offset = mem; + r->size = newsize; + } + r->offset[r->count++] = offset; +} + +static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, + Elf_Sym *sym, const char *symname)) +{ + int i; + + /* Walk through the relocations */ + for (i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf_Sym *sh_symtab; + struct section *sec_applies, *sec_symtab; + int j; + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL_TYPE) + continue; + + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) + continue; + + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { + Elf_Rel *rel = &sec->reltab[j]; + Elf_Sym *sym = &sh_symtab[ELF_R_SYM(rel->r_info)]; + const char *symname = sym_name(sym_strtab, sym); + + process(sec, rel, sym, symname); + } + } +} + +static int do_reloc(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, + const char *symname) +{ + unsigned r_type = ELF_R_TYPE(rel->r_info); + unsigned bind = ELF_ST_BIND(sym->st_info); + + if ((bind == STB_WEAK) && (sym->st_value == 0)) { + /* Don't relocate weak symbols without a target */ + return 0; + } + + if (regex_skip_reloc(symname)) + return 0; + + switch (r_type) { + case R_LARCH_NONE: + case R_LARCH_MARK_PCREL: + case R_LARCH_SOP_PUSH_PCREL: + case R_LARCH_SOP_PUSH_ABSOLUTE: + case R_LARCH_SOP_PUSH_DUP: + case R_LARCH_SOP_PUSH_PLT_PCREL: + case R_LARCH_SOP_SUB: + case R_LARCH_SOP_SL: + case R_LARCH_SOP_SR: + case R_LARCH_SOP_ADD: + case R_LARCH_SOP_AND: + case R_LARCH_SOP_IF_ELSE: + case R_LARCH_SOP_POP_32_S_10_5: + case R_LARCH_SOP_POP_32_U_10_12: + case R_LARCH_SOP_POP_32_S_10_12: + case R_LARCH_SOP_POP_32_S_10_16: + case R_LARCH_SOP_POP_32_S_10_16_S2: + case R_LARCH_SOP_POP_32_S_5_20: + case R_LARCH_SOP_POP_32_S_0_5_10_16_S2: + case R_LARCH_SOP_POP_32_S_0_10_10_16_S2: + case R_LARCH_SOP_POP_32_U: + case R_LARCH_ADD32: + case R_LARCH_ADD64: + case R_LARCH_SUB32: + case R_LARCH_SUB64: + break; + + case R_LARCH_32: + case R_LARCH_64: + case R_LARCH_MARK_LA: + add_reloc(&relocs, rel->r_offset, r_type); + break; + + default: + die("Unsupported relocation type: %s (%d)\n", rel_type(r_type), r_type); + break; + } + + return 0; +} + +static int write_reloc_as_bin(uint32_t v, FILE *f) +{ + return fwrite(&v, 4, 1, f); +} + +static int write_reloc_as_text(uint32_t v, FILE *f) +{ + int res; + + res = fprintf(f, "\t.long 0x%08"PRIx32"\n", v); + if (res < 0) + return res; + else + return sizeof(uint32_t); +} + +static void emit_relocs(int as_text, int as_bin, FILE *outf) +{ + int i; + int (*write_reloc)(uint32_t, FILE *) = write_reloc_as_bin; + 
int size = 0; + int size_reserved; + struct section *sec_reloc; + + sec_reloc = sec_lookup(".data.reloc"); + if (!sec_reloc) + die("Could not find relocation section\n"); + + size_reserved = sec_reloc->shdr.sh_size; + + /* Collect up the relocations */ + walk_relocs(do_reloc); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + /* Output text to stdout */ + write_reloc = write_reloc_as_text; + outf = stdout; + } else if (as_bin) { + /* Output raw binary to stdout */ + outf = stdout; + } else { + /* Seek to offset of the relocation section. + * Each relocation is then written into the + * vmlinux kernel image. + */ + if (fseek(outf, sec_reloc->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec_reloc->shdr.sh_offset, strerror(errno)); + } + } + + for (i = 0; i < relocs.count; i++) + size += write_reloc(relocs.offset[i], outf); + + /* Print a stop, but only if we've actually written some relocs */ + if (size) + size += write_reloc(0, outf); + + if (size > size_reserved) + /* Die, but suggest a value for CONFIG_RELOCATION_TABLE_SIZE + * which will fix this problem and allow a bit of headroom + * if more kernel features are enabled + */ + die("Relocations overflow available space!\n" \ + "Please adjust CONFIG_RELOCATION_TABLE_SIZE " \ + "to at least 0x%08x\n", (size + 0x1000) & ~0xFFF); +} + +/* + * As an aid to debugging problems with different linkers + * print summary information about the relocs. + * Since different linkers tend to emit the sections in + * different orders we use the section names in the output. + */ +static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, + const char *symname) +{ + printf("%-16s 0x%08x %-35s %-40s %-16s\n", + sec_name(sec->shdr.sh_info), + (unsigned int)rel->r_offset, + rel_type(ELF_R_TYPE(rel->r_info)), + symname, + sec_name(sym->st_shndx)); + return 0; +} + +static void print_reloc_info(void) +{ + printf("%-16s %-10s %-35s %-40s %-16s\n", + "reloc section", + "offset", + "reloc type", + "symbol", + "symbol section"); + walk_relocs(do_reloc_info); +} + +#if ELF_BITS == 64 +# define process process_64 +#else +# define process process_32 +#endif + +void process(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs) +{ + regex_init(); + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_reloc_info) { + print_reloc_info(); + return; + } + emit_relocs(as_text, as_bin, fp); + if (!keep_relocs) + remove_relocs(fp); +} diff --git a/arch/loongarch/boot/tools/relocs.h b/arch/loongarch/boot/tools/relocs.h new file mode 100644 index 000000000000..483091f112ee --- /dev/null +++ b/arch/loongarch/boot/tools/relocs.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef RELOCS_H +#define RELOCS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void die(char *fmt, ...); + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#endif +#define R_LARCH_NONE 0 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_MARK_PCREL 21 +#define R_LARCH_SOP_PUSH_PCREL 22 +#define R_LARCH_SOP_PUSH_ABSOLUTE 23 +#define R_LARCH_SOP_PUSH_DUP 24 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#define R_LARCH_SOP_SUB 32 +#define R_LARCH_SOP_SL 33 +#define R_LARCH_SOP_SR 34 +#define R_LARCH_SOP_ADD 35 +#define R_LARCH_SOP_AND 36 
+#define R_LARCH_SOP_IF_ELSE 37 +#define R_LARCH_SOP_POP_32_S_10_5 38 +#define R_LARCH_SOP_POP_32_U_10_12 39 +#define R_LARCH_SOP_POP_32_S_10_12 40 +#define R_LARCH_SOP_POP_32_S_10_16 41 +#define R_LARCH_SOP_POP_32_S_10_16_S2 42 +#define R_LARCH_SOP_POP_32_S_5_20 43 +#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2 44 +#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2 45 +#define R_LARCH_SOP_POP_32_U 46 +#define R_LARCH_ADD32 50 +#define R_LARCH_ADD64 51 +#define R_LARCH_SUB32 55 +#define R_LARCH_SUB64 56 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +void process_32(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs); +void process_64(FILE *fp, int as_text, int as_bin, + int show_reloc_info, int keep_relocs); + +#endif /* RELOCS_H */ diff --git a/arch/loongarch/boot/tools/relocs_32.c b/arch/loongarch/boot/tools/relocs_32.c new file mode 100644 index 000000000000..7cae3d8403bb --- /dev/null +++ b/arch/loongarch/boot/tools/relocs_32.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "relocs.h" + +#define ELF_BITS 32 + +#define ELF_MACHINE EM_LOONGARCH +#define ELF_MACHINE_NAME "LOONGARCH32" +#define SHT_REL_TYPE SHT_RELA +#define Elf_Rel ElfW(Rela) + +#define ELF_CLASS ELFCLASS32 +#define ELF_R_SYM(val) ELF32_R_SYM(val) +#define ELF_R_TYPE(val) ELF32_R_TYPE(val) +#define ELF_ST_TYPE(o) ELF32_ST_TYPE(o) +#define ELF_ST_BIND(o) ELF32_ST_BIND(o) +#define ELF_ST_VISIBILITY(o) ELF32_ST_VISIBILITY(o) + +#include "relocs.c" diff --git a/arch/loongarch/boot/tools/relocs_64.c b/arch/loongarch/boot/tools/relocs_64.c new file mode 100644 index 000000000000..1024133af8ec --- /dev/null +++ b/arch/loongarch/boot/tools/relocs_64.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "relocs.h" + +#define ELF_BITS 64 + +#define ELF_MACHINE EM_LOONGARCH +#define ELF_MACHINE_NAME "LOONGARCH64" +#define SHT_REL_TYPE SHT_RELA +#define Elf_Rel Elf64_Rela + +#define ELF_CLASS ELFCLASS64 +#define ELF_R_SYM(val) ELF64_R_SYM(val) +#define ELF_R_TYPE(val) ELF64_R_TYPE(val) +#define ELF_ST_TYPE(o) ELF64_ST_TYPE(o) +#define ELF_ST_BIND(o) ELF64_ST_BIND(o) +#define ELF_ST_VISIBILITY(o) ELF64_ST_VISIBILITY(o) + +#include "relocs.c" diff --git a/arch/loongarch/boot/tools/relocs_main.c b/arch/loongarch/boot/tools/relocs_main.c new file mode 100644 index 000000000000..e2453a564b11 --- /dev/null +++ b/arch/loongarch/boot/tools/relocs_main.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "relocs.h" + +void die(char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static void usage(void) +{ + die("relocs [--reloc-info|--text|--bin|--keep] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_reloc_info, as_text, as_bin, keep_relocs; + const char *fname; + FILE *fp; + int i; + unsigned char e_ident[EI_NIDENT]; + + show_reloc_info = 0; + as_text = 0; + as_bin = 0; + keep_relocs = 0; + fname = NULL; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + + if (*arg == '-') { + if (strcmp(arg, "--reloc-info") == 0) { + show_reloc_info = 1; + continue; + } + if (strcmp(arg, "--text") == 0) { + as_text = 1; + continue; + } + if (strcmp(arg, "--bin") == 0) { + as_bin = 1; + continue; + } + if (strcmp(arg, "--keep") == 0) { + keep_relocs = 1; + continue; + } + } else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) + usage(); + + fp = fopen(fname, "r+"); + if (!fp) + die("Cannot open %s: %s\n", fname, strerror(errno)); + + if (fread(&e_ident, 1, EI_NIDENT, fp) != EI_NIDENT) + die("Cannot read %s: %s", fname, strerror(errno)); + + rewind(fp); + if (e_ident[EI_CLASS] == ELFCLASS64) + process_64(fp, as_text, as_bin, show_reloc_info, keep_relocs); + else + process_32(fp, as_text, as_bin, show_reloc_info, keep_relocs); + fclose(fp); + return 0; +} diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index bc71365a0cf4..5329ec5a896b 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -23,6 +23,8 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o +obj-$(CONFIG_RELOCATABLE) += relocate.o + obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 6e742e9fbf0b..af79139f1d31 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -65,7 +65,25 @@ SYM_CODE_START(kernel_entry) # kernel entry point PTR_ADD sp, sp, tp set_saved_sp sp, t0, t1 +#ifdef CONFIG_RELOCATABLE + /* Copy kernel and apply the relocations */ + bl relocate_kernel + + /* Repoint the sp into the new kernel image */ + PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) + PTR_ADD sp, sp, tp + set_saved_sp sp, t0, t1 + PTR_ADDI sp, sp, -4 * SZREG # init stack pointer + + /* + * relocate_kernel returns the entry point either + * in the relocated kernel or the original if for + * some reason relocation failed. + */ + jirl zero, a0, 0 +#else bl start_kernel +#endif SYM_CODE_END(kernel_entry) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c new file mode 100644 index 000000000000..ba04dd1b1bab --- /dev/null +++ b/arch/loongarch/kernel/relocate.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Support for Kernel relocation at boot time + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ * Authors: Huacai Chen (chenhuacai@loongson.cn)
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define RELOCATED(x) ((void *)((long)x + offset))
+
+extern u32 _relocation_start[];	/* End kernel image / start relocation table */
+extern u32 _relocation_end[];	/* End relocation table */
+
+static void __init apply_r_loongarch_32_rel(u32 *loc_new, long offset)
+{
+	*loc_new += offset;
+}
+
+static void __init apply_r_loongarch_64_rel(u32 *loc_new, long offset)
+{
+	*(u64 *)loc_new += offset;
+}
+
+/*
+ * How la.abs $r1, x expands on LoongArch:
+ *
+ * lu12i.w $r1, 0
+ * ori     $r1, $r1, 0x0
+ * lu32i.d $r1, 0
+ * lu52i.d $r1, $r1, 0
+ *
+ * LoongArch uses lu12i.w, ori, lu32i.d and lu52i.d to load a 64-bit
+ * immediate: lu12i.w loads bits 31~12, ori loads bits 11~0,
+ * lu32i.d loads bits 51~32, and lu52i.d loads bits 63~52.
+ */
+static void __init apply_r_loongarch_mark_la_rel(u32 *loc_new, long offset)
+{
+	unsigned long long dest;
+	union loongarch_instruction *ori, *lu12iw, *lu32id, *lu52id;
+
+	ori = (union loongarch_instruction *)&loc_new[1];
+	lu12iw = (union loongarch_instruction *)&loc_new[0];
+	lu32id = (union loongarch_instruction *)&loc_new[2];
+	lu52id = (union loongarch_instruction *)&loc_new[3];
+
+	dest = ori->reg2ui12_format.simmediate & 0xfff;
+	dest |= (lu12iw->reg1i20_format.simmediate & 0xfffff) << 12;
+	dest |= ((u64)lu32id->reg1i20_format.simmediate & 0xfffff) << 32;
+	dest |= ((u64)lu52id->reg2i12_format.simmediate & 0xfff) << 52;
+	dest += offset;
+
+	ori->reg2ui12_format.simmediate = dest & 0xfff;
+	lu12iw->reg1i20_format.simmediate = (dest >> 12) & 0xfffff;
+	lu32id->reg1i20_format.simmediate = (dest >> 32) & 0xfffff;
+	lu52id->reg2i12_format.simmediate = (dest >> 52) & 0xfff;
+}
+
+static void (*reloc_handlers_rel[]) (u32 *, long) __initdata = {
+	[R_LARCH_32]		= apply_r_loongarch_32_rel,
+	[R_LARCH_64]		= apply_r_loongarch_64_rel,
+	[R_LARCH_MARK_LA]	= apply_r_loongarch_mark_la_rel,
+};
+
+static int __init do_relocations(void *kbase_old, void *kbase_new, long offset)
+{
+	int type;
+	u32 *r;
+	u32 *loc_new;
+	u32 *loc_orig;
+
+	for (r = _relocation_start; r < _relocation_end; r++) {
+		/* Sentinel for last relocation */
+		if (*r == 0)
+			break;
+
+		type = (*r >> 24) & 0xff;
+		loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2));
+		loc_new = RELOCATED(loc_orig);
+
+		if (reloc_handlers_rel[type] == NULL) {
+			/* Unsupported relocation */
+			pr_err("Unhandled relocation type %d at 0x%pK\n",
+			       type, loc_orig);
+			return -ENOEXEC;
+		}
+
+		reloc_handlers_rel[type](loc_new, offset);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_RANDOMIZE_BASE
+
+static inline __init unsigned long rotate_xor(unsigned long hash,
+					      const void *area, size_t size)
+{
+	size_t i, diff;
+	const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash));
+
+	diff = (void *)ptr - area;
+	if (unlikely(size < diff + sizeof(hash)))
+		return hash;
+
+	size = ALIGN_DOWN(size - diff, sizeof(hash));
+
+	for (i = 0; i < size / sizeof(hash); i++) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+		hash ^= ptr[i];
+	}
+
+	return hash;
+}
+
+static inline __init unsigned long get_random_boot(void)
+{
+	unsigned long hash = 0;
+	unsigned long entropy = random_get_entropy();
+
+	/* Attempt to create a simple but unpredictable starting entropy.
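+	 * (This mix of linux_banner and the cycle counter is not
+	 * cryptographically strong; it only has to vary the KASLR
+	 * offset between boots.)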
+	 */
+	hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+
+	/* Add in any runtime entropy we can get */
+	hash = rotate_xor(hash, &entropy, sizeof(entropy));
+
+	return hash;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+	char *str;
+
+	const char *builtin_cmdline = CONFIG_CMDLINE;
+
+	str = strstr(builtin_cmdline, "nokaslr");
+	if (str == builtin_cmdline ||
+	    (str > builtin_cmdline && *(str - 1) == ' '))
+		return true;
+
+	str = strstr(boot_command_line, "nokaslr");
+	if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' '))
+		return true;
+
+	return false;
+}
+
+static inline void __init *determine_relocation_address(void)
+{
+	/* Choose a new address for the kernel */
+	unsigned long kernel_length;
+	void *dest = &_text;
+	unsigned long offset;
+
+	if (kaslr_disabled())
+		return dest;
+
+	kernel_length = (long)_end - (long)(&_text);
+
+	offset = get_random_boot() << 16;
+	offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
+	if (offset < kernel_length)
+		offset += ALIGN(kernel_length, 0xffff);
+
+	return RELOCATED(dest);
+}
+
+#else
+
+static inline void __init *determine_relocation_address(void)
+{
+	/*
+	 * Choose a new address for the kernel
+	 * For now we'll hard code the destination
+	 */
+	return (void *)(CACHE_BASE + 0x02000000);
+}
+
+#endif
+
+static inline int __init relocation_addr_valid(void *loc_new)
+{
+	if ((unsigned long)loc_new & 0x00000ffff)
+		return 0;	/* Inappropriately aligned new location */
+
+	if ((unsigned long)loc_new < (unsigned long)_end)
+		return 0;	/* New location overlaps original kernel */
+
+	return 1;
+}
+
+static inline void __init update_kaslr_offset(unsigned long *addr, long offset)
+{
+	unsigned long *new_addr = (unsigned long *)RELOCATED(addr);
+
+	*new_addr = (unsigned long)offset;
+}
+
+void *__init relocate_kernel(void)
+{
+	void *loc_new;
+	unsigned long kernel_length;
+	unsigned long bss_length;
+	long offset = 0;
+	int res = 1;
+	/* Default to original kernel entry point */
+	void *kernel_entry = start_kernel;
+
+	early_init_dt_scan(early_ioremap(fw_arg1, SZ_64K));
+
+	kernel_length = (long)(&_relocation_start) - (long)(&_text);
+	bss_length = (long)&__bss_stop - (long)&__bss_start;
+
+	loc_new = determine_relocation_address();
+
+	/* Sanity check relocation address */
+	if (relocation_addr_valid(loc_new))
+		offset = (unsigned long)loc_new - (unsigned long)(&_text);
+
+	if (offset) {
+		/* Copy the kernel to its new location */
+		memcpy(loc_new, &_text, kernel_length);
+
+		/* Perform relocations on the new kernel */
+		res = do_relocations(&_text, loc_new, offset);
+		if (res < 0)
+			goto out;
+
+		/* Sync the caches, ready for execution of the new kernel */
+		asm volatile (
+		"	ibar 0	\n"
+		"	dbar 0	\n");
+
+		/*
+		 * The original .bss has already been cleared, and
+		 * some variables such as command line parameters
+		 * stored to it so make a copy in the new location.
+		 */
+		memcpy(RELOCATED(&__bss_start), &__bss_start, bss_length);
+
+		/* The current thread is now within the relocated image */
+		__current_thread_info = RELOCATED(__current_thread_info);
+
+		/* Return the new kernel's entry point */
+		kernel_entry = RELOCATED(start_kernel);
+
+		update_kaslr_offset(&__kaslr_offset, offset);
+	}
+out:
+	return kernel_entry;
+}
+
+/*
+ * Show relocation information on panic.
+ */ +void show_kernel_relocation(const char *level) +{ + if (__kaslr_offset > 0) { + printk(level); + pr_cont("Kernel relocated by 0x%pK\n", (void *)__kaslr_offset); + pr_cont(" .text @ 0x%pK\n", _text); + pr_cont(" .data @ 0x%pK\n", _sdata); + pr_cont(" .bss @ 0x%pK\n", __bss_start); + } +} + +static int kernel_location_notifier_fn(struct notifier_block *self, + unsigned long v, void *p) +{ + show_kernel_relocation(KERN_EMERG); + return NOTIFY_DONE; +} + +static struct notifier_block kernel_location_notifier = { + .notifier_call = kernel_location_notifier_fn +}; + +static int __init register_kernel_offset_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &kernel_location_notifier); + return 0; +} +__initcall(register_kernel_offset_dumper); diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index fcb5493232a6..2d98def8ef44 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -81,6 +81,26 @@ SECTIONS .rela.dyn : ALIGN(8) { *(.rela.dyn) *(.rela*) } +#ifdef CONFIG_RELOCATABLE + . = ALIGN(4); + + .data.reloc : { + _relocation_start = .; + /* + * Space for relocation table + * This needs to be filled so that the + * relocs tool can overwrite the content. + * An invalid value is left at the start of the + * section to abort relocation if the table + * has not been filled in. + */ + LONG(0xFFFFFFFF); + FILL(0); + . += CONFIG_RELOCATION_TABLE_SIZE - 4; + _relocation_end = .; + } +#endif + /* * Align to 64K in attempt to eliminate holes before the * .bss..swapper_pg_dir section at the start of .bss. This diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 54ad86d13784..dd700c587f5d 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -108,7 +108,8 @@ static bool is_ignored_symbol(const char *name, char type) /* Symbol names that begin with the following are ignored.*/ static const char * const ignored_prefixes[] = { "$", /* local symbols for ARM, MIPS, etc. */ - ".LASANPC", /* s390 kasan local symbols */ + "L0", /* LoongArch local symbols */ + ".L", /* LoongArch/S390 local symbols */ "__crc_", /* modversions */ "__efistub_", /* arm64 EFI stub namespace */ "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ -- Gitee From 46b45fae16eb61bd4c4df6bc12d54e96f0f4de12 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Fri, 23 Sep 2022 15:44:31 +0800 Subject: [PATCH 098/241] LoongArch: Add kexec support Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the LoongArch architecture, so as to add support for the kexec re-boot mechanism (CONFIG_KEXEC) on LoongArch platforms. Kexec supports loading vmlinux.elf in ELF format and vmlinux.efi in PE format. 
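As a quick sketch of the hand-off (illustrative only, using the names
introduced by this patch), the reboot path ends up making a single call
through the do_kexec_t function type from kexec.h:

	do_kexec_t do_kexec = (do_kexec_t)reboot_code_buffer;

	do_kexec(efi_boot,		/* a0: EFI boot flag */
		 cmdline_ptr,		/* a1: command line pointer */
		 systable_ptr,		/* a2: system table pointer */
		 start_addr,		/* a3: new kernel entry point */
		 first_ind_entry);	/* a4: first indirection page entry */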
I tested kexec on LoongArch machines (Loongson-3A5000) and it works as expected: $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline $ sudo kexec -e Change-Id: I6f12a0f1d29fcbdcca6affd32deb3b63c328c741 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 11 ++ arch/loongarch/include/asm/kexec.h | 60 +++++++ arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/machine_kexec.c | 216 ++++++++++++++++++++++++ arch/loongarch/kernel/relocate_kernel.S | 106 ++++++++++++ 5 files changed, 395 insertions(+) create mode 100644 arch/loongarch/include/asm/kexec.h create mode 100644 arch/loongarch/kernel/machine_kexec.c create mode 100644 arch/loongarch/kernel/relocate_kernel.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0d3b9dc93c72..b773b702aecd 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -622,6 +622,17 @@ config NR_CPUS source "kernel/Kconfig.hz" +config KEXEC + bool "Kexec system call" + select KEXEC_CORE + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h new file mode 100644 index 000000000000..cf95cd3eb2de --- /dev/null +++ b/arch/loongarch/include/asm/kexec.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * kexec.h for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_KEXEC_H +#define _ASM_KEXEC_H + +#include +#include + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + /* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +/* Reserve a page for the control code buffer */ +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + if (oldregs) + memcpy(newregs, oldregs, sizeof(*newregs)); + else + prepare_frametrace(newregs); +} + +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + unsigned long efi_boot; + unsigned long cmdline_ptr; + unsigned long systable_ptr; +}; + +typedef void (*do_kexec_t)(unsigned long efi_boot, + unsigned long cmdline_ptr, + unsigned long systable_ptr, + unsigned long start_addr, + unsigned long first_ind_entry); + +struct kimage; +extern const unsigned char relocate_new_kernel[]; +extern const size_t relocate_new_kernel_size; +extern void kexec_reboot(void); + +#ifdef CONFIG_SMP +extern atomic_t kexec_ready_to_reboot; +extern const unsigned char kexec_smp_wait[]; +#endif + +#endif /* !_ASM_KEXEC_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 5329ec5a896b..bec906460189 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -25,6 +25,8 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o + obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += 
unwind_prologue.o diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c new file mode 100644 index 000000000000..d5037573ed66 --- /dev/null +++ b/arch/loongarch/kernel/machine_kexec.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * machine_kexec.c for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* 0x100000 ~ 0x200000 is safe */ +#define KEXEC_CONTROL_CODE TO_CACHE(0x100000UL) +#define KEXEC_CMDLINE_ADDR TO_CACHE(0x108000UL) + +static unsigned long reboot_code_buffer; + +#ifdef CONFIG_SMP +static void (*relocated_kexec_smp_wait)(void *); +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0); +#endif + +static unsigned long efi_boot; +static unsigned long cmdline_ptr; +static unsigned long systable_ptr; +static unsigned long start_addr; +static unsigned long first_ind_entry; + +static void kexec_image_info(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("kexec kimage info:\n"); + pr_debug("\ttype: %d\n", kimage->type); + pr_debug("\tstart: %lx\n", kimage->start); + pr_debug("\thead: %lx\n", kimage->head); + pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug("\t segment[%lu]: %016lx - %016lx", i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz); + pr_debug("\t\t0x%lx bytes, %lu pages\n", + (unsigned long)kimage->segment[i].memsz, + (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); + } +} + +int machine_kexec_prepare(struct kimage *kimage) +{ + int i; + char *bootloader = "kexec"; + void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; + + kexec_image_info(kimage); + + kimage->arch.efi_boot = fw_arg0; + kimage->arch.systable_ptr = fw_arg2; + + /* Find the command line */ + for (i = 0; i < kimage->nr_segments; i++) { + if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) { + if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE)) + kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr; + break; + } + } + + if (!kimage->arch.cmdline_ptr) { + pr_err("Command line not included in the provided image\n"); + return -EINVAL; + } + + /* kexec need a safe page to save reboot_code_buffer */ + kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE); + + reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page); + memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); + +#ifdef CONFIG_SMP + /* All secondary cpus now may jump to kexec_smp_wait cycle */ + relocated_kexec_smp_wait = reboot_code_buffer + (void *)(kexec_smp_wait - relocate_new_kernel); +#endif + + return 0; +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ +} + +void kexec_reboot(void) +{ + do_kexec_t do_kexec = NULL; + + /* + * We know we were online, and there will be no incoming IPIs at + * this point. + */ + set_cpu_online(smp_processor_id(), true); + + /* Ensure remote CPUs observe that we're online before rebooting. */ + smp_mb__after_atomic(); + + /* + * Make sure we get correct instructions written by the + * machine_kexec_prepare() CPU. 
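+	 * The "ibar 0" below is an instruction barrier: it resynchronizes
+	 * instruction fetch so the just-copied reboot code is visible.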
+ */ + __asm__ __volatile__ ("\tibar 0\n"::); + +#ifdef CONFIG_SMP + /* All secondary cpus go to kexec_smp_wait */ + if (smp_processor_id() > 0) { + relocated_kexec_smp_wait(NULL); + unreachable(); + } +#endif + + do_kexec = (void *)reboot_code_buffer; + do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); + + unreachable(); +} + + +#ifdef CONFIG_SMP +static void kexec_shutdown_secondary(void *regs) +{ + int cpu = smp_processor_id(); + + if (!cpu_online(cpu)) + return; + + /* We won't be sent IPIs any more. */ + set_cpu_online(cpu, false); + + local_irq_disable(); + while (!atomic_read(&kexec_ready_to_reboot)) + cpu_relax(); + + kexec_reboot(); +} +#endif + +void machine_shutdown(void) +{ + int cpu; + + /* All CPUs go to reboot_code_buffer */ + for_each_possible_cpu(cpu) + if (!cpu_online(cpu)) + cpu_device_up(get_cpu_device(cpu)); + +#ifdef CONFIG_SMP + smp_call_function(kexec_shutdown_secondary, NULL, 0); +#endif +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ +} + +void machine_kexec(struct kimage *image) +{ + unsigned long entry, *ptr; + struct kimage_arch *internal = &image->arch; + + efi_boot = internal->efi_boot; + cmdline_ptr = internal->cmdline_ptr; + systable_ptr = internal->systable_ptr; + + start_addr = (unsigned long)phys_to_virt(image->start); + + first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK); + + /* + * The generic kexec code builds a page list with physical + * addresses. they are directly accessible through XKPRANGE + * hence the phys_to_virt() call. + */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION || + *ptr & IND_DESTINATION) + *ptr = (unsigned long) phys_to_virt(*ptr); + } + + /* Mark offline before disabling local irq. */ + set_cpu_online(smp_processor_id(), false); + + /* We do not want to be bothered. */ + local_irq_disable(); + + pr_notice("EFI boot flag 0x%lx\n", efi_boot); + pr_notice("Command line at 0x%lx\n", cmdline_ptr); + pr_notice("System table at 0x%lx\n", systable_ptr); + pr_notice("We will call new kernel at 0x%lx\n", start_addr); + pr_notice("Bye ...\n"); + + /* Make reboot code buffer available to the boot CPU. 
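+	 * flush_cache_all() below writes the code copied in
+	 * machine_kexec_prepare() back to memory before we jump to it.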
*/ + flush_cache_all(); + +#ifdef CONFIG_SMP + atomic_set(&kexec_ready_to_reboot, 1); +#endif + + kexec_reboot(); +} diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S new file mode 100644 index 000000000000..a271b5c2f5ae --- /dev/null +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * relocate_kernel.S for kexec + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ + +#include + +#include +#include +#include +#include +#include +#include + +SYM_CODE_START(relocate_new_kernel) + /* + * a0: EFI boot flag for the new kernel + * a1: Command line pointer for the new kernel + * a2: System table pointer for the new kernel + * a3: Start address to jump to after relocation + * a4: Pointer to the current indirection page entry + */ + move s0, a4 + +process_entry: + PTR_L s1, s0, 0 + PTR_ADDI s0, s0, SZREG + + /* destination page */ + andi s2, s1, IND_DESTINATION + beqz s2, 1f + li.w t0, ~0x1 + and s3, s1, t0 /* store destination addr in s3 */ + b process_entry + +1: + /* indirection page, update s0 */ + andi s2, s1, IND_INDIRECTION + beqz s2, 1f + li.w t0, ~0x2 + and s0, s1, t0 + b process_entry + +1: + /* done page */ + andi s2, s1, IND_DONE + beqz s2, 1f + b done + +1: + /* source page */ + andi s2, s1, IND_SOURCE + beqz s2, process_entry + li.w t0, ~0x8 + and s1, s1, t0 + li.w s5, (1 << _PAGE_SHIFT) / SZREG + +copy_word: + /* copy page word by word */ + REG_L s4, s1, 0 + REG_S s4, s3, 0 + PTR_ADDI s3, s3, SZREG + PTR_ADDI s1, s1, SZREG + LONG_ADDI s5, s5, -1 + beqz s5, process_entry + b copy_word + b process_entry + +done: + ibar 0 + dbar 0 + + /* + * Jump to the new kernel, + * make sure the values of a0, a1, a2 and a3 are not changed. + */ + jr a3 +SYM_CODE_END(relocate_new_kernel) + +#ifdef CONFIG_SMP +/* + * Other CPUs should wait until code is relocated and + * then start at the entry point from LOONGARCH_IOCSR_MBUF0. + */ +SYM_CODE_START(kexec_smp_wait) +1: li.w t0, 0x100 /* wait for init loop */ +2: addi.w t0, t0, -1 /* limit mailbox access */ + bnez t0, 2b + li.w t1, LOONGARCH_IOCSR_MBUF0 + iocsrrd.w s0, t1 /* check PC as an indicator */ + beqz s0, 1b + iocsrrd.d s0, t1 /* get PC via mailbox */ + + li.d t0, CACHE_BASE + or s0, s0, t0 /* s0 = TO_CACHE(s0) */ + jr s0 /* jump to initial PC */ +SYM_CODE_END(kexec_smp_wait) +#endif + +relocate_new_kernel_end: + +SYM_DATA_START(relocate_new_kernel_size) + PTR relocate_new_kernel_end - relocate_new_kernel +SYM_DATA_END(relocate_new_kernel_size) -- Gitee From 737fd3611a172c909a0ac3cc6384dbfae03a69e4 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Fri, 23 Sep 2022 15:44:32 +0800 Subject: [PATCH 099/241] LoongArch: Add kdump support This patch adds support for kdump. In kdump case the normal kernel will reserve a region for the crash kernel and jump there on panic. Arch-specific functions are added to allow for implementing a crash dump file interface, /proc/vmcore, which can be viewed as a ELF file. A user-space tool, such as kexec-tools, is responsible for allocating a separate region for the core's ELF header within the crash kdump kernel memory and filling it in when executing kexec_load(). Then, its location will be advertised to the crash dump kernel via a command line argument "elfcorehdr=", and the crash dump kernel will preserve this region for later use with arch_reserve_vmcore() at boot time. 
At the same time, the crash kdump kernel is also limited within the
"crashkernel" area via a command line argument "mem=", so as not to
destroy the original kernel dump data.

In the crash dump kernel environment, /proc/vmcore is used to access
the primary kernel's memory with copy_oldmem_page().

I tested kdump on LoongArch machines (Loongson-3A5000) and it works as
expected (the suggested crashkernel parameter is "crashkernel=512M@2560M").
You may test it by triggering a crash through /proc/sysrq-trigger:

 $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
 # echo c > /proc/sysrq-trigger

Change-Id: Ia458b82125cf87ce5f23ddb2ca5080d6c1987aed
Signed-off-by: Youling Tang
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig                  | 22 ++++++
 arch/loongarch/kernel/Makefile          |  1 +
 arch/loongarch/kernel/crash_dump.c      | 42 +++++++++++
 arch/loongarch/kernel/machine_kexec.c   | 98 +++++++++++++++++++++++--
 arch/loongarch/kernel/relocate_kernel.S |  6 ++
 arch/loongarch/kernel/setup.c           | 71 ++++++++++++++++++
 arch/loongarch/kernel/traps.c           |  4 +
 arch/loongarch/loongson64/init.c        |  5 ++
 arch/loongarch/loongson64/mem.c         |  3 -
 9 files changed, 244 insertions(+), 8 deletions(-)
 create mode 100644 arch/loongarch/kernel/crash_dump.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index b773b702aecd..bd3b83a5bfef 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -633,6 +633,28 @@
 
 	  The name comes from the similarity to the exec system call.
 
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate crash dump after being started by kexec. This should
+	  normally only be set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded"
+	default "0x90000000a0000000"
+	depends on CRASH_DUMP
+	help
+	  This gives the XKPRANGE address where the kernel is loaded.
+	  If you plan to use this kernel for capturing a crash dump, change
+	  this value to the start of the reserved region (the "X" value as
+	  specified in the "crashkernel=YM@XM" command line boot parameter
+	  passed to the panicked kernel).
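+
+	  For example, with "crashkernel=512M@2560M", X is 2560M (physical
+	  0xa0000000), so this value would be 0x90000000a0000000, i.e. the
+	  cached XKPRANGE mapping of that physical address.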
+ config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index bec906460189..21587bd8f4c1 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c new file mode 100644 index 000000000000..7aac7bb7d5af --- /dev/null +++ b/arch/loongarch/kernel/crash_dump.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +/* + * copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + if (!userbuf) { + memcpy(buf, vaddr + offset, csize); + } else { + if (copy_to_user(buf, vaddr + offset, csize)) { + memunmap(vaddr); + csize = -EFAULT; + } + } + + memunmap(vaddr); + + return csize; +} diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index d5037573ed66..2dcb9e003657 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -7,10 +7,15 @@ #include #include #include -#include +#include #include +#include #include +#include #include +#include +#include +#include #include #include @@ -21,6 +26,7 @@ #define KEXEC_CMDLINE_ADDR TO_CACHE(0x108000UL) static unsigned long reboot_code_buffer; +static cpumask_t cpus_in_crash = CPU_MASK_NONE; #ifdef CONFIG_SMP static void (*relocated_kexec_smp_wait)(void *); @@ -78,7 +84,7 @@ int machine_kexec_prepare(struct kimage *kimage) return -EINVAL; } - /* kexec need a safe page to save reboot_code_buffer */ + /* kexec/kdump need a safe page to save reboot_code_buffer */ kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE); reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page); @@ -102,7 +108,8 @@ void kexec_reboot(void) /* * We know we were online, and there will be no incoming IPIs at - * this point. + * this point. Mark online again before rebooting so that the crash + * analysis tool will see us correctly. */ set_cpu_online(smp_processor_id(), true); @@ -147,7 +154,74 @@ static void kexec_shutdown_secondary(void *regs) kexec_reboot(); } -#endif + +static void crash_shutdown_secondary(void *passed_regs) +{ + int cpu = smp_processor_id(); + struct pt_regs *regs = passed_regs; + + /* + * If we are passed registers, use those. Otherwise get the + * regs from the last interrupt, which should be correct, as + * we are in an interrupt. 
But if the regs are not there,
+	 * pull them from the top of the stack. They are probably
+	 * wrong, but we need something to keep from crashing again.
+	 */
+	if (!regs)
+		regs = get_irq_regs();
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	if (!cpu_online(cpu))
+		return;
+
+	/* We won't be sent IPIs any more. */
+	set_cpu_online(cpu, false);
+
+	local_irq_disable();
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+		crash_save_cpu(regs, cpu);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
+
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+
+	kexec_reboot();
+}
+
+void crash_smp_send_stop(void)
+{
+	unsigned int ncpus;
+	unsigned long timeout;
+	static int cpus_stopped;
+
+	/*
+	 * This function can be called twice in the panic path, but obviously
+	 * we should execute this only once.
+	 */
+	if (cpus_stopped)
+		return;
+
+	cpus_stopped = 1;
+
+	/* Excluding the panic cpu */
+	ncpus = num_online_cpus() - 1;
+
+	smp_call_function(crash_shutdown_secondary, NULL, 0);
+	smp_wmb();
+
+	/*
+	 * The crash CPU sends an IPI and waits for other CPUs to
+	 * respond, delaying at least 10 seconds.
+	 */
+	timeout = MSEC_PER_SEC * 10;
+	pr_emerg("Sending IPI to other cpus...\n");
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
+		mdelay(1);
+		cpu_relax();
+	}
+}
+#endif /* defined(CONFIG_SMP) */
 
 void machine_shutdown(void)
 {
@@ -165,6 +239,19 @@ void machine_shutdown(void)
 
 void machine_crash_shutdown(struct pt_regs *regs)
 {
+	int crashing_cpu;
+
+	local_irq_disable();
+
+	crashing_cpu = smp_processor_id();
+	crash_save_cpu(regs, crashing_cpu);
+
+#ifdef CONFIG_SMP
+	crash_smp_send_stop();
+#endif
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+	pr_info("Starting crashdump kernel...\n");
 }
 
 void machine_kexec(struct kimage *image)
@@ -178,7 +265,8 @@ void machine_kexec(struct kimage *image)
 
 	start_addr = (unsigned long)phys_to_virt(image->start);
 
-	first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
+	first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ?
+		(unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0;
 
 	/*
 	 * The generic kexec code builds a page list with physical
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index a271b5c2f5ae..2bbcacb0ea92 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -24,6 +24,12 @@ SYM_CODE_START(relocate_new_kernel)
 	 */
 	move		s0, a4
 
+	/*
+	 * In case of a kdump/crash kernel, the indirection page is not
+	 * populated, as the kernel is directly copied to a reserved location.
+	 */
+	beqz		s0, done
+
 process_entry:
 	PTR_L		s1, s0, 0
 	PTR_ADDI	s0, s0, SZREG
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 9dbbd288472a..24ad67d6b8af 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -8,6 +8,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -113,6 +115,65 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
+void __init arch_reserve_vmcore(void)
+{
+#ifdef CONFIG_PROC_VMCORE
+	u64 i;
+	phys_addr_t start, end;
+
+	if (!is_kdump_kernel())
+		return;
+
+	if (!elfcorehdr_size) {
+		for_each_mem_range(i, &start, &end) {
+			if (elfcorehdr_addr >= start && elfcorehdr_addr < end) {
+				/*
+				 * Reserve from the elf core header to the end of
+				 * the memory segment, which should all be kdump
+				 * reserved memory.
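+				 * This also handles "elfcorehdr=" being
+				 * passed without an explicit size.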
+ */ + elfcorehdr_size = end - elfcorehdr_addr; + break; + } + } + } + + if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) { + pr_warn("elfcorehdr is overlapped\n"); + return; + } + + memblock_reserve(elfcorehdr_addr, elfcorehdr_size); + + pr_info("Reserving %llu KiB of memory at 0x%llx for elfcorehdr\n", + elfcorehdr_size >> 10, elfcorehdr_addr); +#endif +} + +void __init arch_parse_crashkernel(void) +{ +#ifdef CONFIG_KEXEC + int ret; + unsigned long long start; + unsigned long long total_mem; + unsigned long long crash_base, crash_size; + + total_mem = memblock_phys_mem_size(); + ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); + if (ret < 0 || crash_size <= 0) + return; + + start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size); + if (start != crash_base) { + pr_warn("Invalid memory region reserved for crash kernel\n"); + return; + } + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +#endif +} + static void __init check_kernel_sections_mem(void) { phys_addr_t start = __pa_symbol(&_text); @@ -200,6 +261,15 @@ static void __init resource_init(void) request_resource(res, &data_resource); request_resource(res, &bss_resource); } + +#ifdef CONFIG_KEXEC + if (crashk_res.start < crashk_res.end) { + insert_resource(&iomem_resource, &crashk_res); + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n", + (unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20), + (unsigned long)(crashk_res.start >> 20)); + } +#endif } static int __init reserve_memblock_reserved_regions(void) @@ -260,6 +330,7 @@ void __init setup_arch(char **cmdline_p) early_init(); parse_early_param(); + reserve_initrd_mem(); platform_init(); pagetable_init(); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 1c247b1fac1d..8413ccc37f6f 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,9 @@ void __noreturn die(const char *str, struct pt_regs *regs) oops_exit(); + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + if (in_interrupt()) panic("Fatal exception in interrupt"); diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index 13778d3dd1b6..a8a1276c06cb 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -27,6 +27,8 @@ struct loongson_board_info b_info; static const char dmi_empty_string[] = " "; +extern void __init arch_reserve_vmcore(void); +extern void __init arch_parse_crashkernel(void); static const char *dmi_string_parse(const struct dmi_header *dm, u8 s) { @@ -114,6 +116,9 @@ void __init early_init(void) void __init platform_init(void) { efi_init(); + arch_reserve_vmcore(); + arch_parse_crashkernel(); + #ifdef CONFIG_ACPI_TABLE_UPGRADE acpi_table_upgrade(); #endif diff --git a/arch/loongarch/loongson64/mem.c b/arch/loongarch/loongson64/mem.c index c3be7db59000..35a23146252e 100644 --- a/arch/loongarch/loongson64/mem.c +++ b/arch/loongarch/loongson64/mem.c @@ -59,7 +59,4 @@ void __init memblock_init(void) /* Reserve the kernel text/data/bss */ memblock_reserve(__pa_symbol(&_text), __pa_symbol(&_end) - __pa_symbol(&_text)); - - /* Reserve the initrd */ - reserve_initrd_mem(); } -- Gitee From 35ee6cdf46b7bf126955ad7261eb938f6b76d8f9 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 100/241] LoongArch: Add perf events 
support Change-Id: Ia0c51adf319e72535a187ebc1fa6f9f8e5735fd6 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/uapi/asm/perf_regs.h | 40 + arch/loongarch/kernel/Makefile | 2 + arch/loongarch/kernel/perf_event.c | 900 ++++++++++++++++++++ arch/loongarch/kernel/perf_regs.c | 57 ++ 5 files changed, 1001 insertions(+) create mode 100644 arch/loongarch/include/uapi/asm/perf_regs.h create mode 100644 arch/loongarch/kernel/perf_event.c create mode 100644 arch/loongarch/kernel/perf_regs.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bd3b83a5bfef..0ff42272ec8e 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -91,6 +91,8 @@ config LOONGARCH select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/loongarch/include/uapi/asm/perf_regs.h b/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..29d69c00fc7a --- /dev/null +++ b/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 21587bd8f4c1..0667ffb2007e 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -31,4 +31,6 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c new file mode 100644 index 000000000000..ff3c35d0c27d --- /dev/null +++ b/arch/loongarch/kernel/perf_event.c @@ -0,0 +1,900 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux performance counter support for LoongArch. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include /* For perf_irq */ +#include + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. 
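+ * This assumes the usual LoongArch frame layout, where a struct
+ * stack_frame (saved fp and ra) sits immediately below the frame
+ * pointer; the copy below is range-checked with access_ok().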
+ */ +static unsigned long +user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) +{ + unsigned long err; + unsigned long __user *user_frame_tail; + struct stack_frame buftail; + + user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame)); + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail)); + pagefault_enable(); + + if (err || (unsigned long)user_frame_tail >= buftail.fp) + return 0; + + perf_callchain_store(entry, buftail.ra); + + return buftail.fp; +} + +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->csr_era); + + fp = regs->regs[22]; + + while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf)) + fp = user_backtrace(entry, fp); +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + for (unwind_start(&state, current, regs); + !unwind_done(&state); unwind_next_frame(&state)) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } +} + +#define LOONGARCH_MAX_HWEVENTS 32 + +struct cpu_hw_events { + /* Array of events on this cpu. */ + struct perf_event *events[LOONGARCH_MAX_HWEVENTS]; + + /* + * Set the bit (indexed by the counter number) when the counter + * is used for an event. + */ + unsigned long used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)]; + + /* + * Software copy of the control register for each performance counter. + */ + unsigned int saved_ctrl[LOONGARCH_MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .saved_ctrl = {0}, +}; + +/* The description of LoongArch performance events. */ +struct loongarch_perf_event { + unsigned int event_id; +}; + +static struct loongarch_perf_event raw_event; +static DEFINE_MUTEX(raw_event_mutex); + +#define C(x) PERF_COUNT_HW_CACHE_##x +#define HW_OP_UNSUPPORTED 0xffffffff +#define CACHE_OP_UNSUPPORTED 0xffffffff + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED} + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... 
C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED}, \ + }, \ +} + +struct loongarch_pmu { + u64 max_period; + u64 valid_count; + u64 overflow; + const char *name; + unsigned int num_counters; + u64 (*read_counter)(unsigned int idx); + void (*write_counter)(unsigned int idx, u64 val); + const struct loongarch_perf_event *(*map_raw_event)(u64 config); + const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX]; + const struct loongarch_perf_event (*cache_event_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +}; + +static struct loongarch_pmu loongarch_pmu; + +#define M_PERFCTL_EVENT(event) (event & CSR_PERFCTRL_EVENT) + +#define M_PERFCTL_COUNT_EVENT_WHENEVER (CSR_PERFCTRL_PLV0 | \ + CSR_PERFCTRL_PLV1 | \ + CSR_PERFCTRL_PLV2 | \ + CSR_PERFCTRL_PLV3 | \ + CSR_PERFCTRL_IE) + +#define M_PERFCTL_CONFIG_MASK 0x1f0000 + +static void pause_local_counters(void); +static void resume_local_counters(void); + +static u64 loongarch_pmu_read_counter(unsigned int idx) +{ + u64 val = -1; + + switch (idx) { + case 0: + val = read_csr_perfcntr0(); + break; + case 1: + val = read_csr_perfcntr1(); + break; + case 2: + val = read_csr_perfcntr2(); + break; + case 3: + val = read_csr_perfcntr3(); + break; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return 0; + } + + return val; +} + +static void loongarch_pmu_write_counter(unsigned int idx, u64 val) +{ + switch (idx) { + case 0: + write_csr_perfcntr0(val); + return; + case 1: + write_csr_perfcntr1(val); + return; + case 2: + write_csr_perfcntr2(val); + return; + case 3: + write_csr_perfcntr3(val); + return; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return; + } +} + +static unsigned int loongarch_pmu_read_control(unsigned int idx) +{ + unsigned int val = -1; + + switch (idx) { + case 0: + val = read_csr_perfctrl0(); + break; + case 1: + val = read_csr_perfctrl1(); + break; + case 2: + val = read_csr_perfctrl2(); + break; + case 3: + val = read_csr_perfctrl3(); + break; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return 0; + } + + return val; +} + +static void loongarch_pmu_write_control(unsigned int idx, unsigned int val) +{ + switch (idx) { + case 0: + write_csr_perfctrl0(val); + return; + case 1: + write_csr_perfctrl1(val); + return; + case 2: + write_csr_perfctrl2(val); + return; + case 3: + write_csr_perfctrl3(val); + return; + default: + WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); + return; + } +} + +static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) +{ + int i; + + for (i = 0; i < loongarch_pmu.num_counters; i++) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + + return -EAGAIN; +} + +static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx) +{ + unsigned int cpu; + struct perf_event *event = container_of(evt, struct perf_event, hw); + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + /* Make sure interrupt enabled. */ + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE; + + cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id(); + + /* + * We do not actually let the counter run. Leave it until start(). 
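+	 * The value is only staged in saved_ctrl here; it is written to
+	 * the control CSR by resume_local_counters() when the PMU is
+	 * re-enabled.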
+ */ + pr_debug("Enabling perf counter for CPU%d\n", cpu); +} + +static void loongarch_pmu_disable_event(int idx) +{ + unsigned long flags; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + local_irq_save(flags); + cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) & + ~M_PERFCTL_COUNT_EVENT_WHENEVER; + loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]); + local_irq_restore(flags); +} + +static int loongarch_pmu_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + int ret = 0; + u64 left = local64_read(&hwc->period_left); + u64 period = hwc->sample_period; + + if (unlikely((left + period) & (1ULL << 63))) { + /* left underflowed by more than period. */ + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } else if (unlikely((left + period) <= period)) { + /* left underflowed by less than period. */ + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > loongarch_pmu.max_period) { + left = loongarch_pmu.max_period; + local64_set(&hwc->period_left, left); + } + + local64_set(&hwc->prev_count, loongarch_pmu.overflow - left); + + loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left); + + perf_event_update_userpage(event); + + return ret; +} + +static void loongarch_pmu_event_update(struct perf_event *event, + struct hw_perf_event *hwc, + int idx) +{ + u64 prev_raw_count, new_raw_count; + u64 delta; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = loongarch_pmu.read_counter(idx); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = new_raw_count - prev_raw_count; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static void loongarch_pmu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Set the period for the event. */ + loongarch_pmu_event_set_period(event, hwc, hwc->idx); + + /* Enable the event. */ + loongarch_pmu_enable_event(hwc, hwc->idx); +} + +static void loongarch_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* We are working on a local event. */ + loongarch_pmu_disable_event(hwc->idx); + barrier(); + loongarch_pmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static int loongarch_pmu_add(struct perf_event *event, int flags) +{ + int idx, err = 0; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + perf_pmu_disable(event->pmu); + + /* To look for a free counter for this event. */ + idx = loongarch_pmu_alloc_counter(cpuc, hwc); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then + * make sure it is disabled. + */ + event->hw.idx = idx; + loongarch_pmu_disable_event(idx); + cpuc->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + loongarch_pmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. 
*/ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static void loongarch_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters); + + loongarch_pmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void loongarch_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; + + loongarch_pmu_event_update(event, hwc, hwc->idx); +} + +static void loongarch_pmu_enable(struct pmu *pmu) +{ + resume_local_counters(); +} + +static void loongarch_pmu_disable(struct pmu *pmu) +{ + pause_local_counters(); +} + +static DEFINE_MUTEX(pmu_reserve_mutex); +static atomic_t active_events = ATOMIC_INIT(0); + +static void reset_counters(void *arg); +static int __hw_perf_event_init(struct perf_event *event); + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { + on_each_cpu(reset_counters, NULL, 1); + free_irq(get_pmc_irq(), &loongarch_pmu); + mutex_unlock(&pmu_reserve_mutex); + } +} + +static void handle_associated_event(struct cpu_hw_events *cpuc, int idx, + struct perf_sample_data *data, struct pt_regs *regs) +{ + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc = &event->hw; + + loongarch_pmu_event_update(event, hwc, idx); + data->period = event->hw.last_period; + if (!loongarch_pmu_event_set_period(event, hwc, idx)) + return; + + if (perf_event_overflow(event, data, regs)) + loongarch_pmu_disable_event(idx); +} + +static irqreturn_t pmu_handle_irq(int irq, void *dev) +{ + int n; + int handled = IRQ_NONE; + uint64_t counter; + struct pt_regs *regs; + struct perf_sample_data data; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + /* + * First we pause the local counters, so that when we are locked + * here, the counters are all paused. When it gets locked due to + * perf_disable(), the timer interrupt handler will be delayed. + * + * See also loongarch_pmu_start(). + */ + pause_local_counters(); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0, 0); + + for (n = 0; n < loongarch_pmu.num_counters; n++) { + if (test_bit(n, cpuc->used_mask)) { + counter = loongarch_pmu.read_counter(n); + if (counter & loongarch_pmu.overflow) { + handle_associated_event(cpuc, n, &data, regs); + handled = IRQ_HANDLED; + } + } + } + + resume_local_counters(); + + /* + * Do all the work for the pending perf events. We can do this + * in here because the performance counter interrupt is a regular + * interrupt, not NMI. 
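+	 * irq_work_run() drains the irq_work queue, delivering wakeups
+	 * and signals queued by perf_event_overflow() above.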
+ */ + if (handled == IRQ_HANDLED) + irq_work_run(); + + return handled; +} + +static int loongarch_pmu_event_init(struct perf_event *event) +{ + int r, irq; + unsigned long flags; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + /* Init it to avoid false validate_group */ + event->hw.event_base = 0xffffffff; + return -ENOENT; + } + + if (event->cpu >= 0 && !cpu_online(event->cpu)) + return -ENODEV; + + irq = get_pmc_irq(); + flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED; + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmu_reserve_mutex); + if (atomic_read(&active_events) == 0) { + r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu); + if (r < 0) { + mutex_unlock(&pmu_reserve_mutex); + pr_warn("PMU IRQ request failed\n"); + return -ENODEV; + } + } + atomic_inc(&active_events); + mutex_unlock(&pmu_reserve_mutex); + } + + return __hw_perf_event_init(event); +} + +static struct pmu pmu = { + .pmu_enable = loongarch_pmu_enable, + .pmu_disable = loongarch_pmu_disable, + .event_init = loongarch_pmu_event_init, + .add = loongarch_pmu_add, + .del = loongarch_pmu_del, + .start = loongarch_pmu_start, + .stop = loongarch_pmu_stop, + .read = loongarch_pmu_read, +}; + +static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev) +{ + return (pev->event_id & 0xff); +} + +static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx) +{ + const struct loongarch_perf_event *pev; + + pev = &(*loongarch_pmu.general_event_map)[idx]; + + if (pev->event_id == HW_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + return pev; +} + +static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config) +{ + unsigned int cache_type, cache_op, cache_result; + const struct loongarch_perf_event *pev; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return ERR_PTR(-EINVAL); + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return ERR_PTR(-EINVAL); + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return ERR_PTR(-EINVAL); + + pev = &((*loongarch_pmu.cache_event_map) + [cache_type] + [cache_op] + [cache_result]); + + if (pev->event_id == CACHE_OP_UNSUPPORTED) + return ERR_PTR(-ENOENT); + + return pev; +} + +static int validate_group(struct perf_event *event) +{ + struct cpu_hw_events fake_cpuc; + struct perf_event *sibling, *leader = event->group_leader; + + memset(&fake_cpuc, 0, sizeof(fake_cpuc)); + + if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0) + return -EINVAL; + + for_each_sibling_event(sibling, leader) { + if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0) + return -EINVAL; + } + + if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0) + return -EINVAL; + + return 0; +} + +static void reset_counters(void *arg) +{ + int n; + int counters = loongarch_pmu.num_counters; + + for (n = 0; n < counters; n++) { + loongarch_pmu_write_control(n, 0); + loongarch_pmu.write_counter(n, 0); + } +} + +static const struct loongarch_perf_event loongson_new_event_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00 }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 }, + 
[PERF_COUNT_HW_CACHE_MISSES] = { 0x09 }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 }, +}; + +static const struct loongarch_perf_event loongson_new_cache_map + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +PERF_CACHE_MAP_ALL_UNSUPPORTED, +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x8 }, + [C(RESULT_MISS)] = { 0x9 }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x8 }, + [C(RESULT_MISS)] = { 0x9 }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0xaa }, + [C(RESULT_MISS)] = { 0xa9 }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x6 }, + [C(RESULT_MISS)] = { 0x7 }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0xc }, + [C(RESULT_MISS)] = { 0xd }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0xc }, + [C(RESULT_MISS)] = { 0xd }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x3b }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x4 }, + [C(RESULT_MISS)] = { 0x3c }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x4 }, + [C(RESULT_MISS)] = { 0x3c }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02 }, + [C(RESULT_MISS)] = { 0x03 }, + }, +}, +}; + +static int __hw_perf_event_init(struct perf_event *event) +{ + int err; + struct hw_perf_event *hwc = &event->hw; + struct perf_event_attr *attr = &event->attr; + const struct loongarch_perf_event *pev; + + /* Returning LoongArch event descriptor for generic perf event. */ + if (PERF_TYPE_HARDWARE == event->attr.type) { + if (event->attr.config >= PERF_COUNT_HW_MAX) + return -EINVAL; + pev = loongarch_pmu_map_general_event(event->attr.config); + } else if (PERF_TYPE_HW_CACHE == event->attr.type) { + pev = loongarch_pmu_map_cache_event(event->attr.config); + } else if (PERF_TYPE_RAW == event->attr.type) { + /* We are working on the global raw event. */ + mutex_lock(&raw_event_mutex); + pev = loongarch_pmu.map_raw_event(event->attr.config); + } else { + /* The event type is not (yet) supported. */ + return -EOPNOTSUPP; + } + + if (IS_ERR(pev)) { + if (PERF_TYPE_RAW == event->attr.type) + mutex_unlock(&raw_event_mutex); + return PTR_ERR(pev); + } + + /* + * We allow max flexibility on how each individual counter shared + * by the single CPU operates (the mode exclusion and the range). + */ + hwc->config_base = CSR_PERFCTRL_IE; + + hwc->event_base = loongarch_pmu_perf_event_encode(pev); + if (PERF_TYPE_RAW == event->attr.type) + mutex_unlock(&raw_event_mutex); + + if (!attr->exclude_user) { + hwc->config_base |= CSR_PERFCTRL_PLV3; + hwc->config_base |= CSR_PERFCTRL_PLV2; + } + if (!attr->exclude_kernel) { + hwc->config_base |= CSR_PERFCTRL_PLV0; + } + if (!attr->exclude_hv) { + hwc->config_base |= CSR_PERFCTRL_PLV1; + } + + hwc->config_base &= M_PERFCTL_CONFIG_MASK; + /* + * The event can belong to another cpu. We do not assign a local + * counter for it for now. 
+ */
+	hwc->idx = -1;
+	hwc->config = 0;
+
+	if (!hwc->sample_period) {
+		hwc->sample_period = loongarch_pmu.max_period;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	err = 0;
+	if (event->group_leader != event)
+		err = validate_group(event);
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (err)
+		event->destroy(event);
+
+	return err;
+}
+
+static void pause_local_counters(void)
+{
+	unsigned long flags;
+	int ctr = loongarch_pmu.num_counters;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	local_irq_save(flags);
+	do {
+		ctr--;
+		cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
+		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
+					    ~M_PERFCTL_COUNT_EVENT_WHENEVER);
+	} while (ctr > 0);
+	local_irq_restore(flags);
+}
+
+static void resume_local_counters(void)
+{
+	int ctr = loongarch_pmu.num_counters;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	do {
+		ctr--;
+		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
+	} while (ctr > 0);
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
+{
+	/* currently most cores have 7-bit event numbers */
+	unsigned int raw_id = config & 0xff;
+	unsigned int base_id = raw_id & 0x7f;
+
+	switch (current_cpu_data.cputype) {
+	case CPU_LOONGSON64:
+		base_id = raw_id;
+		break;
+	default:
+		panic("Unknown CPU type 0x%x\n", current_cpu_data.cputype);
+	}
+
+	raw_event.event_id = base_id;
+
+	return &raw_event;
+}
+
+static int __init init_hw_perf_events(void)
+{
+	int counters;
+
+	if (!cpu_has_pmp)
+		return -ENODEV;
+
+	pr_info("Performance counters: ");
+	counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;
+
+	loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event;
+
+	switch (current_cpu_data.cputype) {
+	case CPU_LOONGSON64:
+		loongarch_pmu.name = "loongarch/loongson64";
+		loongarch_pmu.general_event_map = &loongson_new_event_map;
+		loongarch_pmu.cache_event_map = &loongson_new_cache_map;
+		break;
+	default:
+		pr_cont("Either hardware does not support performance "
+			"counters, or not yet implemented.\n");
+		return -ENODEV;
+	}
+
+	loongarch_pmu.num_counters = counters;
+	loongarch_pmu.max_period = (1ULL << 63) - 1;
+	loongarch_pmu.valid_count = (1ULL << 63) - 1;
+	loongarch_pmu.overflow = 1ULL << 63;
+	loongarch_pmu.read_counter = loongarch_pmu_read_counter;
+	loongarch_pmu.write_counter = loongarch_pmu_write_counter;
+
+	on_each_cpu(reset_counters, NULL, 1);
+
+	pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
+		loongarch_pmu.name, counters, 64);
+
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+
+	return 0;
+}
+early_initcall(init_hw_perf_events);
diff --git a/arch/loongarch/kernel/perf_regs.c b/arch/loongarch/kernel/perf_regs.c
new file mode 100644
index 000000000000..b13b1a4475b4
--- /dev/null
+++ b/arch/loongarch/kernel/perf_regs.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Some parts derived from x86 version of this file.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ * Copyright (C) 2021 Loongson Technology Corporation Limited + */ + +#include + +#include + +#ifdef CONFIG_32BIT +u64 perf_reg_abi(struct task_struct *tsk) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* Must be CONFIG_64BIT */ +u64 perf_reg_abi(struct task_struct *tsk) +{ + if (test_tsk_thread_flag(tsk, TIF_32BIT_REGS)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_32BIT */ + +int perf_reg_validate(u64 mask) +{ + if (!mask) + return -EINVAL; + if (mask & ~((1ull << PERF_REG_LOONGARCH_MAX) - 1)) + return -EINVAL; + return 0; +} + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_LOONGARCH_MAX)) + return 0; + + if ((u32)idx == PERF_REG_LOONGARCH_PC) + return regs->csr_era; + + return regs->regs[idx]; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} -- Gitee From 38ff3efde2996250b6adfabcf355c857da5fd0a3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 101/241] LoongArch: Add hardware watchpoint support Change-Id: Ie9693bc9f97bdc7bc93eb8476ed6300121af3160 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 + arch/loongarch/include/asm/processor.h | 18 + arch/loongarch/include/asm/ptrace.h | 2 + arch/loongarch/include/asm/switch_to.h | 2 + arch/loongarch/include/asm/watch.h | 39 ++ arch/loongarch/include/uapi/asm/ptrace.h | 52 +++ arch/loongarch/kernel/Makefile | 1 + arch/loongarch/kernel/cpu-probe.c | 3 + arch/loongarch/kernel/ptrace.c | 124 +++++ arch/loongarch/kernel/traps.c | 36 +- arch/loongarch/kernel/watch.c | 564 +++++++++++++++++++++++ 11 files changed, 843 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/watch.h create mode 100644 arch/loongarch/kernel/watch.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0ff42272ec8e..6be6512cf4f2 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -222,6 +222,9 @@ config CPU_SUPPORTS_32BIT_KERNEL bool config CPU_SUPPORTS_64BIT_KERNEL bool +config HARDWARE_WATCHPOINTS + bool + default y menu "Kernel type and options" diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 0ec57de98a4d..23e563cc47ac 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -88,6 +88,14 @@ struct loongarch_fpu { {0,} \ } +struct loongarch_watch_reg_state { + unsigned long addr[NUM_WATCH_REGS]; + unsigned long mask[NUM_WATCH_REGS]; + unsigned char irw[NUM_WATCH_REGS]; + unsigned char irwstat[NUM_WATCH_REGS]; + unsigned char irwmask[NUM_WATCH_REGS]; +}; + #define ARCH_MIN_TASKALIGN 32 struct loongarch_vdso_info; @@ -121,6 +129,12 @@ struct thread_struct { /* Eflags register */ unsigned long eflags; + /* Used by ptrace single_step */ + unsigned long single_step; + + /* Watch register state, if available. */ + struct loongarch_watch_reg_state watch; + /* Other stuff associated with the thread. 
 */
 	unsigned long trap_nr;
 	unsigned long error_code;
@@ -159,6 +173,10 @@
 	.csr_euen		= 0,				\
 	.csr_ecfg		= 0,				\
 	.csr_badvaddr		= 0,				\
+	/*							\
+	 * Saved watch register stuff				\
+	 */							\
+	.watch = {{0,},},					\
 	/*							\
 	 * Other stuff associated with the process		\
 	 */							\
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index b394d9a74157..50991a55d77e 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -153,4 +153,6 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
 	regs->regs[3] = val;
 }
 
+#define arch_has_single_step()	(1)
+
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
index 0987ac30475b..44793ed2ef3e 100644
--- a/arch/loongarch/include/asm/switch_to.h
+++ b/arch/loongarch/include/asm/switch_to.h
@@ -7,6 +7,7 @@
 
 #include
 #include
+#include <asm/watch.h>
 
 struct task_struct;
 
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)					\
 do {									\
 	lose_fpu_inatomic(1, prev);					\
+	__process_watch(prev, next);					\
 	(last) = __switch_to(prev, next, task_thread_info(next),	\
 		 __builtin_return_address(0), __builtin_frame_address(0)); \
 } while (0)
diff --git a/arch/loongarch/include/asm/watch.h b/arch/loongarch/include/asm/watch.h
new file mode 100644
index 000000000000..ae68e441eded
--- /dev/null
+++ b/arch/loongarch/include/asm/watch.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Chong Qiao
+ *         Huacai Chen
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_WATCH_H
+#define _ASM_WATCH_H
+
+#include
+
+#include
+
+unsigned long watch_csrrd(unsigned int reg);
+void watch_csrwr(unsigned long val, unsigned int reg);
+
+void loongarch_probe_watch_registers(struct cpuinfo_loongarch *c);
+void loongarch_clear_watch_registers(void);
+void loongarch_install_watch_registers(struct task_struct *t);
+void loongarch_clear_prev_watch_registers(struct task_struct *prev);
+void loongarch_install_next_watch_registers(struct task_struct *next);
+void loongarch_read_watch_registers(struct pt_regs *regs);
+void loongarch_update_watch_registers(struct task_struct *t);
+
+#ifdef CONFIG_HARDWARE_WATCHPOINTS
+#define __process_watch(prev, next) do {				\
+	if (test_bit(TIF_LOAD_WATCH, &task_thread_info(prev)->flags)	\
+	    || test_bit(TIF_SINGLESTEP, &task_thread_info(prev)->flags)) \
+		loongarch_clear_prev_watch_registers(prev);		\
+	if (test_bit(TIF_LOAD_WATCH, &task_thread_info(next)->flags)	\
+	    || test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) \
+		loongarch_install_next_watch_registers(next);		\
+} while (0)
+#else
+#define __process_watch(prev, next) do {} while (0)
+#endif
+
+#endif /* _ASM_WATCH_H */
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index 17250e696421..b48bf163f5a9 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -56,7 +56,59 @@ struct user_lasx_state {
 	uint64_t vregs[32*4];
 };
 
+/* Read and write watchpoint registers. */
+#define NUM_WATCH_REGS 16
+
+enum pt_watch_style {
+	pt_watch_style_la32,
+	pt_watch_style_la64
+};
+
+struct la32_watch_regs {
+	uint32_t addr;
+	uint32_t mask;
+	/* irw/irwsta/irwmask I R W bits.
+	 * bit 0 -- 1 if W bit is usable.
+	 * bit 1 -- 1 if R bit is usable.
+	 * bit 2 -- 1 if I bit is usable.
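+	 *
+	 * For example (illustrative only, using the LA_WATCH_* flags
+	 * defined below): a debugger arming a data write watchpoint
+	 * sets irw = LA_WATCH_W (or LA_WATCH_W | LA_WATCH_R for
+	 * read/write), while an instruction (execute) watchpoint uses
+	 * irw = LA_WATCH_I.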
+ */ + uint8_t irw; + uint8_t irwstat; + uint8_t irwmask; +} __attribute__((aligned(8))); + +struct la64_watch_regs { + uint64_t addr; + uint64_t mask; + /* irw/irwsta/irwmask I R W bits. + * bit 0 -- 1 if W bit is usable. + * bit 1 -- 1 if R bit is usable. + * bit 2 -- 1 if I bit is usable. + */ + uint8_t irw; + uint8_t irwstat; + uint8_t irwmask; +} __attribute__((aligned(8))); + +struct pt_watch_regs { + int16_t max_valid; + int16_t num_valid; + enum pt_watch_style style; + union { + struct la32_watch_regs la32[NUM_WATCH_REGS]; + struct la64_watch_regs la64[NUM_WATCH_REGS]; + }; +}; + #define PTRACE_SYSEMU 0x1f #define PTRACE_SYSEMU_SINGLESTEP 0x20 +#define PTRACE_GET_WATCH_REGS 0xd0 +#define PTRACE_SET_WATCH_REGS 0xd1 + +/* Watch irw/irwmask/irwstat bit definitions */ +#define LA_WATCH_W (1 << 0) +#define LA_WATCH_R (1 << 1) +#define LA_WATCH_I (1 << 2) +#define LA_WATCH_IRW (LA_WATCH_W | LA_WATCH_R | LA_WATCH_I) #endif /* _UAPI_ASM_PTRACE_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 0667ffb2007e..ffd701d5dffc 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o +obj-$(CONFIG_HARDWARE_WATCHPOINTS) += watch.o CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 023b43b55238..cae5dca8ca8c 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -19,6 +19,7 @@ #include #include #include +#include /* Hardware capabilities */ unsigned int elf_hwcap __read_mostly; @@ -186,6 +187,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) default: pr_warn("Warning: unknown TLB type\n"); } + + loongarch_probe_watch_registers(c); } #define MAX_NAME_LEN 32 diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index 9dc348fcb987..e12d7febdc97 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -34,7 +34,9 @@ #include #include #include +#include #include +#include static void init_fp_ctx(struct task_struct *target) { @@ -512,10 +514,105 @@ static inline int write_user(struct task_struct *target, unsigned long addr, return 0; } +static int ptrace_get_watch_regs(struct task_struct *child, + struct pt_watch_regs __user *addr) +{ + enum pt_watch_style style; + int i; + unsigned int cnt; + + if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0) + return -EIO; + if (!access_ok(addr, sizeof(struct pt_watch_regs))) + return -EIO; + +#ifdef CONFIG_32BIT + style = pt_watch_style_la32; +#define WATCH_STYLE la32 +#else + style = pt_watch_style_la64; +#define WATCH_STYLE la64 +#endif + + loongarch_update_watch_registers(child); + + /* Reserve the first instruction watchpoint if TIF_SINGLESTEP is set. 
*/ + if (unlikely(test_thread_flag(TIF_SINGLESTEP))) + child->thread.watch.irwmask[boot_cpu_data.watch_dreg_count] = 0; + + __get_user(cnt, &addr->max_valid); + cnt = min(boot_cpu_data.watch_reg_use_cnt, cnt); + __put_user(cnt, &addr->num_valid); + __put_user(style, &addr->style); + for (i = 0; i < cnt; i++) { + __put_user(child->thread.watch.addr[i], &addr->WATCH_STYLE[i].addr); + __put_user(child->thread.watch.mask[i], &addr->WATCH_STYLE[i].mask); + __put_user(child->thread.watch.irw[i], &addr->WATCH_STYLE[i].irw); + __put_user(child->thread.watch.irwstat[i], &addr->WATCH_STYLE[i].irwstat); + __put_user(child->thread.watch.irwmask[i], &addr->WATCH_STYLE[i].irwmask); + } + + return 0; +} + +static int ptrace_set_watch_regs(struct task_struct *child, + struct pt_watch_regs __user *addr) +{ + int i; + unsigned int cnt; + int watch_active = 0; + unsigned long addrt[NUM_WATCH_REGS]; + unsigned long maskt[NUM_WATCH_REGS]; + unsigned char irwt[NUM_WATCH_REGS]; + + if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0) + return -EIO; + if (!access_ok(addr, sizeof(struct pt_watch_regs))) + return -EIO; + + __get_user(cnt, &addr->max_valid); + cnt = min(boot_cpu_data.watch_reg_use_cnt, cnt); + /* Check the values. */ + for (i = 0; i < cnt; i++) { + __get_user(addrt[i], &addr->WATCH_STYLE[i].addr); +#ifdef CONFIG_32BIT + if (addrt[i] & __UA_LIMIT) + return -EINVAL; +#else + if (test_tsk_thread_flag(child, TIF_32BIT_ADDR)) { + if (addrt[i] & 0xffffffff80000000UL) + return -EINVAL; + } else { + if (addrt[i] & __UA_LIMIT) + return -EINVAL; + } +#endif + __get_user(maskt[i], &addr->WATCH_STYLE[i].mask); + __get_user(irwt[i], &addr->WATCH_STYLE[i].irw); + } + /* Install them. */ + for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) { + if (irwt[i] & LA_WATCH_IRW) + watch_active = 1; + child->thread.watch.addr[i] = addrt[i]; + child->thread.watch.mask[i] = maskt[i]; + child->thread.watch.irw[i] = irwt[i]; + } + + if (watch_active) + set_tsk_thread_flag(child, TIF_LOAD_WATCH); + else + clear_tsk_thread_flag(child, TIF_LOAD_WATCH); + + return 0; +} + + long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; + void __user *addrp = (void __user *) addr; unsigned long __user *datap = (void __user *) data; switch (request) { @@ -527,6 +624,14 @@ long arch_ptrace(struct task_struct *child, long request, ret = write_user(child, addr, data); break; + case PTRACE_GET_WATCH_REGS: + ret = ptrace_get_watch_regs(child, addrp); + break; + + case PTRACE_SET_WATCH_REGS: + ret = ptrace_set_watch_regs(child, addrp); + break; + default: ret = ptrace_request(child, request, addr, data); break; @@ -534,3 +639,22 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } + +void user_enable_single_step(struct task_struct *task) +{ + int i = boot_cpu_data.watch_dreg_count; + struct thread_info *ti = task_thread_info(task); + + task->thread.watch.addr[i] = task_pt_regs(task)->csr_era; + task->thread.watch.mask[i] = -1UL; + task->thread.watch.irw[i] = LA_WATCH_I; + task->thread.single_step = task_pt_regs(task)->csr_era; + set_ti_thread_flag(ti, TIF_SINGLESTEP); +} +EXPORT_SYMBOL(user_enable_single_step); + +void user_disable_single_step(struct task_struct *task) +{ + clear_tsk_thread_flag(task, TIF_SINGLESTEP); +} +EXPORT_SYMBOL(user_disable_single_step); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 8413ccc37f6f..9139ba70f03a 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -42,6 +42,7 
@@
 #include
 #include
 #include
+#include <asm/watch.h>
 #include
 #include
 #include
@@ -493,7 +494,40 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 
 asmlinkage void noinstr do_watch(struct pt_regs *regs)
 {
-	pr_warn("Hardware watch point handler not implemented!\n");
+	int i;
+	siginfo_t info;
+	irqentry_state_t state = irqentry_enter(regs);
+	struct loongarch_watch_reg_state *watches = &current->thread.watch;
+
+	if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) {
+		loongarch_read_watch_registers(regs);
+		for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
+			if ((watch_csrrd(LOONGARCH_CSR_MWPS) & (0x1 << i))) {
+				info.si_addr = (void __user *)watches->addr[i];
+				watch_csrwr(0x1 << i, LOONGARCH_CSR_MWPS);
+			}
+		}
+		force_sig_fault(SIGTRAP, TRAP_HWBKPT, info.si_addr);
+	} else if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
+		int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
+		unsigned long pc = regs->csr_era;
+
+		if (llbit) {
+			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
+			csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
+		} else if (pc == current->thread.single_step) {
+			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
+		} else {
+			loongarch_read_watch_registers(regs);
+			force_sig(SIGTRAP);
+		}
+	} else {
+		if (notify_die(DIE_TRAP, "Break", regs, 0,
+			       current->thread.trap_nr, SIGTRAP) != NOTIFY_STOP)
+			loongarch_clear_watch_registers();
+	}
+
+	irqentry_exit(regs, state);
 }
 
 asmlinkage void noinstr do_ri(struct pt_regs *regs)
diff --git a/arch/loongarch/kernel/watch.c b/arch/loongarch/kernel/watch.c
new file mode 100644
index 000000000000..659582239709
--- /dev/null
+++ b/arch/loongarch/kernel/watch.c
@@ -0,0 +1,564 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Chong Qiao
+ *         Huacai Chen
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+
+unsigned long watch_csrrd(unsigned int reg)
+{
+	switch (reg) {
+	case LOONGARCH_CSR_IB0ADDR:
+		return csr_read64(LOONGARCH_CSR_IB0ADDR);
+	case LOONGARCH_CSR_IB1ADDR:
+		return csr_read64(LOONGARCH_CSR_IB1ADDR);
+	case LOONGARCH_CSR_IB2ADDR:
+		return csr_read64(LOONGARCH_CSR_IB2ADDR);
+	case LOONGARCH_CSR_IB3ADDR:
+		return csr_read64(LOONGARCH_CSR_IB3ADDR);
+	case LOONGARCH_CSR_IB4ADDR:
+		return csr_read64(LOONGARCH_CSR_IB4ADDR);
+	case LOONGARCH_CSR_IB5ADDR:
+		return csr_read64(LOONGARCH_CSR_IB5ADDR);
+	case LOONGARCH_CSR_IB6ADDR:
+		return csr_read64(LOONGARCH_CSR_IB6ADDR);
+	case LOONGARCH_CSR_IB7ADDR:
+		return csr_read64(LOONGARCH_CSR_IB7ADDR);
+
+	case LOONGARCH_CSR_IB0MASK:
+		return csr_read64(LOONGARCH_CSR_IB0MASK);
+	case LOONGARCH_CSR_IB1MASK:
+		return csr_read64(LOONGARCH_CSR_IB1MASK);
+	case LOONGARCH_CSR_IB2MASK:
+		return csr_read64(LOONGARCH_CSR_IB2MASK);
+	case LOONGARCH_CSR_IB3MASK:
+		return csr_read64(LOONGARCH_CSR_IB3MASK);
+	case LOONGARCH_CSR_IB4MASK:
+		return csr_read64(LOONGARCH_CSR_IB4MASK);
+	case LOONGARCH_CSR_IB5MASK:
+		return csr_read64(LOONGARCH_CSR_IB5MASK);
+	case LOONGARCH_CSR_IB6MASK:
+		return csr_read64(LOONGARCH_CSR_IB6MASK);
+	case LOONGARCH_CSR_IB7MASK:
+		return csr_read64(LOONGARCH_CSR_IB7MASK);
+
+	case LOONGARCH_CSR_IB0ASID:
+		return csr_read64(LOONGARCH_CSR_IB0ASID);
+	case LOONGARCH_CSR_IB1ASID:
+		return csr_read64(LOONGARCH_CSR_IB1ASID);
+	case LOONGARCH_CSR_IB2ASID:
+		return csr_read64(LOONGARCH_CSR_IB2ASID);
+	case LOONGARCH_CSR_IB3ASID:
+		return csr_read64(LOONGARCH_CSR_IB3ASID);
+	case LOONGARCH_CSR_IB4ASID:
+		return csr_read64(LOONGARCH_CSR_IB4ASID);
+	case LOONGARCH_CSR_IB5ASID:
+		return csr_read64(LOONGARCH_CSR_IB5ASID);
+	case
LOONGARCH_CSR_IB6ASID: + return csr_read64(LOONGARCH_CSR_IB6ASID); + case LOONGARCH_CSR_IB7ASID: + return csr_read64(LOONGARCH_CSR_IB7ASID); + + case LOONGARCH_CSR_IB0CTL: + return csr_read64(LOONGARCH_CSR_IB0CTL); + case LOONGARCH_CSR_IB1CTL: + return csr_read64(LOONGARCH_CSR_IB1CTL); + case LOONGARCH_CSR_IB2CTL: + return csr_read64(LOONGARCH_CSR_IB2CTL); + case LOONGARCH_CSR_IB3CTL: + return csr_read64(LOONGARCH_CSR_IB3CTL); + case LOONGARCH_CSR_IB4CTL: + return csr_read64(LOONGARCH_CSR_IB4CTL); + case LOONGARCH_CSR_IB5CTL: + return csr_read64(LOONGARCH_CSR_IB5CTL); + case LOONGARCH_CSR_IB6CTL: + return csr_read64(LOONGARCH_CSR_IB6CTL); + case LOONGARCH_CSR_IB7CTL: + return csr_read64(LOONGARCH_CSR_IB7CTL); + + case LOONGARCH_CSR_DB0ADDR: + return csr_read64(LOONGARCH_CSR_DB0ADDR); + case LOONGARCH_CSR_DB1ADDR: + return csr_read64(LOONGARCH_CSR_DB1ADDR); + case LOONGARCH_CSR_DB2ADDR: + return csr_read64(LOONGARCH_CSR_DB2ADDR); + case LOONGARCH_CSR_DB3ADDR: + return csr_read64(LOONGARCH_CSR_DB3ADDR); + case LOONGARCH_CSR_DB4ADDR: + return csr_read64(LOONGARCH_CSR_DB4ADDR); + case LOONGARCH_CSR_DB5ADDR: + return csr_read64(LOONGARCH_CSR_DB5ADDR); + case LOONGARCH_CSR_DB6ADDR: + return csr_read64(LOONGARCH_CSR_DB6ADDR); + case LOONGARCH_CSR_DB7ADDR: + return csr_read64(LOONGARCH_CSR_DB7ADDR); + + case LOONGARCH_CSR_DB0MASK: + return csr_read64(LOONGARCH_CSR_DB0MASK); + case LOONGARCH_CSR_DB1MASK: + return csr_read64(LOONGARCH_CSR_DB1MASK); + case LOONGARCH_CSR_DB2MASK: + return csr_read64(LOONGARCH_CSR_DB2MASK); + case LOONGARCH_CSR_DB3MASK: + return csr_read64(LOONGARCH_CSR_DB3MASK); + case LOONGARCH_CSR_DB4MASK: + return csr_read64(LOONGARCH_CSR_DB4MASK); + case LOONGARCH_CSR_DB5MASK: + return csr_read64(LOONGARCH_CSR_DB5MASK); + case LOONGARCH_CSR_DB6MASK: + return csr_read64(LOONGARCH_CSR_DB6MASK); + case LOONGARCH_CSR_DB7MASK: + return csr_read64(LOONGARCH_CSR_DB7MASK); + + case LOONGARCH_CSR_DB0ASID: + return csr_read64(LOONGARCH_CSR_DB0ASID); + case LOONGARCH_CSR_DB1ASID: + return csr_read64(LOONGARCH_CSR_DB1ASID); + case LOONGARCH_CSR_DB2ASID: + return csr_read64(LOONGARCH_CSR_DB2ASID); + case LOONGARCH_CSR_DB3ASID: + return csr_read64(LOONGARCH_CSR_DB3ASID); + case LOONGARCH_CSR_DB4ASID: + return csr_read64(LOONGARCH_CSR_DB4ASID); + case LOONGARCH_CSR_DB5ASID: + return csr_read64(LOONGARCH_CSR_DB5ASID); + case LOONGARCH_CSR_DB6ASID: + return csr_read64(LOONGARCH_CSR_DB6ASID); + case LOONGARCH_CSR_DB7ASID: + return csr_read64(LOONGARCH_CSR_DB7ASID); + + case LOONGARCH_CSR_DB0CTL: + return csr_read64(LOONGARCH_CSR_DB0CTL); + case LOONGARCH_CSR_DB1CTL: + return csr_read64(LOONGARCH_CSR_DB1CTL); + case LOONGARCH_CSR_DB2CTL: + return csr_read64(LOONGARCH_CSR_DB2CTL); + case LOONGARCH_CSR_DB3CTL: + return csr_read64(LOONGARCH_CSR_DB3CTL); + case LOONGARCH_CSR_DB4CTL: + return csr_read64(LOONGARCH_CSR_DB4CTL); + case LOONGARCH_CSR_DB5CTL: + return csr_read64(LOONGARCH_CSR_DB5CTL); + case LOONGARCH_CSR_DB6CTL: + return csr_read64(LOONGARCH_CSR_DB6CTL); + case LOONGARCH_CSR_DB7CTL: + return csr_read64(LOONGARCH_CSR_DB7CTL); + + case LOONGARCH_CSR_FWPS: + return csr_read64(LOONGARCH_CSR_FWPS); + case LOONGARCH_CSR_MWPS: + return csr_read64(LOONGARCH_CSR_MWPS); + case LOONGARCH_CSR_FWPC: + return csr_read64(LOONGARCH_CSR_FWPC); + case LOONGARCH_CSR_MWPC: + return csr_read64(LOONGARCH_CSR_MWPC); + default: + printk("read watch register number error %d\n", reg); + } + return 0; +} +EXPORT_SYMBOL(watch_csrrd); + +void watch_csrwr(unsigned long val, unsigned int reg) +{ + switch (reg) { + case 
LOONGARCH_CSR_IB0ADDR: + csr_write64(val, LOONGARCH_CSR_IB0ADDR); + break; + case LOONGARCH_CSR_IB1ADDR: + csr_write64(val, LOONGARCH_CSR_IB1ADDR); + break; + case LOONGARCH_CSR_IB2ADDR: + csr_write64(val, LOONGARCH_CSR_IB2ADDR); + break; + case LOONGARCH_CSR_IB3ADDR: + csr_write64(val, LOONGARCH_CSR_IB3ADDR); + break; + case LOONGARCH_CSR_IB4ADDR: + csr_write64(val, LOONGARCH_CSR_IB4ADDR); + break; + case LOONGARCH_CSR_IB5ADDR: + csr_write64(val, LOONGARCH_CSR_IB5ADDR); + break; + case LOONGARCH_CSR_IB6ADDR: + csr_write64(val, LOONGARCH_CSR_IB6ADDR); + break; + case LOONGARCH_CSR_IB7ADDR: + csr_write64(val, LOONGARCH_CSR_IB7ADDR); + break; + + case LOONGARCH_CSR_IB0MASK: + csr_write64(val, LOONGARCH_CSR_IB0MASK); + break; + case LOONGARCH_CSR_IB1MASK: + csr_write64(val, LOONGARCH_CSR_IB1MASK); + break; + case LOONGARCH_CSR_IB2MASK: + csr_write64(val, LOONGARCH_CSR_IB2MASK); + break; + case LOONGARCH_CSR_IB3MASK: + csr_write64(val, LOONGARCH_CSR_IB3MASK); + break; + case LOONGARCH_CSR_IB4MASK: + csr_write64(val, LOONGARCH_CSR_IB4MASK); + break; + case LOONGARCH_CSR_IB5MASK: + csr_write64(val, LOONGARCH_CSR_IB5MASK); + break; + case LOONGARCH_CSR_IB6MASK: + csr_write64(val, LOONGARCH_CSR_IB6MASK); + break; + case LOONGARCH_CSR_IB7MASK: + csr_write64(val, LOONGARCH_CSR_IB7MASK); + break; + + case LOONGARCH_CSR_IB0ASID: + csr_write64(val, LOONGARCH_CSR_IB0ASID); + break; + case LOONGARCH_CSR_IB1ASID: + csr_write64(val, LOONGARCH_CSR_IB1ASID); + break; + case LOONGARCH_CSR_IB2ASID: + csr_write64(val, LOONGARCH_CSR_IB2ASID); + break; + case LOONGARCH_CSR_IB3ASID: + csr_write64(val, LOONGARCH_CSR_IB3ASID); + break; + case LOONGARCH_CSR_IB4ASID: + csr_write64(val, LOONGARCH_CSR_IB4ASID); + break; + case LOONGARCH_CSR_IB5ASID: + csr_write64(val, LOONGARCH_CSR_IB5ASID); + break; + case LOONGARCH_CSR_IB6ASID: + csr_write64(val, LOONGARCH_CSR_IB6ASID); + break; + case LOONGARCH_CSR_IB7ASID: + csr_write64(val, LOONGARCH_CSR_IB7ASID); + break; + + case LOONGARCH_CSR_IB0CTL: + csr_write64(val, LOONGARCH_CSR_IB0CTL); + break; + case LOONGARCH_CSR_IB1CTL: + csr_write64(val, LOONGARCH_CSR_IB1CTL); + break; + case LOONGARCH_CSR_IB2CTL: + csr_write64(val, LOONGARCH_CSR_IB2CTL); + break; + case LOONGARCH_CSR_IB3CTL: + csr_write64(val, LOONGARCH_CSR_IB3CTL); + break; + case LOONGARCH_CSR_IB4CTL: + csr_write64(val, LOONGARCH_CSR_IB4CTL); + break; + case LOONGARCH_CSR_IB5CTL: + csr_write64(val, LOONGARCH_CSR_IB5CTL); + break; + case LOONGARCH_CSR_IB6CTL: + csr_write64(val, LOONGARCH_CSR_IB6CTL); + break; + case LOONGARCH_CSR_IB7CTL: + csr_write64(val, LOONGARCH_CSR_IB7CTL); + break; + + case LOONGARCH_CSR_DB0ADDR: + csr_write64(val, LOONGARCH_CSR_DB0ADDR); + break; + case LOONGARCH_CSR_DB1ADDR: + csr_write64(val, LOONGARCH_CSR_DB1ADDR); + break; + case LOONGARCH_CSR_DB2ADDR: + csr_write64(val, LOONGARCH_CSR_DB2ADDR); + break; + case LOONGARCH_CSR_DB3ADDR: + csr_write64(val, LOONGARCH_CSR_DB3ADDR); + break; + case LOONGARCH_CSR_DB4ADDR: + csr_write64(val, LOONGARCH_CSR_DB4ADDR); + break; + case LOONGARCH_CSR_DB5ADDR: + csr_write64(val, LOONGARCH_CSR_DB5ADDR); + break; + case LOONGARCH_CSR_DB6ADDR: + csr_write64(val, LOONGARCH_CSR_DB6ADDR); + break; + case LOONGARCH_CSR_DB7ADDR: + csr_write64(val, LOONGARCH_CSR_DB7ADDR); + break; + + case LOONGARCH_CSR_DB0MASK: + csr_write64(val, LOONGARCH_CSR_DB0MASK); + break; + case LOONGARCH_CSR_DB1MASK: + csr_write64(val, LOONGARCH_CSR_DB1MASK); + break; + case LOONGARCH_CSR_DB2MASK: + csr_write64(val, LOONGARCH_CSR_DB2MASK); + break; + case LOONGARCH_CSR_DB3MASK: + 
csr_write64(val, LOONGARCH_CSR_DB3MASK); + break; + case LOONGARCH_CSR_DB4MASK: + csr_write64(val, LOONGARCH_CSR_DB4MASK); + break; + case LOONGARCH_CSR_DB5MASK: + csr_write64(val, LOONGARCH_CSR_DB5MASK); + break; + case LOONGARCH_CSR_DB6MASK: + csr_write64(val, LOONGARCH_CSR_DB6MASK); + break; + case LOONGARCH_CSR_DB7MASK: + csr_write64(val, LOONGARCH_CSR_DB7MASK); + break; + + case LOONGARCH_CSR_DB0ASID: + csr_write64(val, LOONGARCH_CSR_DB0ASID); + break; + case LOONGARCH_CSR_DB1ASID: + csr_write64(val, LOONGARCH_CSR_DB1ASID); + break; + case LOONGARCH_CSR_DB2ASID: + csr_write64(val, LOONGARCH_CSR_DB2ASID); + break; + case LOONGARCH_CSR_DB3ASID: + csr_write64(val, LOONGARCH_CSR_DB3ASID); + break; + case LOONGARCH_CSR_DB4ASID: + csr_write64(val, LOONGARCH_CSR_DB4ASID); + break; + case LOONGARCH_CSR_DB5ASID: + csr_write64(val, LOONGARCH_CSR_DB5ASID); + break; + case LOONGARCH_CSR_DB6ASID: + csr_write64(val, LOONGARCH_CSR_DB6ASID); + break; + case LOONGARCH_CSR_DB7ASID: + csr_write64(val, LOONGARCH_CSR_DB7ASID); + break; + + case LOONGARCH_CSR_DB0CTL: + csr_write64(val, LOONGARCH_CSR_DB0CTL); + break; + case LOONGARCH_CSR_DB1CTL: + csr_write64(val, LOONGARCH_CSR_DB1CTL); + break; + case LOONGARCH_CSR_DB2CTL: + csr_write64(val, LOONGARCH_CSR_DB2CTL); + break; + case LOONGARCH_CSR_DB3CTL: + csr_write64(val, LOONGARCH_CSR_DB3CTL); + break; + case LOONGARCH_CSR_DB4CTL: + csr_write64(val, LOONGARCH_CSR_DB4CTL); + break; + case LOONGARCH_CSR_DB5CTL: + csr_write64(val, LOONGARCH_CSR_DB5CTL); + break; + case LOONGARCH_CSR_DB6CTL: + csr_write64(val, LOONGARCH_CSR_DB6CTL); + break; + case LOONGARCH_CSR_DB7CTL: + csr_write64(val, LOONGARCH_CSR_DB7CTL); + break; + + case LOONGARCH_CSR_FWPS: + csr_write64(val, LOONGARCH_CSR_FWPS); + break; + case LOONGARCH_CSR_MWPS: + csr_write64(val, LOONGARCH_CSR_MWPS); + break; + default: + printk("write watch register number error %d\n", reg); + } +} +EXPORT_SYMBOL(watch_csrwr); + +void loongarch_probe_watch_registers(struct cpuinfo_loongarch *c) +{ + unsigned int ibcn, dbcn, total; + + ibcn = watch_csrrd(LOONGARCH_CSR_FWPC) & 0x3f; + dbcn = watch_csrrd(LOONGARCH_CSR_MWPC) & 0x3f; + c->watch_ireg_count = ibcn; + c->watch_dreg_count = dbcn; + total = ibcn + dbcn; + c->watch_reg_use_cnt = (total < NUM_WATCH_REGS) ? total : NUM_WATCH_REGS; +} + +/* + * Clear all watch registers. + */ +void loongarch_clear_watch_registers(void) +{ + unsigned int i, j, ibst, dbst, dbcn; + + ibst = watch_csrrd(LOONGARCH_CSR_FWPS); + dbst = watch_csrrd(LOONGARCH_CSR_MWPS); + dbcn = boot_cpu_data.watch_dreg_count; + + /* + * bit 16: skip next event + * bit 0-15: breakpoint triggered, W1C + */ + + if (ibst & 0x1ffff) + watch_csrwr(ibst&~0x10000, LOONGARCH_CSR_FWPS); + if (dbst & 0x1ffff) + watch_csrwr(dbst&~0x10000, LOONGARCH_CSR_MWPS); + + for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) { + j = i - dbcn; + + if (i < dbcn) { + watch_csrwr(0, LOONGARCH_CSR_DB0ADDR + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0MASK + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0CTL + 8*i); + } else { + watch_csrwr(0, LOONGARCH_CSR_IB0ADDR + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0MASK + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0CTL + 8*j); + } + } +} +EXPORT_SYMBOL(loongarch_clear_watch_registers); + +/* + * Install the watch registers for the current thread. 
+ */ +void loongarch_install_watch_registers(struct task_struct *t) +{ + int i, j, dbcn; + unsigned int cntl = 0x1e; + struct loongarch_watch_reg_state *watches = &t->thread.watch; + + dbcn = boot_cpu_data.watch_dreg_count; + for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) { + j = i - dbcn; + + if (watches->irw[i] & (LA_WATCH_R | LA_WATCH_W)) { + if ((watches->irw[i] & LA_WATCH_R)) + cntl |= 1<<8; + if ((watches->irw[i] & LA_WATCH_W)) + cntl |= 1<<9; + + watch_csrwr(watches->addr[i], LOONGARCH_CSR_DB0ADDR + 8*i); + watch_csrwr(watches->mask[i], LOONGARCH_CSR_DB0MASK + 8*i); + watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8*i); + watch_csrwr(cntl, LOONGARCH_CSR_DB0CTL + 8*i); + + } else if (watches->irw[i] & LA_WATCH_I) { + if ((task_pt_regs(t)->csr_era & ~watches->mask[i]) + == (watches->addr[i] & ~watches->mask[i])) + watch_csrwr(0x10000, LOONGARCH_CSR_FWPS); + + watch_csrwr(watches->addr[i], LOONGARCH_CSR_IB0ADDR + 8*j); + watch_csrwr(watches->mask[i], LOONGARCH_CSR_IB0MASK + 8*j); + watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8*j); + watch_csrwr(cntl, LOONGARCH_CSR_IB0CTL + 8*j); + } else if (watches->irwmask[i]) { + if (i < dbcn) + watch_csrwr(0x0, LOONGARCH_CSR_DB0CTL + 8*i); + else + watch_csrwr(0x0, LOONGARCH_CSR_IB0CTL + 8*j); + } + } +} +EXPORT_SYMBOL(loongarch_install_watch_registers); + +#define PRMD_WE (1UL<<3) /* PRMD watch enable, restore to CRMD when eret */ + +void loongarch_clear_prev_watch_registers(struct task_struct *prev) +{ + struct pt_regs *regs = task_pt_regs(prev); + + loongarch_clear_watch_registers(); + prev->thread.csr_prmd &= ~PRMD_WE; + regs->csr_prmd &= ~PRMD_WE; +} + +void loongarch_install_next_watch_registers(struct task_struct *next) +{ + struct pt_regs *regs = task_pt_regs(next); + + loongarch_install_watch_registers(next); + next->thread.csr_prmd |= PRMD_WE; + regs->csr_prmd |= PRMD_WE; +} + +/* + * Read back the watchhi registers so the user space debugger has + * access to the I, R, and W bits. 
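+ *
+ * As a sketch of the intended usage (hypothetical debugger code; pid
+ * and watch_addr are placeholders supplied by the tracer, see the
+ * uapi definitions added by this patch), a debugger would typically
+ * do something like:
+ *
+ *	struct pt_watch_regs wr = { .max_valid = NUM_WATCH_REGS };
+ *
+ *	ptrace(PTRACE_GET_WATCH_REGS, pid, &wr, NULL);
+ *	wr.la64[0].addr = watch_addr;
+ *	wr.la64[0].mask = 0;
+ *	wr.la64[0].irw  = LA_WATCH_W;	(report writes to watch_addr)
+ *	ptrace(PTRACE_SET_WATCH_REGS, pid, &wr, NULL);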
+ */
+void loongarch_read_watch_registers(struct pt_regs *regs)
+{
+	unsigned int i, j, ibst, dbst, dbc, dbcn;
+	struct loongarch_watch_reg_state *watches = &current->thread.watch;
+
+	ibst = watch_csrrd(LOONGARCH_CSR_FWPS);
+	dbst = watch_csrrd(LOONGARCH_CSR_MWPS);
+	dbcn = boot_cpu_data.watch_dreg_count;
+
+	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
+		j = i - dbcn;
+		watches->irwstat[i] = 0;
+
+		if (i < dbcn) {
+			watches->addr[i] = watch_csrrd(LOONGARCH_CSR_DB0ADDR + 8*i);
+			watches->mask[i] = watch_csrrd(LOONGARCH_CSR_DB0MASK + 8*i);
+			if (dbst & (1u << i)) {
+				dbc = watch_csrrd(LOONGARCH_CSR_DB0CTL + i*8);
+
+				if (dbc & (1<<8))
+					watches->irwstat[i] |= LA_WATCH_R;
+				if (dbc & (1<<9))
+					watches->irwstat[i] |= LA_WATCH_W;
+			}
+		} else {
+			watches->addr[i] = watch_csrrd(LOONGARCH_CSR_IB0ADDR + 8*j);
+			watches->mask[i] = watch_csrrd(LOONGARCH_CSR_IB0MASK + 8*j);
+			if (ibst & (1u << j))
+				watches->irwstat[i] |= LA_WATCH_I;
+		}
+	}
+
+	if (ibst & 0xffff)
+		watch_csrwr(ibst, LOONGARCH_CSR_FWPS);
+}
+
+#define CTRL_WE	1	/* Breakpoint can only be written by hw debugger */
+
+void loongarch_update_watch_registers(struct task_struct *t)
+{
+	int i, j, ctl, dbcn;
+	struct loongarch_watch_reg_state *watches = &t->thread.watch;
+
+	dbcn = boot_cpu_data.watch_dreg_count;
+	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
+		j = i - dbcn;
+
+		if (i < dbcn) {
+			ctl = watch_csrrd(LOONGARCH_CSR_DB0CTL + 8*i);
+			if (ctl & CTRL_WE)
+				watches->irwmask[i] = 0;
+			else
+				watches->irwmask[i] = LA_WATCH_R | LA_WATCH_W;
+		} else {
+			ctl = watch_csrrd(LOONGARCH_CSR_IB0CTL + 8*j);
+			if (ctl & CTRL_WE)
+				watches->irwmask[i] = 0;
+			else
+				watches->irwmask[i] = LA_WATCH_I;
+		}
+	}
+}
--
Gitee

From a703a95d34b2042fca4879d654070e1a26fbd551 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 31 Oct 2018 14:13:26 +0800
Subject: [PATCH 102/241] LoongArch: Add efistub support

Add basic EFI stub support for LoongArch. When CONFIG_EFI_STUB is
enabled, a file named vmlinuz.efi is created, which can be executed
directly by UEFI firmware with no need for a traditional OS loader.

The EFI boot stub supports multiple initrds, but they must exist on the
same partition as the vmlinuz.efi. Command-line arguments for the kernel
can be appended after the vmlinuz.efi name when run from the EFI shell,
e.g.
Shell> vmlinuz.efi console=ttyS0 root=/dev/sdb initrd=initrd.img Change-Id: I17ff3853f52fcbc352ded6856428e4c6eb377c46 Signed-off-by: Yun Liu Signed-off-by: Hongchen Zhang Signed-off-by: Huacai Chen --- .gitignore | 1 + arch/loongarch/Kconfig | 9 ++ arch/loongarch/Makefile | 4 + arch/loongarch/boot/compressed/Makefile | 25 +++- arch/loongarch/boot/compressed/efi-header.S | 132 ++++++++++++++++++ arch/loongarch/boot/compressed/ld.script | 22 ++- arch/loongarch/boot/compressed/string.c | 68 +++++++++ arch/loongarch/include/asm/efi.h | 1 + arch/loongarch/kernel/efi.c | 23 +++ arch/loongarch/loongson64/env.c | 12 +- arch/loongarch/loongson64/init.c | 10 +- drivers/firmware/efi/Kconfig | 2 +- drivers/firmware/efi/libstub/Makefile | 23 ++- drivers/firmware/efi/libstub/efi-stub.c | 14 +- drivers/firmware/efi/libstub/loongarch-stub.c | 122 ++++++++++++++++ include/linux/pe.h | 1 + 16 files changed, 439 insertions(+), 30 deletions(-) create mode 100644 arch/loongarch/boot/compressed/efi-header.S create mode 100644 drivers/firmware/efi/libstub/loongarch-stub.c diff --git a/.gitignore b/.gitignore index 372e57abbf41..369f698dd804 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,7 @@ modules.order /vmlinux.symvers /vmlinux-gdb.py /vmlinuz +/vmlinuz.efi /System.map /Module.markers /modules.builtin.modinfo diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6be6512cf4f2..bd9bc18ec149 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -557,6 +557,15 @@ config EFI This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). +config EFI_STUB + bool "EFI stub support" + default y + depends on EFI + select EFI_GENERIC_STUB + help + This kernel feature allows a vmlinuz.efi to be loaded directly + by EFI firmware without the use of a bootloader. 
+ config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 15b1c394a00c..090a155f7c5b 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -36,6 +36,9 @@ endif all-y := vmlinux all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlinuz +all-$(CONFIG_EFI_STUB) += vmlinuz.efi + +vmlinuz.efi: vmlinuz cflags-y += -G0 -pipe -msoft-float LDFLAGS_vmlinux += -G0 -static -n -nostdlib @@ -136,6 +139,7 @@ boot-y := vmlinux.bin # compressed boot image targets (arch/loongarch/boot/compressed/) bootz-y := vmlinuz bootz-y += vmlinuz.bin +bootz-$(CONFIG_EFI_STUB)+= vmlinuz.efi all: $(all-y) diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index 76c5b19cb0e0..7e78077b6723 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -13,8 +13,12 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS)) KBUILD_CFLAGS := $(filter-out -fstack-protector, $(KBUILD_CFLAGS)) +KBUILD_CFLAGS += -fpie -include $(srctree)/include/linux/hidden.h + KBUILD_CFLAGS := $(KBUILD_CFLAGS) -D__KERNEL__ \ - -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull" + -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ + -D"VMLINUX_LOAD_ADDRESS_ULL=$(VMLINUX_LOAD_ADDRESS)ull" \ + -D"KERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)ull" \ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ @@ -82,5 +86,24 @@ vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr $(call cmd,zld) $(call cmd,strip) +efiobjs-y := $(obj)/efi-header.o $(obj)/decompress.o $(obj)/piggy.o \ + $(obj)/string.o $(objtree)/drivers/firmware/efi/libstub/lib.a + +efiobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/dbg.o +efiobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/uart-16550.o + +targets += $(notdir $(efiobjs-y)) + +quiet_cmd_efild = LD $@ + cmd_efild = $(LD) $(KBUILD_LDFLAGS) -T $< $(efiobjs-y) -o $@ + +quiet_cmd_eficopy = OBJCOPY $@ +cmd_eficopy = $(OBJCOPY) $(OBJCOPYFLAGS) -O binary -R .comment -S $@ $@ + +vmlinuz.efi: $(src)/ld.script $(efiobjs-y) + $(call cmd,efild) + $(call cmd,eficopy) + clean-files += $(objtree)/vmlinuz clean-files += $(objtree)/vmlinuz.bin +clean-files += $(objtree)/vmlinuz.efi diff --git a/arch/loongarch/boot/compressed/efi-header.S b/arch/loongarch/boot/compressed/efi-header.S new file mode 100644 index 000000000000..f2e536cdbf72 --- /dev/null +++ b/arch/loongarch/boot/compressed/efi-header.S @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2013 - 2017 Linaro, Ltd. + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Copyright (C) 2020, 2021 Loongson, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: + .short IMAGE_FILE_MACHINE_LOONGARCH /* Machine */ + .short section_count /* NumberOfSections */ + .long 0 /* TimeDateStamp */ + .long 0 /* PointerToSymbolTable */ + .long 0 /* NumberOfSymbols */ + .short section_table - optional_header /* SizeOfOptionalHeader */ + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED /* Characteristics */ + +optional_header: + .short PE_OPT_MAGIC_PE32PLUS /* PE32+ format */ + .byte 0x02 /* MajorLinkerVersion */ + .byte 0x14 /* MinorLinkerVersion */ + .long _data - efi_header_end /* SizeOfCode */ + .long _end - _data /* SizeOfInitializedData */ + .long 0 /* SizeOfUninitializedData */ + .long efi_pe_entry - _text /* AddressOfEntryPoint */ + .long efi_header_end - _text /* BaseOfCode */ + +extra_header_fields: + .quad 0 /* ImageBase */ + .long PECOFF_SEGMENT_ALIGN /* SectionAlignment */ + .long PECOFF_FILE_ALIGN /* FileAlignment */ + .short 0 /* MajorOperatingSystemVersion */ + .short 0 /* MinorOperatingSystemVersion */ + .short LINUX_EFISTUB_MAJOR_VERSION /* MajorImageVersion */ + .short LINUX_EFISTUB_MINOR_VERSION /* MinorImageVersion */ + .short 0 /* MajorSubsystemVersion */ + .short 0 /* MinorSubsystemVersion */ + .long 0 /* Win32VersionValue */ + + .long _end - _text /* SizeOfImage */ + + /* Everything before the kernel image is considered part of the header */ + .long efi_header_end - _head /* SizeOfHeaders */ + .long 0 /* CheckSum */ + .short IMAGE_SUBSYSTEM_EFI_APPLICATION /* Subsystem */ + .short 0 /* DllCharacteristics */ + .quad 0 /* SizeOfStackReserve */ + .quad 0 /* SizeOfStackCommit */ + .quad 0 /* SizeOfHeapReserve */ + .quad 0 /* SizeOfHeapCommit */ + .long 0 /* LoaderFlags */ + .long (section_table - .) / 8 /* NumberOfRvaAndSizes */ + + .quad 0 /* ExportTable */ + .quad 0 /* ImportTable */ + .quad 0 /* ResourceTable */ + .quad 0 /* ExceptionTable */ + .quad 0 /* CertificationTable */ + .quad 0 /* BaseRelocationTable */ + + /* Section table */ +section_table: + .ascii ".text\0\0\0" + .long _data - efi_header_end /* VirtualSize */ + .long efi_header_end - _text /* VirtualAddress */ + .long _data - efi_header_end /* SizeOfRawData */ + .long efi_header_end - _text /* PointerToRawData */ + + .long 0 /* PointerToRelocations */ + .long 0 /* PointerToLineNumbers */ + .short 0 /* NumberOfRelocations */ + .short 0 /* NumberOfLineNumbers */ + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE /* Characteristics */ + + .ascii ".data\0\0\0" + .long _end - _data /* VirtualSize */ + .long _data - _text /* VirtualAddress */ + .long _edata - _data /* SizeOfRawData */ + .long _data - _text /* PointerToRawData */ + + .long 0 /* PointerToRelocations */ + .long 0 /* PointerToLineNumbers */ + .short 0 /* NumberOfRelocations */ + .short 0 /* NumberOfLineNumbers */ + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE /* Characteristics */ + + .org 0x20e + .word kernel_version - 512 - _head + + .set section_count, (. 
- section_table) / 40
+
+	.balign 0x10000				/* PECOFF_SEGMENT_ALIGN */
+efi_header_end:
+	.endm
+
+	.section ".head.text","ax"
+_head:
+	/* "MZ", MS-DOS header */
+	.byte 0x4d
+	.byte 0x5a
+	.org 0x3c
+	/* Offset to the PE header */
+	.long pe_header - _head
+
+pe_header:
+	__EFI_PE_HEADER
+
+start:
+	.globl start
+
+kernel_entaddr:
+	.quad KERNEL_ENTRY
+	.globl kernel_entaddr
+
+kernel_version:
+	.ascii UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION "\0"
diff --git a/arch/loongarch/boot/compressed/ld.script b/arch/loongarch/boot/compressed/ld.script
index 2e32d8a72fdf..671d2027c7b0 100644
--- a/arch/loongarch/boot/compressed/ld.script
+++ b/arch/loongarch/boot/compressed/ld.script
@@ -6,6 +6,13 @@
  * Copyright (C) 2020 Loongson Technology Corporation Limited
  */
 
+/*
+ * The maximum available page size is 64K, so we set the SectionAlignment
+ * field of the EFI application to 64K.
+ */
+PECOFF_FILE_ALIGN = 0x200;
+PECOFF_SEGMENT_ALIGN = 0x10000;
+
 OUTPUT_ARCH(loongarch)
 ENTRY(start)
 PHDRS {
@@ -14,22 +21,30 @@ PHDRS {
 SECTIONS
 {
 	/* Text and read-only data */
-	/* . = VMLINUZ_LOAD_ADDRESS; */
+	_text = .;
+	.head.text : {
+		*(.head.text)
+	}
+
 	.text : {
 		*(.text)
+		*(.init.text)
 		*(.rodata)
 	}: text
 	/* End of text section */
 
+	. = ALIGN(PECOFF_SEGMENT_ALIGN);
+	_data = .;
 	/* Writable data */
 	.data : {
 		*(.data)
+		*(.init.data)
 		/* Put the compressed image here */
 		__image_begin = .;
 		*(.image)
 		__image_end = .;
 		CONSTRUCTORS
-		. = ALIGN(16);
+		. = ALIGN(PECOFF_FILE_ALIGN);
 	}
 
 	_edata = .;
@@ -38,8 +53,9 @@ SECTIONS
 	/* BSS */
 	.bss : {
 		*(.bss)
+		*(.init.bss)
 	}
-	. = ALIGN(16);
+	. = ALIGN(PECOFF_SEGMENT_ALIGN);
 	_end = .;
 
 	/* Sections to be discarded */
diff --git a/arch/loongarch/boot/compressed/string.c b/arch/loongarch/boot/compressed/string.c
index 67e2355ae50e..3efff97c5293 100644
--- a/arch/loongarch/boot/compressed/string.c
+++ b/arch/loongarch/boot/compressed/string.c
@@ -7,6 +7,30 @@
 
 #include
 
+void * __weak memchr(const void *s, int c, size_t n)
+{
+	const unsigned char *p = s;
+	while (n-- != 0) {
+		if ((unsigned char)c == *p++) {
+			return (void *)(p - 1);
+		}
+	}
+	return NULL;
+}
+
+int __weak memcmp(const void *cs, const void *ct, size_t count)
+{
+	int res = 0;
+	const unsigned char *su1, *su2;
+
+	for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
+		res = *su1 - *su2;
+		if (res != 0)
+			break;
+	}
+	return res;
+}
+
 void __weak *memset(void *s, int c, size_t n)
 {
 	int i;
@@ -44,3 +68,47 @@ void __weak *memmove(void *dest, const void *src, size_t n)
 
 	return dest;
 }
+
+int __weak strcmp(const char *str1, const char *str2)
+{
+	int delta = 0;
+	const unsigned char *s1 = (const unsigned char *)str1;
+	const unsigned char *s2 = (const unsigned char *)str2;
+
+	while (*s1 || *s2) {
+		delta = *s1 - *s2;
+		if (delta)
+			return delta;
+		s1++;
+		s2++;
+	}
+	return 0;
+}
+
+size_t __weak strlen(const char *s)
+{
+	const char *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
+
+size_t __weak strnlen(const char *s, size_t count)
+{
+	const char *sc;
+
+	for (sc = s; count-- && *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
+
+char * __weak strrchr(const char *s, int c)
+{
+	const char *last = NULL;
+	do {
+		if (*s == (char)c)
+			last = s;
+	} while (*s++);
+	return (char *)last;
+}
diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h
index 8a1f6521781a..5ae447c4b21e 100644
--- a/arch/loongarch/include/asm/efi.h
+++ b/arch/loongarch/include/asm/efi.h
@@ -32,6 +32,7 @@ static inline void efifb_setup_from_dmi(struct screen_info
*si, const char *opt) }) #define EFI_ALLOC_ALIGN SZ_64K +#define EFI_RT_VIRTUAL_OFFSET CSR_DMW0_BASE struct screen_info *alloc_screen_info(void); void free_screen_info(struct screen_info *si); diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 7bcd8fc57956..2ea536621120 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -28,10 +28,12 @@ static unsigned long efi_nr_tables; static unsigned long efi_config_table; +static unsigned long boot_memmap __initdata = EFI_INVALID_TABLE_ADDR; static unsigned long screen_info_table __initdata = EFI_INVALID_TABLE_ADDR; static efi_system_table_t *efi_systab; static efi_config_table_type_t arch_tables[] __initdata = { + {LINUX_EFI_BOOT_MEMMAP_GUID, &boot_memmap, "MEMMAP" }, {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, &screen_info_table, "SINFO"}, {}, }; @@ -77,6 +79,7 @@ void __init efi_init(void) { int size; void *config_tables; + struct efi_boot_memmap *tbl; if (!efi_system_table) return; @@ -87,6 +90,8 @@ void __init efi_init(void) return; } + efi_systab_report_header(&efi_systab->hdr, efi_systab->fw_vendor); + set_bit(EFI_64BIT, &efi.flags); efi_nr_tables = efi_systab->nr_tables; efi_config_table = (unsigned long)efi_systab->tables; @@ -97,4 +102,22 @@ void __init efi_init(void) early_memunmap(config_tables, efi_nr_tables * size); init_screen_info(); + + if (boot_memmap == EFI_INVALID_TABLE_ADDR) + return; + + tbl = early_memremap_ro(boot_memmap, sizeof(*tbl)); + if (tbl) { + struct efi_memory_map_data data; + + data.phys_map = boot_memmap + sizeof(*tbl); + data.size = tbl->map_size; + data.desc_size = tbl->desc_size; + data.desc_version = tbl->desc_ver; + + if (efi_memmap_init_early(&data) < 0) + panic("Unable to map EFI memory map.\n"); + + early_memunmap(tbl, sizeof(*tbl)); + } } diff --git a/arch/loongarch/loongson64/env.c b/arch/loongarch/loongson64/env.c index d35ac838308c..9383bb209d41 100644 --- a/arch/loongarch/loongson64/env.c +++ b/arch/loongarch/loongson64/env.c @@ -31,21 +31,17 @@ static void __init register_addrs_set(u64 *registers, const u64 addr, int num) void __init init_environ(void) { int efi_boot = fw_arg0; - struct efi_memory_map_data data; - void *fdt_ptr = early_memremap_ro(fw_arg1, SZ_64K); + char *cmdline = early_memremap_ro(fw_arg1, COMMAND_LINE_SIZE); if (efi_boot) set_bit(EFI_BOOT, &efi.flags); else clear_bit(EFI_BOOT, &efi.flags); - early_init_dt_scan(fdt_ptr); - early_init_fdt_reserve_self(); - efi_system_table = efi_get_fdt_params(&data); + strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); + early_memunmap(cmdline, COMMAND_LINE_SIZE); - efi_memmap_init_early(&data); - memblock_reserve(data.phys_map & PAGE_MASK, - PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK))); + efi_system_table = fw_arg2; register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16); } diff --git a/arch/loongarch/loongson64/init.c b/arch/loongarch/loongson64/init.c index a8a1276c06cb..d5ffdc82b3c2 100644 --- a/arch/loongarch/loongson64/init.c +++ b/arch/loongarch/loongson64/init.c @@ -74,16 +74,9 @@ static void __init parse_cpu_table(const struct dmi_header *dm) static void __init parse_bios_table(const struct dmi_header *dm) { - int bios_extern; char *dmi_data = (char *)dm; - bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET); b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6; - - if (bios_extern & LOONGSON_EFI_ENABLE) - set_bit(EFI_BOOT, &efi.flags); - else - clear_bit(EFI_BOOT, &efi.flags); } static void __init find_tokens(const struct dmi_header *dm, void *dummy) @@ -97,6 
+90,7 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy)
 		break;
 	}
 }
+
 static void __init smbios_parse(void)
 {
 	b_info.bios_vendor = (void *)dmi_get_system_info(DMI_BIOS_VENDOR);
@@ -110,12 +104,12 @@ static void __init smbios_parse(void)
 void __init early_init(void)
 {
 	init_environ();
+	efi_init();
 	memblock_init();
 }
 
 void __init platform_init(void)
 {
-	efi_init();
 	arch_reserve_vmcore();
 	arch_parse_crashkernel();
 
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 3aded32bdfdb..29dc13fee019 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -106,7 +106,7 @@ config EFI_GENERIC_STUB
 
 config EFI_ARMSTUB_DTB_LOADER
 	bool "Enable the DTB loader"
-	depends on EFI_GENERIC_STUB && !RISCV
+	depends on EFI_GENERIC_STUB && !RISCV && !LOONGARCH
 	default y
 	help
 	  Select this config option to add support for the dtb= command
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 2c67f71f2375..6c4be42170c5 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -26,8 +26,10 @@ cflags-$(CONFIG_ARM)	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
 				   $(call cc-option,-mno-single-pic-base)
 cflags-$(CONFIG_RISCV)	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
 				   -fpic
+cflags-$(CONFIG_LOONGARCH)	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
+				   -fpie
 
-cflags-$(CONFIG_EFI_GENERIC_STUB) += -I$(srctree)/scripts/dtc/libfdt
+cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt
 
 KBUILD_CFLAGS := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
 		   -include $(srctree)/include/linux/hidden.h \
@@ -64,19 +66,24 @@ lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \
 	 skip_spaces.o lib-cmdline.o lib-ctype.o \
 	 alignedmem.o relocate.o vsprintf.o
 
-# include the stub's generic dependencies from lib/ when building for ARM/arm64
-efi-deps-y := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c
+# include the stub's libfdt dependencies from lib/ when needed
+libfdt-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c \
+	       fdt_empty_tree.c fdt_sw.c
+
+lib-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdt.o \
+				     $(patsubst %.c,lib-%.o,$(libfdt-deps))
 
 $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o fdt.o string.o \
-				  $(patsubst %.c,lib-%.o,$(efi-deps-y))
+lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o
 
 lib-$(CONFIG_ARM)	+= arm32-stub.o
 lib-$(CONFIG_ARM64)	+= arm64-stub.o
 lib-$(CONFIG_X86)	+= x86-stub.o
 lib-$(CONFIG_RISCV)	+= riscv-stub.o
+lib-$(CONFIG_LOONGARCH)	+= loongarch-stub.o
+
 CFLAGS_arm32-stub.o	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 # Even when -mbranch-protection=none is set, Clang will generate a
@@ -104,6 +111,7 @@ STUBCOPY_FLAGS-$(CONFIG_ARM)	+= --rename-section .data=.data.efistub \
 				   --rename-section .bss=.bss.efistub,load,alloc
 STUBCOPY_RELOC-$(CONFIG_ARM)	:= R_ARM_ABS
 
+
 #
 # arm64 puts the stub in the kernel proper, which will unnecessarily retain all
 # code indefinitely unless it is annotated as __init/__initdata/__initconst etc.
@@ -132,6 +140,11 @@ STUBCOPY_FLAGS-$(CONFIG_RISCV)	+= --prefix-alloc-sections=.init \
 				   --prefix-symbols=__efistub_
 STUBCOPY_RELOC-$(CONFIG_RISCV)	:= R_RISCV_HI20
 
+# For LoongArch, keep all the symbols in the .init section and make sure
+# that no absolute symbol references exist.
+STUBCOPY_FLAGS-$(CONFIG_LOONGARCH) += --prefix-alloc-sections=.init +STUBCOPY_RELOC-$(CONFIG_LOONGARCH) := R_LARCH_MARK_LA + $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,stubcopy) diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index 84c881e84229..fd95a6e97f2f 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -39,12 +39,18 @@ #ifdef CONFIG_ARM64 # define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64 +#elif defined(CONFIG_RISCV) || defined(CONFIG_LOONGARCH) +# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN #else # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE #endif +#ifndef EFI_RT_VIRTUAL_OFFSET +#define EFI_RT_VIRTUAL_OFFSET 0 +#endif + static u64 virtmap_base = EFI_RT_VIRTUAL_BASE; -static bool flat_va_mapping; +static bool flat_va_mapping = !!EFI_RT_VIRTUAL_OFFSET; const efi_system_table_t *efi_system_table; @@ -220,8 +226,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, * The easiest way to achieve that is to simply use a 1:1 mapping. */ prop_tbl = get_efi_config_table(EFI_PROPERTIES_TABLE_GUID); - flat_va_mapping = prop_tbl && - (prop_tbl->memory_protection_attribute & + flat_va_mapping |= prop_tbl && + (prop_tbl->memory_protection_attribute & EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA); /* force efi_novamap if SetVirtualAddressMap() is unsupported */ @@ -318,7 +324,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, paddr = in->phys_addr; size = in->num_pages * EFI_PAGE_SIZE; - in->virt_addr = in->phys_addr; + in->virt_addr = in->phys_addr + EFI_RT_VIRTUAL_OFFSET; if (efi_novamap) { continue; } diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c new file mode 100644 index 000000000000..d27dea9c940e --- /dev/null +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Yun Liu + * Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include "efistub.h" + +#define BOOT_HEAP_SIZE 0x400000 + +typedef void __noreturn (*kernel_entry_t)(bool efi, unsigned long cmdline, + unsigned long systab); + +extern long kernel_entaddr; +extern void decompress_kernel(unsigned long boot_heap_start); + +static unsigned char efi_heap[BOOT_HEAP_SIZE]; +static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID; +static kernel_entry_t kernel_entry; + +struct screen_info *alloc_screen_info(void) +{ + efi_status_t status; + struct screen_info *si; + + status = efi_bs_call(allocate_pool, + EFI_RUNTIME_SERVICES_DATA, sizeof(*si), (void **)&si); + if (status != EFI_SUCCESS) + return NULL; + + memset(si, 0, sizeof(*si)); + + status = efi_bs_call(install_configuration_table, &screen_info_guid, si); + if (status == EFI_SUCCESS) + return si; + + efi_bs_call(free_pool, si); + + return NULL; +} + +void free_screen_info(struct screen_info *si) +{ + if (!si) + return; + + efi_bs_call(install_configuration_table, &screen_info_guid, NULL); + efi_bs_call(free_pool, si); +} + +efi_status_t check_platform_features(void) +{ + return EFI_SUCCESS; +} + +efi_status_t handle_kernel_image(unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + efi_loaded_image_t *image) +{ + /* Config Direct Mapping */ + csr_write64(CSR_DMW0_INIT, LOONGARCH_CSR_DMWIN0); + csr_write64(CSR_DMW1_INIT, LOONGARCH_CSR_DMWIN1); + + 
decompress_kernel((unsigned long)efi_heap); + kernel_entry = (kernel_entry_t)kernel_entaddr; + + return EFI_SUCCESS; +} + +struct exit_boot_struct { + efi_memory_desc_t *runtime_map; + int runtime_entry_count; +}; + +static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) +{ + struct exit_boot_struct *p = priv; + + /* + * Update the memory map with virtual addresses. The function will also + * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME + * entries so that we can pass it straight to SetVirtualAddressMap() + */ + efi_get_virtmap(map->map, map->map_size, map->desc_size, + p->runtime_map, &p->runtime_entry_count); + + return EFI_SUCCESS; +} + +efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, + unsigned long kernel_addr, char *cmdline_ptr) +{ + struct exit_boot_struct priv; + unsigned long desc_size; + efi_status_t status; + u32 desc_ver; + + status = efi_alloc_virtmap(&priv.runtime_map, &desc_size, &desc_ver); + if (status != EFI_SUCCESS) { + efi_err("Unable to retrieve UEFI memory map.\n"); + return status; + } + + efi_info("Exiting boot services\n"); + + efi_novamap = false; + status = efi_exit_boot_services(handle, &priv, exit_boot_func); + if (status != EFI_SUCCESS) + return status; + + /* Install the new virtual address map */ + efi_rt_call(set_virtual_address_map, + priv.runtime_entry_count * desc_size, desc_size, + desc_ver, priv.runtime_map); + + kernel_entry(true, (unsigned long)cmdline_ptr, (unsigned long)efi_system_table); +} diff --git a/include/linux/pe.h b/include/linux/pe.h index daf09ffffe38..f4bb0b6a416d 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -65,6 +65,7 @@ #define IMAGE_FILE_MACHINE_SH5 0x01a8 #define IMAGE_FILE_MACHINE_THUMB 0x01c2 #define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 +#define IMAGE_FILE_MACHINE_LOONGARCH 0x6264 /* flags */ #define IMAGE_FILE_RELOCS_STRIPPED 0x0001 -- Gitee From f3c886ec044b78dc21dbbfd3d2128de47b897b88 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 103/241] LoongArch: Add suspend and hibernation support Add suspend (ACPI S3) and hibernation (ACPI S4) support for LoongArch. 
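Once applied, both states are driven through the generic Linux PM interface; for example (assuming firmware that implements the \SADR method evaluated below, and a configured swap device for hibernation):

  # suspend to RAM (ACPI S3)
  echo mem > /sys/power/state

  # hibernate to disk (ACPI S4)
  echo disk > /sys/power/state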
Change-Id: I93f9d9b70a430b5ec5950674b79bf66e7f08eea1 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 10 ++ arch/loongarch/Makefile | 3 + arch/loongarch/include/asm/acpi.h | 10 ++ .../include/asm/mach-loongson64/loongson.h | 3 + arch/loongarch/include/asm/time.h | 1 + arch/loongarch/kernel/acpi.c | 6 ++ arch/loongarch/kernel/asm-offsets.c | 12 +++ arch/loongarch/kernel/reset.c | 4 + arch/loongarch/kernel/setup.c | 5 + arch/loongarch/kernel/time.c | 11 ++- arch/loongarch/power/Makefile | 4 + arch/loongarch/power/hibernate.c | 62 +++++++++++++ arch/loongarch/power/hibernate_asm.S | 70 ++++++++++++++ arch/loongarch/power/platform.c | 45 +++++++++ arch/loongarch/power/suspend.c | 74 +++++++++++++++ arch/loongarch/power/suspend_asm.S | 93 +++++++++++++++++++ 16 files changed, 410 insertions(+), 3 deletions(-) create mode 100644 arch/loongarch/power/Makefile create mode 100644 arch/loongarch/power/hibernate.c create mode 100644 arch/loongarch/power/hibernate_asm.S create mode 100644 arch/loongarch/power/platform.c create mode 100644 arch/loongarch/power/suspend.c create mode 100644 arch/loongarch/power/suspend_asm.S diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bd9bc18ec149..e0eaf2e51a0d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -45,6 +45,7 @@ config LOONGARCH select ARCH_WANT_LD_ORPHAN_WARN select BUILDTIME_TABLE_SORT select COMMON_CLK + select CPU_PM select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE @@ -736,6 +737,15 @@ config ZONE_DMA32 menu "Power management options" +config ARCH_SUSPEND_POSSIBLE + def_bool y + depends on SYS_SUPPORTS_HOTPLUG_CPU || !SMP + +config ARCH_HIBERNATION_POSSIBLE + def_bool y + depends on SYS_SUPPORTS_HOTPLUG_CPU || !SMP + +source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" endmenu diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 090a155f7c5b..1688503b6597 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -123,6 +123,9 @@ libs-y += arch/loongarch/lib/ # See arch/loongarch/Kbuild for content of core part of the kernel core-y += arch/loongarch/ +# suspend and hibernation support +drivers-$(CONFIG_PM) += arch/loongarch/power/ + ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index b6cf884c6da5..0a503df02ba2 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -39,4 +39,14 @@ extern struct list_head acpi_wakeup_device_list; #define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT +extern int loongarch_acpi_suspend(void); +extern int (*acpi_suspend_lowlevel)(void); +extern void loongarch_suspend_enter(void); + +static inline unsigned long acpi_get_wakeup_address(void) +{ + extern void loongarch_wakeup_start(void); + return (unsigned long)loongarch_wakeup_start; +} + #endif /* _ASM_LOONGARCH_ACPI_H */ diff --git a/arch/loongarch/include/asm/mach-loongson64/loongson.h b/arch/loongarch/include/asm/mach-loongson64/loongson.h index 115f20d8ff16..48ae4f03e872 100644 --- a/arch/loongarch/include/asm/mach-loongson64/loongson.h +++ b/arch/loongarch/include/asm/mach-loongson64/loongson.h @@ -138,4 +138,7 @@ typedef enum { #define ls7a_writel(val, addr) *(volatile unsigned int *)TO_UNCACHE(addr) = (val) #define ls7a_writeq(val, addr) *(volatile unsigned long *)TO_UNCACHE(addr) = (val) +void enable_gpe_wakeup(void); +void enable_pci_wakeup(void); + #endif /* __ASM_MACH_LOONGSON64_LOONGSON_H */ diff 
--git a/arch/loongarch/include/asm/time.h b/arch/loongarch/include/asm/time.h index 41597b076db6..2c21e36f2c8d 100644 --- a/arch/loongarch/include/asm/time.h +++ b/arch/loongarch/include/asm/time.h @@ -17,6 +17,7 @@ extern u64 cpu_clock_freq; extern u64 const_clock_freq; +extern void save_counter(void); extern void sync_counter(void); static inline unsigned int calc_const_freq(void) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 6c85a7560ffb..1263019fc125 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -331,6 +331,12 @@ static void __init acpi_process_madt(void) } } +#ifndef CONFIG_SUSPEND +int (*acpi_suspend_lowlevel)(void); +#else +int (*acpi_suspend_lowlevel)(void) = loongarch_acpi_suspend; +#endif + void __init acpi_boot_table_init(void) { /* diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index bf125304de56..9bb72bb19ce3 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -263,3 +263,15 @@ void output_smpboot_defines(void) BLANK(); } #endif + +#ifdef CONFIG_HIBERNATION +void output_pbe_defines(void) +{ + COMMENT(" Linux struct pbe offsets. "); + OFFSET(PBE_ADDRESS, pbe, address); + OFFSET(PBE_ORIG_ADDRESS, pbe, orig_address); + OFFSET(PBE_NEXT, pbe, next); + DEFINE(PBE_SIZE, sizeof(struct pbe)); + BLANK(); +} +#endif diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index c98dbfafe1b3..2b83631a7aae 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -13,6 +13,7 @@ #include #include #include +#include static void machine_hang(void) { @@ -47,6 +48,9 @@ void machine_power_off(void) #ifdef CONFIG_SMP preempt_disable(); smp_send_stop(); +#endif +#ifdef CONFIG_PM + enable_pci_wakeup(); #endif pm_power_off(); } diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 24ad67d6b8af..4ec2cda3772f 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -213,6 +214,10 @@ static void __init arch_mem_init(char **cmdline_p) dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); + /* Reserve for hibernation. 
*/ + register_nosave_region(PFN_DOWN(__pa_symbol(&__nosave_begin)), + PFN_UP(__pa_symbol(&__nosave_end))); + memblock_dump_all(); early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index eab3ce17781e..8331b937503b 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -120,12 +120,17 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_timeval; +static long init_offset __nosavedata; + +void save_counter(void) +{ + init_offset = drdtime(); +} void sync_counter(void) { /* Ensure counter begin at 0 */ - csr_write64(-init_timeval, LOONGARCH_CSR_CNTC); + csr_write64(init_offset, LOONGARCH_CSR_CNTC); } int constant_clockevent_init(void) @@ -212,7 +217,7 @@ void __init time_init(void) else const_clock_freq = calc_const_freq(); - init_timeval = drdtime() - csr_read64(LOONGARCH_CSR_CNTC); + init_offset = -(drdtime() - csr_read64(LOONGARCH_CSR_CNTC)); constant_clockevent_init(); constant_clocksource_init(); diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile new file mode 100644 index 000000000000..58151d003e40 --- /dev/null +++ b/arch/loongarch/power/Makefile @@ -0,0 +1,4 @@ +obj-y += platform.o + +obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o +obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate_asm.o diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c new file mode 100644 index 000000000000..853b5d9c801b --- /dev/null +++ b/arch/loongarch/power/hibernate.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include + +static u32 saved_crmd; +static u32 saved_prmd; +static u32 saved_euen; +static u32 saved_ecfg; +static u64 saved_pcpu_base; +struct pt_regs saved_regs; + +void save_processor_state(void) +{ + saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); + saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); + saved_euen = csr_read32(LOONGARCH_CSR_EUEN); + saved_ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_pcpu_base = csr_read64(PERCPU_BASE_KS); + + if (is_fpu_owner()) + save_fp(current); +} + +void restore_processor_state(void) +{ + csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); + csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); + csr_write32(saved_euen, LOONGARCH_CSR_EUEN); + csr_write32(saved_ecfg, LOONGARCH_CSR_ECFG); + csr_write64(saved_pcpu_base, PERCPU_BASE_KS); + + if (is_fpu_owner()) + restore_fp(current); +} + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); + unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end)); + + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +extern int swsusp_asm_suspend(void); + +int swsusp_arch_suspend(void) +{ + enable_pci_wakeup(); + return swsusp_asm_suspend(); +} + +extern int swsusp_asm_resume(void); + +int swsusp_arch_resume(void) +{ + /* Avoid TLB mismatch during and after kernel resume */ + local_flush_tlb_all(); + return swsusp_asm_resume(); +} diff --git a/arch/loongarch/power/hibernate_asm.S b/arch/loongarch/power/hibernate_asm.S new file mode 100644 index 000000000000..c606cae7756b --- /dev/null +++ b/arch/loongarch/power/hibernate_asm.S @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hibernation support specific for loongarch - temporary page tables + * + * Licensed under the GPLv2 + * + * Copyright (C) 2009 Lemote Inc. + * Author: Hu Hongbing + * Wu Zhangjin + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */ +#include +#include +#include +#include + +.text +SYM_FUNC_START(swsusp_asm_suspend) + la.pcrel t0, saved_regs + PTR_S ra, t0, PT_R1 + PTR_S tp, t0, PT_R2 + PTR_S sp, t0, PT_R3 + PTR_S u0, t0, PT_R21 + PTR_S fp, t0, PT_R22 + PTR_S s0, t0, PT_R23 + PTR_S s1, t0, PT_R24 + PTR_S s2, t0, PT_R25 + PTR_S s3, t0, PT_R26 + PTR_S s4, t0, PT_R27 + PTR_S s5, t0, PT_R28 + PTR_S s6, t0, PT_R29 + PTR_S s7, t0, PT_R30 + PTR_S s8, t0, PT_R31 + b swsusp_save +SYM_FUNC_END(swsusp_asm_suspend) + +SYM_FUNC_START(swsusp_asm_resume) + la.pcrel t0, restore_pblist + PTR_L t0, t0, 0 +0: + PTR_L t1, t0, PBE_ADDRESS /* source */ + PTR_L t2, t0, PBE_ORIG_ADDRESS /* destination */ + PTR_LI t3, _PAGE_SIZE + PTR_ADD t3, t3, t1 +1: + REG_L t8, t1, 0 + REG_S t8, t2, 0 + PTR_ADDI t1, t1, SZREG + PTR_ADDI t2, t2, SZREG + bne t1, t3, 1b + PTR_L t0, t0, PBE_NEXT + bnez t0, 0b + la.pcrel t0, saved_regs + PTR_L ra, t0, PT_R1 + PTR_L tp, t0, PT_R2 + PTR_L sp, t0, PT_R3 + PTR_L u0, t0, PT_R21 + PTR_L fp, t0, PT_R22 + PTR_L s0, t0, PT_R23 + PTR_L s1, t0, PT_R24 + PTR_L s2, t0, PT_R25 + PTR_L s3, t0, PT_R26 + PTR_L s4, t0, PT_R27 + PTR_L s5, t0, PT_R28 + PTR_L s6, t0, PT_R29 + PTR_L s7, t0, PT_R30 + PTR_L s8, t0, PT_R31 + PTR_LI a0, 0x0 + jirl zero, ra, 0 +SYM_FUNC_END(swsusp_asm_resume)
diff --git a/arch/loongarch/power/platform.c b/arch/loongarch/power/platform.c new file mode 100644 index 000000000000..d0e72ab0798d --- /dev/null +++ b/arch/loongarch/power/platform.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include + +#include +#include + +void enable_gpe_wakeup(void) +{ + acpi_enable_all_wakeup_gpes(); +} + +void enable_pci_wakeup(void) +{ + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_STATUS, 1); + + if (acpi_gbl_FADT.flags & ACPI_FADT_PCI_EXPRESS_WAKE) + acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, 0); +} + +static int __init loongson3_acpi_suspend_init(void) +{ +#ifdef CONFIG_ACPI + acpi_status status; + uint64_t suspend_addr = 0; + + if (acpi_disabled || acpi_gbl_reduced_hardware) + return 0; + + acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1); + status = acpi_evaluate_integer(NULL, "\\SADR", NULL, &suspend_addr); + if (ACPI_FAILURE(status) || !suspend_addr) { + pr_err("ACPI S3 is not supported!\n"); + return -1; + } + loongson_sysconf.suspend_addr = (u64)phys_to_virt(PHYSADDR(suspend_addr)); +#endif + return 0; +} + +device_initcall(loongson3_acpi_suspend_init);
diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c new file mode 100644 index 000000000000..e5b54bcfe043 --- /dev/null +++ b/arch/loongarch/power/suspend.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Loongson-specific suspend support + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd.
+ */ +#include +#include +#include + +#include +#include +#include +#include + +#include + +u64 loongarch_suspend_addr; + +struct saved_registers { + u32 ecfg; + u32 euen; + u64 pgd; + u64 kpgd; + u32 pwctl0; + u32 pwctl1; +}; +static struct saved_registers saved_regs; + +static void arch_common_suspend(void) +{ + save_counter(); + saved_regs.pgd = csr_read64(LOONGARCH_CSR_PGDL); + saved_regs.kpgd = csr_read64(LOONGARCH_CSR_PGDH); + saved_regs.pwctl0 = csr_read32(LOONGARCH_CSR_PWCTL0); + saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1); + saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG); + saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN); + + loongarch_suspend_addr = loongson_sysconf.suspend_addr; +} + +static void arch_common_resume(void) +{ + sync_counter(); + local_flush_tlb_all(); + csr_write64(per_cpu_offset(0), PERCPU_BASE_KS); + csr_write64(eentry, LOONGARCH_CSR_EENTRY); + csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); + csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + + csr_write64(saved_regs.pgd, LOONGARCH_CSR_PGDL); + csr_write64(saved_regs.kpgd, LOONGARCH_CSR_PGDH); + csr_write32(saved_regs.pwctl0, LOONGARCH_CSR_PWCTL0); + csr_write32(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1); + csr_write32(saved_regs.ecfg, LOONGARCH_CSR_ECFG); + csr_write32(saved_regs.euen, LOONGARCH_CSR_EUEN); +} + +int loongarch_acpi_suspend(void) +{ + enable_gpe_wakeup(); + enable_pci_wakeup(); + + arch_common_suspend(); + + /* processor specific suspend */ + loongarch_suspend_enter(); + + arch_common_resume(); + + return 0; +} diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S new file mode 100644 index 000000000000..06ea5c48eb68 --- /dev/null +++ b/arch/loongarch/power/suspend_asm.S @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Sleep helper for Loongson-3 sleep mode. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + * Author: Huacai Chen + */ + +#include +#include +#include +#include +#include + +/* preparatory stuff */ +.macro SETUP_SLEEP + addi.d sp, sp, -PT_SIZE + st.d $r1, sp, PT_R1 + st.d $r2, sp, PT_R2 + st.d $r3, sp, PT_R3 + st.d $r4, sp, PT_R4 + st.d $r21, sp, PT_R21 + st.d $r22, sp, PT_R22 + st.d $r23, sp, PT_R23 + st.d $r24, sp, PT_R24 + st.d $r25, sp, PT_R25 + st.d $r26, sp, PT_R26 + st.d $r27, sp, PT_R27 + st.d $r28, sp, PT_R28 + st.d $r29, sp, PT_R29 + st.d $r30, sp, PT_R30 + st.d $r31, sp, PT_R31 + + la.pcrel t0, acpi_saved_sp + st.d sp, t0, 0 +.endm + +.macro SETUP_WAKEUP + ld.d $r1, sp, PT_R1 + ld.d $r2, sp, PT_R2 + ld.d $r3, sp, PT_R3 + ld.d $r4, sp, PT_R4 + ld.d $r21, sp, PT_R21 + ld.d $r22, sp, PT_R22 + ld.d $r23, sp, PT_R23 + ld.d $r24, sp, PT_R24 + ld.d $r25, sp, PT_R25 + ld.d $r26, sp, PT_R26 + ld.d $r27, sp, PT_R27 + ld.d $r28, sp, PT_R28 + ld.d $r29, sp, PT_R29 + ld.d $r30, sp, PT_R30 + ld.d $r31, sp, PT_R31 +.endm + + .text + .align 12 + +/* Sleep/wakeup code for Loongson-3 */ +SYM_FUNC_START(loongarch_suspend_enter) + SETUP_SLEEP + bl __flush_cache_all + + /* Pass RA and SP to BIOS */ + addi.d a1, sp, 0 + la.pcrel a0, loongarch_wakeup_start + la.pcrel t0, loongarch_suspend_addr + ld.d t0, t0, 0 + jirl a0, t0, 0 /* Call BIOS's STR sleep routine */ + + /* + * This is where we return upon wakeup. + * Reload all of the registers and return. 
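+ * (We get here from the firmware after wakeup, with the direct mapping windows unconfigured, so they are re-programmed before anything else; only then do we jump to the linked virtual address and restore the saved context.)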
+ */ +SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) + li.d t0, CSR_DMW0_INIT # UC, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN0 + li.d t0, CSR_DMW1_INIT # CA, PLV0 + csrwr t0, LOONGARCH_CSR_DMWIN1 + + la.abs t0, 0f + jr t0 +0: + la.pcrel t0, acpi_saved_sp + ld.d sp, t0, 0 + SETUP_WAKEUP + addi.d sp, sp, PT_SIZE + jr ra +SYM_FUNC_END(loongarch_suspend_enter) -- Gitee From 8c91c4a251b35855e65d44bc6ad3115169a2a8d1 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 5 Sep 2022 11:06:10 +0800 Subject: [PATCH 104/241] LoongArch: Add CPUFreq driver support Change-Id: I6c47fa5291092f160e3b0e9781068e5d2ebe4b06 Signed-off-by: Binbin Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/loongarchregs.h | 3 + arch/loongarch/power/platform.c | 12 + drivers/cpufreq/Kconfig | 14 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/loongson3_cpufreq.c | 419 +++++++++++++++++++++ 6 files changed, 450 insertions(+) create mode 100644 drivers/cpufreq/loongson3_cpufreq.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index e0eaf2e51a0d..6299354e098c 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -747,6 +747,7 @@ config ARCH_HIBERNATION_POSSIBLE source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" +source "drivers/cpufreq/Kconfig" endmenu diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index 0ab158785a09..3b2610a0af6d 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -1109,11 +1109,14 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define LOONGARCH_IOCSR_NODECNT 0x408 #define LOONGARCH_IOCSR_MISC_FUNC 0x420 +#define IOCSR_MISC_FUNC_SOFT_INT BIT_ULL(10) #define IOCSR_MISC_FUNC_TIMER_RESET BIT_ULL(21) #define IOCSR_MISC_FUNC_EXT_IOI_EN BIT_ULL(48) #define LOONGARCH_IOCSR_CPUTEMP 0x428 +#define LOONGARCH_IOCSR_SMCMBX 0x51c + /* PerCore CSR, only accessable by local cores */ #define LOONGARCH_IOCSR_IPI_STATUS 0x1000 #define LOONGARCH_IOCSR_IPI_EN 0x1004 diff --git a/arch/loongarch/power/platform.c b/arch/loongarch/power/platform.c index d0e72ab0798d..d923385dddb2 100644 --- a/arch/loongarch/power/platform.c +++ b/arch/loongarch/power/platform.c @@ -22,6 +22,18 @@ void enable_pci_wakeup(void) acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, 0); } +static struct platform_device loongson3_cpufreq_device = { + .name = "loongson3_cpufreq", + .id = -1, +}; + +static int __init loongson_cpufreq_init(void) +{ + return platform_device_register(&loongson3_cpufreq_device); +} + +arch_initcall(loongson_cpufreq_init); + static int __init loongson3_acpi_suspend_init(void) { #ifdef CONFIG_ACPI diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 85de313ddec2..fdbe7ceb1579 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -287,6 +287,20 @@ config LOONGSON1_CPUFREQ If in doubt, say N. endif +if LOONGARCH +config LOONGSON3_CPUFREQ + tristate "Loongson3 CPUFreq Driver" + help + This option adds a CPUFreq driver for Loongson processors which + support software configurable cpu frequency. + + Loongson-3 family processors support this feature. + + For details, take a look at . + + If in doubt, say N. 
+endif + if SPARC64 config SPARC_US3_CPUFREQ tristate "UltraSPARC-III CPU Frequency driver" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index f1b7e3dd6e5d..cf2d6bc976e6 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -105,6 +105,7 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o +obj-$(CONFIG_LOONGSON3_CPUFREQ) += loongson3_cpufreq.o obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o diff --git a/drivers/cpufreq/loongson3_cpufreq.c b/drivers/cpufreq/loongson3_cpufreq.c new file mode 100644 index 000000000000..ace30f95384e --- /dev/null +++ b/drivers/cpufreq/loongson3_cpufreq.c @@ -0,0 +1,419 @@ +/* + * CPUFreq driver for the loongson-3 processors + * + * All revisions of Loongson-3 processor support this feature. + * + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ +#include +#include +#include +#include + +#include +#include +#include + +/* Message */ +union smc_message +{ + u32 value; + struct + { + u32 id : 4; + u32 info : 4; + u32 val : 16; + u32 cmd : 6; + u32 extra : 1; + u32 complete : 1; + }; +}; + +/* Command return values */ +#define CMD_OK 0 /* No error */ +#define CMD_ERROR 1 /* Regular error */ +#define CMD_NOCMD 2 /* Command does not support */ +#define CMD_INVAL 3 /* Invalid Parameter */ + +/* Version commands */ +/* + * CMD_GET_VERSION - Get interface version + * Input: none + * Output: version + */ +#define CMD_GET_VERSION 0x1 + +/* Feature commands */ +/* + * CMD_GET_FEATURE - Get feature state + * Input: feature ID + * Output: feature flag + */ +#define CMD_GET_FEATURE 0x2 + +/* + * CMD_SET_FEATURE - Set feature state + * Input: feature ID, feature flag + * output: none + */ +#define CMD_SET_FEATURE 0x3 + +/* Feature IDs */ +#define FEATURE_SENSOR 0 +#define FEATURE_FAN 1 +#define FEATURE_DVFS 2 + +/* Sensor feature flags */ +#define FEATURE_SENSOR_ENABLE (1 << 0) +#define FEATURE_SENSOR_SAMPLE (1 << 1) + +/* Fan feature flags */ +#define FEATURE_FAN_ENABLE (1 << 0) +#define FEATURE_FAN_AUTO (1 << 1) + +/* DVFS feature flags */ +#define FEATURE_DVFS_ENABLE (1 << 0) +#define FEATURE_DVFS_BOOST (1 << 1) + +/* Sensor commands */ +/* + * CMD_GET_SENSOR_NUM - Get number of sensors + * Input: none + * Output: number + */ +#define CMD_GET_SENSOR_NUM 0x4 + +/* + * CMD_GET_SENSOR_STATUS - Get sensor status + * Input: sensor ID, type + * Output: sensor status + */ +#define CMD_GET_SENSOR_STATUS 0x5 + +/* Sensor types */ +#define SENSOR_INFO_TYPE 0 +#define SENSOR_INFO_TYPE_TEMP 1 + +/* Fan commands */ +/* + * CMD_GET_FAN_NUM - Get number of fans + * Input: none + * Output: number + */ +#define CMD_GET_FAN_NUM 0x6 + +/* + * CMD_GET_FAN_INFO - Get fan status + * Input: fan ID, type + * Output: fan info + */ +#define CMD_GET_FAN_INFO 0x7 + +/* + * CMD_SET_FAN_INFO - Set fan status + * Input: fan ID, type, value + * Output: none + */ +#define CMD_SET_FAN_INFO 0x8 + +/* Fan types */ +#define FAN_INFO_TYPE_LEVEL 0 + +/* DVFS commands */ +/* + * CMD_GET_FREQ_LEVEL_NUM - Get number of freq levels + * Input: none + * Output: number + */ +#define CMD_GET_FREQ_LEVEL_NUM 0x9 + +/* + * CMD_GET_FREQ_BOOST_LEVEL - Get number of boost levels + * Input: none + * Output: number + */ +#define CMD_GET_FREQ_BOOST_LEVEL 0x10 + +/* + * 
CMD_GET_FREQ_LEVEL_INFO - Get freq level info + * Input: level ID + * Output: level info + */ +#define CMD_GET_FREQ_LEVEL_INFO 0x11 + +/* + * CMD_GET_FREQ_INFO - Get freq info + * Input: CPU ID, type + * Output: freq info + */ +#define CMD_GET_FREQ_INFO 0x12 + +/* + * CMD_SET_FREQ_INFO - Set freq info + * Input: CPU ID, type, value + * Output: none + */ +#define CMD_SET_FREQ_INFO 0x13 + +/* Freq types */ +#define FREQ_INFO_TYPE_FREQ 0 +#define FREQ_INFO_TYPE_LEVEL 1 + +static inline int do_service_request(union smc_message *msg) +{ + int retries; + union smc_message last; + + last.value = iocsr_read32(LOONGARCH_IOCSR_SMCMBX); + if (!last.complete) + return -1; + + iocsr_write32(msg->value, LOONGARCH_IOCSR_SMCMBX); + iocsr_write32(iocsr_read32(LOONGARCH_IOCSR_MISC_FUNC) | IOCSR_MISC_FUNC_SOFT_INT, + LOONGARCH_IOCSR_MISC_FUNC); + + for (retries = 0; retries < 10000; retries++) { + msg->value = iocsr_read32(LOONGARCH_IOCSR_SMCMBX); + if (msg->complete) + break; + + usleep_range(4, 8); + } + + if (!msg->complete || msg->cmd != CMD_OK) + return -1; + + return 0; +} + +static int boost_supported = 0; +static struct mutex cpufreq_mutex[MAX_PACKAGES]; + +enum freq { + FREQ_LEV0, /* Reserved */ + FREQ_LEV1, FREQ_LEV2, FREQ_LEV3, FREQ_LEV4, + FREQ_LEV5, FREQ_LEV6, FREQ_LEV7, FREQ_LEV8, + FREQ_LEV9, FREQ_LEV10, FREQ_LEV11, FREQ_LEV12, + FREQ_LEV13, FREQ_LEV14, FREQ_LEV15, FREQ_LEV16, + FREQ_RESV +}; + +/* For Loongson-3A5000, support boost */ +static struct cpufreq_frequency_table loongson3_cpufreq_table[] = { + {0, FREQ_LEV0, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV1, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV2, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV3, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV4, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV5, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV6, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV7, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV8, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV9, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV10, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV11, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV12, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV13, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV14, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV15, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_LEV16, CPUFREQ_ENTRY_INVALID}, + {0, FREQ_RESV, CPUFREQ_TABLE_END}, +}; + +static unsigned int loongson3_cpufreq_get(unsigned int cpu) +{ + union smc_message msg; + + msg.id = cpu; + msg.info = FREQ_INFO_TYPE_FREQ; + msg.cmd = CMD_GET_FREQ_INFO; + msg.extra = 0; + msg.complete = 0; + do_service_request(&msg); + + return msg.val * 1000; +} + +static int loongson3_cpufreq_set(struct cpufreq_policy *policy, int freq_level) +{ + uint32_t core_id = cpu_data[policy->cpu].core; + union smc_message msg; + + msg.id = core_id; + msg.info = FREQ_INFO_TYPE_LEVEL; + msg.val = freq_level; + msg.cmd = CMD_SET_FREQ_INFO; + msg.extra = 0; + msg.complete = 0; + do_service_request(&msg); + + return 0; +} + +/* + * Here we notify other drivers of the proposed change and the final change. 
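+ * (The PRECHANGE/POSTCHANGE notifications themselves are issued by the cpufreq core around ->target_index(); all this driver has to do here is the actual level switch, serialized by a per-package mutex.)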
+ */ +static int loongson3_cpufreq_target(struct cpufreq_policy *policy, + unsigned int index) +{ + unsigned int cpu = policy->cpu; + unsigned int package = cpu_data[cpu].package; + + if (!cpu_online(cpu)) + return -ENODEV; + + /* setting the cpu frequency */ + mutex_lock(&cpufreq_mutex[package]); + loongson3_cpufreq_set(policy, index); + mutex_unlock(&cpufreq_mutex[package]); + + return 0; +} + +static int loongson3_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + if (!cpu_online(policy->cpu)) + return -ENODEV; + + policy->cur = loongson3_cpufreq_get(policy->cpu); + + policy->cpuinfo.transition_latency = 5000; + policy->freq_table = loongson3_cpufreq_table; + cpumask_copy(policy->cpus, topology_sibling_cpumask(policy->cpu)); + + return 0; +} + +static int loongson3_cpufreq_exit(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct cpufreq_driver loongson3_cpufreq_driver = { + .name = "loongson3", + .flags = CPUFREQ_CONST_LOOPS, + .init = loongson3_cpufreq_cpu_init, + .verify = cpufreq_generic_frequency_table_verify, + .target_index = loongson3_cpufreq_target, + .get = loongson3_cpufreq_get, + .exit = loongson3_cpufreq_exit, + .attr = cpufreq_generic_attr, +}; + +static struct platform_device_id cpufreq_id_table[] = { + { "loongson3_cpufreq", }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(platform, cpufreq_id_table); + +static struct platform_driver cpufreq_driver = { + .driver = { + .name = "loongson3_cpufreq", + .owner = THIS_MODULE, + }, + .id_table = cpufreq_id_table, +}; + +static int configure_cpufreq_info(void) +{ + int i, r, boost_level, max_level; + union smc_message msg; + + if (!cpu_has_csr) + return -EPERM; + + msg.cmd = CMD_GET_VERSION; + msg.extra = 0; + msg.complete = 0; + r = do_service_request(&msg); + if (r < 0 || msg.val < 0x1) + return -EPERM; + + msg.id = FEATURE_DVFS; + msg.val = FEATURE_DVFS_ENABLE | FEATURE_DVFS_BOOST; + msg.cmd = CMD_SET_FEATURE; + msg.extra = 0; + msg.complete = 0; + r = do_service_request(&msg); + if (r < 0) + return -EPERM; + + msg.cmd = CMD_GET_FREQ_LEVEL_NUM; + msg.extra = 0; + msg.complete = 0; + r = do_service_request(&msg); + if (r < 0) + return -EPERM; + max_level = msg.val; + + msg.cmd = CMD_GET_FREQ_BOOST_LEVEL; + msg.extra = 0; + msg.complete = 0; + r = do_service_request(&msg); + if (r < 0) + return -EPERM; + boost_level = msg.val; + if (boost_level < max_level) + boost_supported = 1; + + for (i = 0; i < max_level; i++) { + msg.cmd = CMD_GET_FREQ_LEVEL_INFO; + msg.id = i; + msg.info = FREQ_INFO_TYPE_FREQ; + msg.extra = 0; + msg.complete = 0; + r = do_service_request(&msg); + if (r < 0) + return -EPERM; + + loongson3_cpufreq_table[i].frequency = msg.val * 1000; + if (i >= boost_level) + loongson3_cpufreq_table[i].flags = CPUFREQ_BOOST_FREQ; + } + + return 0; +} + +static int __init cpufreq_init(void) +{ + int i, ret; + + ret = platform_driver_register(&cpufreq_driver); + if (ret) + goto err; + + ret = configure_cpufreq_info(); + if (ret) + goto err; + + for (i = 0; i < MAX_PACKAGES; i++) + mutex_init(&cpufreq_mutex[i]); + + ret = cpufreq_register_driver(&loongson3_cpufreq_driver); + + if (boost_supported) + cpufreq_enable_boost_support(); + + pr_info("cpufreq: Loongson-3 CPU frequency driver.\n"); + + return ret; + +err: + platform_driver_unregister(&cpufreq_driver); + return ret; +} + +static void __exit cpufreq_exit(void) +{ + cpufreq_unregister_driver(&loongson3_cpufreq_driver); + platform_driver_unregister(&cpufreq_driver); +} + +module_init(cpufreq_init); +module_exit(cpufreq_exit); + +MODULE_AUTHOR("Huacai Chen 
"); +MODULE_DESCRIPTION("CPUFreq driver for Loongson-3 processors"); +MODULE_LICENSE("GPL"); -- Gitee From 2de8f2d2e4caba6e340ed49ab98cb497a9fd3da2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 25 May 2022 15:31:37 +0800 Subject: [PATCH 105/241] LoongArch: Add vDSO syscall __vdso_getcpu() Change-Id: I404f5809b316721f2e91e2b031f1ea5681c38040 Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/vdso.h | 1 + arch/loongarch/include/asm/vdso/vdso.h | 14 ++++++++- arch/loongarch/kernel/vdso.c | 25 +++++++++------ arch/loongarch/vdso/Makefile | 2 +- arch/loongarch/vdso/vdso.lds.S | 1 + arch/loongarch/vdso/vgetcpu.c | 43 ++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 12 deletions(-) create mode 100644 arch/loongarch/vdso/vgetcpu.c diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h index ad3ffae9b60e..f81602a9a069 100644 --- a/arch/loongarch/include/asm/vdso.h +++ b/arch/loongarch/include/asm/vdso.h @@ -12,6 +12,7 @@ #ifndef __ASM_VDSO_H #define __ASM_VDSO_H +#include #include #include diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index 3ddfde3f7557..f7dd21689cb3 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -10,6 +10,17 @@ #include #include +struct vdso_pcpu_data { + u32 node; +} ____cacheline_aligned_in_smp; + +struct loongarch_vdso_data { + struct vdso_pcpu_data pdata[NR_CPUS]; + struct vdso_data data[CS_BASES]; /* Arch-independent data */ +}; + +#define VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data)) + static inline unsigned long get_vdso_base(void) { unsigned long addr; @@ -25,7 +36,8 @@ static inline unsigned long get_vdso_base(void) static inline const struct vdso_data *get_vdso_data(void) { - return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE); + return (const struct vdso_data *)(get_vdso_base() + - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS); } #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index e47c3175bef5..6f31bc071d22 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -25,12 +25,14 @@ extern char vdso_start[], vdso_end[]; /* Kernel-provided data used by the VDSO. */ -static union loongarch_vdso_data { - u8 page[PAGE_SIZE]; - struct vdso_data data[CS_BASES]; +static union { + u8 page[VDSO_DATA_SIZE]; + struct loongarch_vdso_data vdata; } loongarch_vdso_data __page_aligned_data; -struct vdso_data *vdso_data = loongarch_vdso_data.data; + static struct page *vdso_pages[] = { NULL }; +struct vdso_data *vdso_data = loongarch_vdso_data.vdata.data; +struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { @@ -55,11 +57,14 @@ struct loongarch_vdso_info vdso_info = { static int __init init_vdso(void) { - unsigned long i, pfn; + unsigned long i, cpu, pfn; BUG_ON(!PAGE_ALIGNED(vdso_info.vdso)); BUG_ON(!PAGE_ALIGNED(vdso_info.size)); + for_each_possible_cpu(cpu) + vdso_pdata[cpu].node = cpu_to_node(cpu); + pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso)); for (i = 0; i < vdso_info.size / PAGE_SIZE; i++) vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i); @@ -93,9 +98,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* * Determine total area size. This includes the VDSO data itself - * and the data page. + * and the data pages. 
*/ - vvar_size = PAGE_SIZE; + vvar_size = VDSO_DATA_SIZE; size = vvar_size + info->size; data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0); @@ -103,7 +108,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) ret = data_addr; goto out; } - vdso_addr = data_addr + PAGE_SIZE; + vdso_addr = data_addr + VDSO_DATA_SIZE; vma = _install_special_mapping(mm, data_addr, vvar_size, VM_READ | VM_MAYREAD, @@ -115,8 +120,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* Map VDSO data page. */ ret = remap_pfn_range(vma, data_addr, - virt_to_phys(vdso_data) >> PAGE_SHIFT, - PAGE_SIZE, PAGE_READONLY); + virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT, + vvar_size, PAGE_READONLY); if (ret) goto out; diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 6b6e16732c60..22bc504eb8e7 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -6,7 +6,7 @@ ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT include $(srctree)/lib/vdso/Makefile -obj-vdso-y := elf.o vgettimeofday.o sigreturn.o +obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o # Common compiler flags between ABIs. ccflags-vdso := \ diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S index d3811bba0abf..d437b9695ba5 100644 --- a/arch/loongarch/vdso/vdso.lds.S +++ b/arch/loongarch/vdso/vdso.lds.S @@ -58,6 +58,7 @@ VERSION { LINUX_4.8 { global: + __vdso_getcpu; __vdso_clock_getres; __vdso_clock_gettime; __vdso_gettimeofday; diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c new file mode 100644 index 000000000000..43a0078e4418 --- /dev/null +++ b/arch/loongarch/vdso/vgetcpu.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fast user context implementation of getcpu() + */ + +#include +#include + +static __always_inline int read_cpu_id(void) +{ + int cpu_id; + + __asm__ __volatile__( + " rdtime.d $zero, %0\n" + : "=r" (cpu_id) + : + : "memory"); + + return cpu_id; +} + +static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) +{ + return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); +} + +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) +{ + int cpu_id; + const struct vdso_pcpu_data *data; + + cpu_id = read_cpu_id(); + + if (cpu) + *cpu = cpu_id; + + if (node) { + data = get_pcpu_data(); + *node = data[cpu_id].node; + } + + return 0; +} -- Gitee From fc5cfff36a29a15d070b7e17814430bc66e4bd81 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 23 Oct 2021 16:48:08 +0800 Subject: [PATCH 106/241] LoongArch: Use TLB for ioremap() Change-Id: Id0d560ef4858c5ec36dbb30016c676be4dc97ce8 Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 11 ++++ arch/loongarch/include/asm/fixmap.h | 15 +++++ arch/loongarch/include/asm/io.h | 69 ++++++-------------- arch/loongarch/include/asm/pgtable-bits.h | 2 + arch/loongarch/kernel/setup.c | 2 +- arch/loongarch/mm/init.c | 64 +++++++++++++++++++ arch/loongarch/pci/acpi.c | 76 +++++++++++++++++++++-- 7 files changed, 183 insertions(+), 56 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 6299354e098c..487021fe10f8 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -53,6 +53,7 @@ config LOONGARCH select GENERIC_ENTRY select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY + select GENERIC_IOREMAP if !ARCH_IOREMAP select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW 
@@ -251,6 +252,9 @@ config 64BIT endchoice +config FIX_EARLYCON_MEM + def_bool y + config PAGE_SIZE_4KB bool @@ -670,6 +674,13 @@ config PHYSICAL_START specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed kernel). +config ARCH_IOREMAP + bool "Enable LoongArch DMW-based ioremap()" + help + We use generic TLB-based ioremap() by default since it has page + protection support. However, you can enable LoongArch DMW-based + ioremap() for better performance. + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h index cd810ee0d251..e6f8d21809ff 100644 --- a/arch/loongarch/include/asm/fixmap.h +++ b/arch/loongarch/include/asm/fixmap.h @@ -16,4 +16,19 @@ #define NR_FIX_BTMAPS 64 +enum fixed_addresses { + FIX_HOLE, + FIX_EARLYCON_MEM_BASE, + __end_of_fixed_addresses +}; + +#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXMAP_PAGE_IO PAGE_KERNEL_SUC + +extern void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags); + +#include + #endif diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index a8ebe6a61205..da80f037ef97 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -27,71 +27,38 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap #define early_memunmap early_iounmap +#ifdef CONFIG_ARCH_IOREMAP + static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, unsigned long prot_val) { - if (prot_val == _CACHE_CC) + if (prot_val & _CACHE_CC) return (void __iomem *)(unsigned long)(CACHE_BASE + offset); else return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset); } -/* - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - */ -#define ioremap(offset, size) \ - ioremap_prot((offset), (size), _CACHE_SUC) +#define ioremap(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC)) -/* - * ioremap_wc - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_wc performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * but accelerated by means of write-combining feature. It is specifically - * useful for PCIe prefetchable windows, which may vastly improve a - * communications performance. If it was determined on boot stage, what - * CPU CCA doesn't support WUC, the method shall fall-back to the - * _CACHE_SUC option (see cpu_probe() method). 
- */ -#define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), _CACHE_WUC) +#define iounmap(addr) ((void)(addr)) + +#endif /* - * ioremap_cache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_cache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. + * On LoongArch, ioremap() has two variants, ioremap_wc() and ioremap_cache(). + * They map bus memory into CPU space, the mapped memory is marked uncachable + * (_CACHE_SUC), uncachable but accelerated by write-combine (_CACHE_WUC) and + * cachable (_CACHE_CC) respectively for CPU access. * - * This version of ioremap ensures that the memory is marked cachable by - * the CPU. Also enables full write-combining. Useful for some - * memory-like regions on I/O busses. + * @offset: bus address of the memory + * @size: size of the resource to map */ -#define ioremap_cache(offset, size) \ - ioremap_prot((offset), (size), _CACHE_CC) +#define ioremap_wc(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC)) -static inline void iounmap(const volatile void __iomem *addr) -{ -} +#define ioremap_cache(offset, size) \ + ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) #define mmiowb() asm volatile ("dbar 0" ::: "memory") diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h index 4c377ff13d0f..e6f4f1c3d85f 100644 --- a/arch/loongarch/include/asm/pgtable-bits.h +++ b/arch/loongarch/include/asm/pgtable-bits.h @@ -104,6 +104,8 @@ #ifndef __ASSEMBLY__ +#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL_SUC) + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 4ec2cda3772f..4d5f61a1ce39 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -334,11 +334,11 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; early_init(); + pagetable_init(); parse_early_param(); reserve_initrd_mem(); platform_init(); - pagetable_init(); arch_mem_init(cmdline_p); resource_init(); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 6b97654eddc2..19e356f4cd6c 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -338,6 +338,70 @@ void vmemmap_free(unsigned long start, unsigned long end, } #endif +static pte_t *fixmap_pte(unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + p4d = p4d_offset(pgd, addr); + + if (pgd_none(*pgd)) { + pud_t *new __maybe_unused; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); +#endif + } + + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new __maybe_unused; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pud_populate(&init_mm, pud, new); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); +#endif + } + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new __maybe_unused; + + new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd, new); + } + + return pte_offset_kernel(pmd, 
addr); +} + +void __init __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + if (!pte_none(*ptep)) { + pte_ERROR(*ptep); + return; + } + + if (pgprot_val(flags)) + set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags)); + else { + pte_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + } +} + /* * Align swapper_pg_dir in to 64K, allows its address to be loaded * with a single LUI instruction in the TLB handlers. If we used diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 0aba5e9efa2a..931a9a842c1f 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -83,6 +83,69 @@ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) return 0; } +/* + * Create a PCI config space window + * - reserve mem region + * - alloc struct pci_config_window with space for all mappings + * - ioremap the config space + */ +static struct pci_config_window *arch_pci_ecam_create(struct device *dev, + struct resource *cfgres, struct resource *busr, struct pci_ecam_ops *ops) +{ + int i, err; + unsigned int bus_range, bsz; + struct resource *conflict; + struct pci_config_window *cfg; + + if (busr->start > busr->end) + return ERR_PTR(-EINVAL); + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return ERR_PTR(-ENOMEM); + + cfg->parent = dev; + cfg->ops = ops; + cfg->busr.start = busr->start; + cfg->busr.end = busr->end; + cfg->busr.flags = IORESOURCE_BUS; + bus_range = resource_size(cfgres) >> ops->bus_shift; + + bsz = 1 << ops->bus_shift; + + cfg->res.start = cfgres->start; + cfg->res.end = cfgres->end; + cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + cfg->res.name = "PCI ECAM"; + + conflict = request_resource_conflict(&iomem_resource, &cfg->res); + if (conflict) { + err = -EBUSY; + dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n", + &cfg->res, conflict->name, conflict); + goto err_exit; + } + + cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz); + if (!cfg->win) + goto err_exit_iomap; + + if (ops->init) { + err = ops->init(cfg); + if (err) + goto err_exit; + } + dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr); + return cfg; + +err_exit_iomap: + err = -ENOMEM; + dev_err(dev, "ECAM ioremap failed\n"); +err_exit: + pci_ecam_free(cfg); + return ERR_PTR(err); +} + /* * Lookup the bus range for the domain in MCFG, and set up config space * mapping. @@ -107,11 +170,16 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root) bus_shift = ecam_ops->bus_shift ? 
: 20; - cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); - cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; - cfgres.flags = IORESOURCE_MEM; + if (bus_shift == 20) + cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + else { + cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift); + cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1; + cfgres.end |= BIT(28) + (((PCI_CFG_SPACE_EXP_SIZE - 1) & 0xf00) << 16); + cfgres.flags = IORESOURCE_MEM; + cfg = arch_pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); + } - cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops); if (IS_ERR(cfg)) { dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg)); return NULL;
-- Gitee
From 2a8c17cecfcd7831a4d6f06c7e3b2fd93172b0ff Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Fri, 23 Sep 2022 12:19:02 +0800 Subject: [PATCH 107/241] LoongArch: Use generic BUG() handler

Inspired by commit 9fb7410f955 ("arm64/BUG: Use BRK instruction for generic BUG traps"), do the same for LoongArch to use the generic BUG() handler.

This patch uses the BREAK software breakpoint instruction to generate a trap instead, similarly to most other arches, with the generic BUG code generating the dmesg boilerplate. This allows bug metadata to be moved to a separate table and reduces the amount of inline code at BUG() and WARN() sites. This also avoids clobbering any registers before they can be dumped.

Before patch: [ 3018.338013] lkdtm: Performing direct entry BUG [ 3018.342445] Kernel bug detected[#5]: [ 3018.345992] CPU: 2 PID: 865 Comm: cat Tainted: G D 6.0.0-rc6+ #35

After patch: [ 125.585985] lkdtm: Performing direct entry BUG [ 125.590433] ------------[ cut here ]------------ [ 125.595020] kernel BUG at drivers/misc/lkdtm/bugs.c:78! [ 125.600211] Oops - BUG[#1]: [ 125.602980] CPU: 3 PID: 410 Comm: cat Not tainted 6.0.0-rc6+ #36

Compared to before, the file/line information is now obtained out of line.
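With GENERIC_BUG_RELATIVE_POINTERS selected, a BUG()/WARN() site now costs one break instruction plus one out-of-line record in __bug_table; the record laid out by the asm in asm/bug.h corresponds (roughly, under CONFIG_DEBUG_BUGVERBOSE) to the generic structure from include/asm-generic/bug.h:

  struct bug_entry {
          signed int     bug_addr_disp;  /* break insn, PC-relative */
          signed int     file_disp;      /* file name, PC-relative */
          unsigned short line;
          unsigned short flags;          /* e.g. BUGFLAG_WARNING */
  };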
Change-Id: I8f8d96c6b19ab7711f4fc35bb437058b7413e1cb Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 8 +++++ arch/loongarch/include/asm/bug.h | 59 ++++++++++++++++++++++++++------ arch/loongarch/kernel/head.S | 4 +++ arch/loongarch/kernel/traps.c | 26 ++++++++++++-- 4 files changed, 85 insertions(+), 12 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 487021fe10f8..1faefebf74ab 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -716,6 +716,14 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG +config GENERIC_BUG + def_bool y + depends on BUG + +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + depends on GENERIC_BUG + config LOCKDEP_SUPPORT bool default y diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index bda49108a76d..b200d4e26138 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -2,21 +2,60 @@ #ifndef __ASM_BUG_H #define __ASM_BUG_H -#include +#include +#include + +#ifndef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) +#else +#define __BUGVERBOSE_LOCATION(file, line) \ + .pushsection .rodata.str, "aMS", @progbits, 1; \ + 10002: .string file; \ + .popsection; \ + \ + .long 10002b - 10000b; \ + .short line; +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#endif -#ifdef CONFIG_BUG +#ifndef CONFIG_GENERIC_BUG +#define __BUG_ENTRY(flags) +#else +#define __BUG_ENTRY(flags) \ + .pushsection __bug_table, "aw"; \ + .align 2; \ + 10000: .long 10001f - 10000b; \ + _BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + .short flags; \ + .popsection; \ + 10001: +#endif -#include +#define ASM_BUG_FLAGS(flags) \ + __BUG_ENTRY(flags) \ + break BRK_BUG -static inline void __noreturn BUG(void) -{ - __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); - unreachable(); -} +#define ASM_BUG() ASM_BUG_FLAGS(0) -#define HAVE_ARCH_BUG +#define __BUG_FLAGS(flags) \ + asm_inline volatile (__stringify(ASM_BUG_FLAGS(flags))); -#endif +#define __WARN_FLAGS(flags) \ +do { \ + instrumentation_begin(); \ + __BUG_FLAGS(BUGFLAG_WARNING|(flags)); \ + annotate_reachable(); \ + instrumentation_end(); \ +} while (0) + +#define BUG() \ +do { \ + instrumentation_begin(); \ + __BUG_FLAGS(0); \ + unreachable(); \ +} while (0) + +#define HAVE_ARCH_BUG #include diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index af79139f1d31..322862040776 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -84,6 +85,7 @@ SYM_CODE_START(kernel_entry) # kernel entry point #else bl start_kernel #endif + ASM_BUG() SYM_CODE_END(kernel_entry) @@ -115,6 +117,8 @@ SYM_CODE_START(smpboot_entry) ld.d tp, t0, CPU_BOOT_TINFO bl start_secondary + ASM_BUG() + SYM_CODE_END(smpboot_entry) #endif /* CONFIG_SMP */ diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 9139ba70f03a..a766522db6ce 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -412,6 +412,29 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs) irqentry_exit(regs, state); } +#ifdef CONFIG_GENERIC_BUG +int is_valid_bugaddr(unsigned long addr) +{ + return 1; +} +#endif /* CONFIG_GENERIC_BUG */ + +static void bug_handler(struct pt_regs *regs) +{ + switch (report_bug(regs->csr_era, regs)) { + case BUG_TRAP_TYPE_BUG: + case BUG_TRAP_TYPE_NONE: + die_if_kernel("Oops - BUG", regs); + force_sig(SIGTRAP); + 
break; + + case BUG_TRAP_TYPE_WARN: + /* Skip the BUG instruction and continue */ + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + } +} + asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); @@ -465,8 +488,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs) switch (bcode) { case BRK_BUG: - die_if_kernel("Kernel bug detected", regs); - force_sig(SIGTRAP); + bug_handler(regs); break; case BRK_DIVZERO: die_if_kernel("Break instruction in kernel code", regs);
-- Gitee
From 68e751574eadc902bc79df4be2e9fa7dd83061ae Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Nov 2022 16:39:59 +0800 Subject: [PATCH 108/241] LoongArch: module: Use got/plt section indices for relocations

Instead of saving a pointer to the .got, .plt and .plt_idx sections to apply {got,plt}-based relocations, save and use their section indices instead.

The mod->arch.{core,init}.{got,plt} pointers were problematic for livepatch because they pointed within temporary section headers (provided by the module loader via info->sechdrs) that would be freed after module load. Since livepatch modules may need to apply relocations post-module-load (for example, to patch a module that is loaded later), using section indices to offset into the section headers (instead of accessing them through a saved pointer) allows livepatch modules on LoongArch to pass in their own copy of the section headers to apply_relocate_add() to apply delayed relocations.

The method used is the same as commit c8ebf64eab743 ("arm64/module: use plt section indices for relocations").

Change-Id: Id7b083b3902a7cd3c98d985c01704bb7cf745cb2 Signed-off-by: Hongchen Zhang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/module.h | 22 +++++----- arch/loongarch/kernel/module-sections.c | 54 +++++++++++++------------ arch/loongarch/kernel/module.c | 39 ++++++++++++------ 3 files changed, 68 insertions(+), 47 deletions(-)
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index bab3d67ad3df..fccef8461f80 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -11,7 +11,7 @@ #define RELA_STACK_DEPTH 16 struct mod_section { - Elf_Shdr *shdr; + int shndx; int num_entries; int max_entries; }; @@ -37,8 +37,8 @@ struct plt_idx_entry { Elf_Addr symbol_addr; }; -Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val); -Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val); +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val); +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val); static inline struct got_entry emit_got_entry(Elf_Addr val) { @@ -62,10 +62,10 @@ static inline struct plt_idx_entry emit_plt_idx_entry(unsigned long val) return (struct plt_idx_entry) { val }; } -static inline int get_plt_idx(unsigned long val, const struct mod_section *sec) +static inline int get_plt_idx(unsigned long val, Elf_Shdr *sechdrs, const struct mod_section *sec) { int i; - struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sec->shdr->sh_addr; + struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sechdrs[sec->shndx].sh_addr; for (i = 0; i < sec->num_entries; i++) { if (plt_idx[i].symbol_addr == val) @@ -76,11 +76,12 @@ static inline int get_plt_idx(unsigned long val, const struct mod_section *sec) } static inline struct plt_entry *get_plt_entry(unsigned long val, - const struct mod_section *sec_plt, - const struct mod_section *sec_plt_idx) + Elf_Shdr *sechdrs, + const struct 
mod_section *sec_plt, + const struct mod_section *sec_plt_idx) { - int plt_idx = get_plt_idx(val, sec_plt_idx); - struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr; + int plt_idx = get_plt_idx(val, sechdrs, sec_plt_idx); + struct plt_entry *plt = (struct plt_entry *)sechdrs[sec_plt->shndx].sh_addr; if (plt_idx < 0) return NULL; @@ -89,10 +90,11 @@ static inline struct plt_entry *get_plt_entry(unsigned long val, } static inline struct got_entry *get_got_entry(Elf_Addr val, + Elf_Shdr *sechdrs, const struct mod_section *sec) { - struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr; int i; + struct got_entry *got = (struct got_entry *)sechdrs[sec->shndx].sh_addr; for (i = 0; i < sec->num_entries; i++) if (got[i].symbol_addr == val) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index d296a70b758f..13d9a427325a 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -7,17 +7,17 @@ #include #include -Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) +Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { struct mod_section *got_sec = &mod->arch.got; int i = got_sec->num_entries; - struct got_entry *got = get_got_entry(val, got_sec); + struct got_entry *got = get_got_entry(val, sechdrs, got_sec); if (got) return (Elf_Addr)got; /* There is no GOT entry for val yet, create a new one. */ - got = (struct got_entry *)got_sec->shdr->sh_addr; + got = (struct got_entry *)sechdrs[got_sec->shndx].sh_addr; got[i] = emit_got_entry(val); got_sec->num_entries++; @@ -33,12 +33,12 @@ Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val) return (Elf_Addr)&got[i]; } -Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) +Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { int nr; struct mod_section *plt_sec = &mod->arch.plt; struct mod_section *plt_idx_sec = &mod->arch.plt_idx; - struct plt_entry *plt = get_plt_entry(val, plt_sec, plt_idx_sec); + struct plt_entry *plt = get_plt_entry(val, sechdrs, plt_sec, plt_idx_sec); struct plt_idx_entry *plt_idx; if (plt) @@ -47,9 +47,9 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val) nr = plt_sec->num_entries; /* There is no duplicate entry, create a new one */ - plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt = (struct plt_entry *)sechdrs[plt_sec->shndx].sh_addr; plt[nr] = emit_plt_entry(val); - plt_idx = (struct plt_idx_entry *)plt_idx_sec->shdr->sh_addr; + plt_idx = (struct plt_idx_entry *)sechdrs[plt_idx_sec->shndx].sh_addr; plt_idx[nr] = emit_plt_idx_entry(val); plt_sec->num_entries++; @@ -103,28 +103,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { unsigned int i, num_plts = 0, num_gots = 0; + Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec; /* * Find the empty .plt sections. 
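 * (namely .got, .plt and .plt.idx; only their section indices are recorded here, and their final sizes are set further below once the relocations have been counted.)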
*/ for (i = 0; i < ehdr->e_shnum; i++) { if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) - mod->arch.got.shdr = sechdrs + i; + mod->arch.got.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) - mod->arch.plt.shdr = sechdrs + i; + mod->arch.plt.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) - mod->arch.plt_idx.shdr = sechdrs + i; + mod->arch.plt_idx.shndx = i; } - if (!mod->arch.got.shdr) { + if (!mod->arch.got.shndx) { pr_err("%s: module GOT section(s) missing\n", mod->name); return -ENOEXEC; } - if (!mod->arch.plt.shdr) { + if (!mod->arch.plt.shndx) { pr_err("%s: module PLT section(s) missing\n", mod->name); return -ENOEXEC; } - if (!mod->arch.plt_idx.shdr) { + if (!mod->arch.plt_idx.shndx) { pr_err("%s: module PLT.IDX section(s) missing\n", mod->name); return -ENOEXEC; } @@ -145,24 +146,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, count_max_entries(relas, num_rela, &num_plts, &num_gots); } - mod->arch.got.shdr->sh_type = SHT_NOBITS; - mod->arch.got.shdr->sh_flags = SHF_ALLOC; - mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + got_sec = sechdrs + mod->arch.got.shndx; + got_sec->sh_type = SHT_NOBITS; + got_sec->sh_flags = SHF_ALLOC; + got_sec->sh_addralign = L1_CACHE_BYTES; + got_sec->sh_size = (num_gots + 1) * sizeof(struct got_entry); mod->arch.got.num_entries = 0; mod->arch.got.max_entries = num_gots; - mod->arch.plt.shdr->sh_type = SHT_NOBITS; - mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; - mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry); + plt_sec = sechdrs + mod->arch.plt.shndx; + plt_sec->sh_type = SHT_NOBITS; + plt_sec->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + plt_sec->sh_addralign = L1_CACHE_BYTES; + plt_sec->sh_size = (num_plts + 1) * sizeof(struct plt_entry); mod->arch.plt.num_entries = 0; mod->arch.plt.max_entries = num_plts; - mod->arch.plt_idx.shdr->sh_type = SHT_NOBITS; - mod->arch.plt_idx.shdr->sh_flags = SHF_ALLOC; - mod->arch.plt_idx.shdr->sh_addralign = L1_CACHE_BYTES; - mod->arch.plt_idx.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry); + plt_idx_sec = sechdrs + mod->arch.plt_idx.shndx; + plt_idx_sec->sh_type = SHT_NOBITS; + plt_idx_sec->sh_flags = SHF_ALLOC; + plt_idx_sec->sh_addralign = L1_CACHE_BYTES; + plt_idx_sec->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry); mod->arch.plt_idx.num_entries = 0; mod->arch.plt_idx.max_entries = num_plts; diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index ddb1cd5ff9ac..fcb7881ef9d2 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -108,16 +108,17 @@ static int apply_r_larch_sop_push_dup(struct module *mod, u32 *location, Elf_Add return 0; } -static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { ptrdiff_t offset = (void *)v - (void *)location; if (offset >= SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); if (offset < -SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top, type); } @@ -281,17 +282,18 @@ static int 
apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v, } } -static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_b26(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { ptrdiff_t offset = (void *)v - (void *)location; union loongarch_instruction *insn = (union loongarch_instruction *)location; if (offset >= SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); if (offset < -SZ_128M) - v = module_emit_plt_entry(mod, v); + v = module_emit_plt_entry(mod, sechdrs, v); offset = (void *)v - (void *)location; @@ -348,10 +350,11 @@ static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v, return 0; } -static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v, +static int apply_r_larch_got_pc(struct module *mod, + Elf_Shdr *sechdrs, u32 *location, Elf_Addr v, s64 *rela_stack, size_t *rela_stack_top, unsigned int type) { - Elf_Addr got = module_emit_got_entry(mod, v); + Elf_Addr got = module_emit_got_entry(mod, sechdrs, v); if (!got) return -EINVAL; @@ -396,13 +399,10 @@ static reloc_rela_handler reloc_rela_handlers[] = { [R_LARCH_SOP_PUSH_PCREL] = apply_r_larch_sop_push_pcrel, [R_LARCH_SOP_PUSH_ABSOLUTE] = apply_r_larch_sop_push_absolute, [R_LARCH_SOP_PUSH_DUP] = apply_r_larch_sop_push_dup, - [R_LARCH_SOP_PUSH_PLT_PCREL] = apply_r_larch_sop_push_plt_pcrel, [R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] = apply_r_larch_sop, [R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field, [R_LARCH_ADD32 ... R_LARCH_SUB64] = apply_r_larch_add_sub, - [R_LARCH_B26] = apply_r_larch_b26, [R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12] = apply_r_larch_pcala, - [R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12] = apply_r_larch_got_pc, }; int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, @@ -453,7 +453,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, sym->st_value, rel[i].r_addend, (u64)location); v = sym->st_value + rel[i].r_addend; - err = handler(mod, location, v, rela_stack, &rela_stack_top, type); + switch (type) { + case R_LARCH_B26: + err = apply_r_larch_b26(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + case R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12: + err = apply_r_larch_got_pc(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + case R_LARCH_SOP_PUSH_PLT_PCREL: + err = apply_r_larch_sop_push_plt_pcrel(mod, sechdrs, location, + v, rela_stack, &rela_stack_top, type); + break; + default: + err = handler(mod, location, v, rela_stack, &rela_stack_top, type); + } if (err) return err; } -- Gitee From 2dd4b64077bb63913891e36f9c5f0cf60bae24ea Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 2 Feb 2023 16:32:50 +0800 Subject: [PATCH 109/241] LoongArch: Make -mstrict-align configurable Introduce Kconfig option ARCH_STRICT_ALIGN to make -mstrict-align be configurable. Not all LoongArch cores support h/w unaligned access, we can use the -mstrict-align build parameter to prevent unaligned accesses. CPUs with h/w unaligned access support: Loongson-2K2000/2K3000/3A5000/3C5000/3D5000. CPUs without h/w unaligned access support: Loongson-2K500/2K1000. This option is enabled by default to make the kernel be able to run on all LoongArch systems. But you can disable it manually if you want to run kernel only on systems with h/w unaligned access support in order to optimise for performance. 
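
For reference, driver code that must run correctly on both classes of
cores should not depend on this option at all but use the generic
unaligned accessors. A minimal sketch, assuming a little-endian field
at a deliberately odd offset (the function name and layout are made up
for illustration):

  #include <asm/unaligned.h>

  /* Read a 32-bit little-endian field at byte offset 3 of a buffer,
   * i.e. at a non-natural alignment. With ARCH_STRICT_ALIGN=y this
   * compiles to byte loads; otherwise the compiler may emit a single
   * unaligned ld.w that the listed CPUs tolerate in hardware. */
  static u32 demo_read_field(const u8 *buf)
  {
          return get_unaligned_le32(buf + 3);
  }
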
Change-Id: Idbd540ed5f2b3037ac8eb624315dbcc3be8360bc Reviewed-by: Arnd Bergmann Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 19 +++++++++++++++++++ arch/loongarch/Makefile | 4 ++++ arch/loongarch/kernel/Makefile | 6 ++++-- arch/loongarch/kernel/traps.c | 9 +++++++-- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 1faefebf74ab..756c1361d87b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -81,6 +81,7 @@ config LOONGARCH select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_FUTEX_CMPXCHG if FUTEX @@ -681,6 +682,24 @@ config ARCH_IOREMAP protection support. However, you can enable LoongArch DMW-based ioremap() for better performance. +config ARCH_STRICT_ALIGN + bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT + default y + help + Not all LoongArch cores support h/w unaligned access, we can use + -mstrict-align build parameter to prevent unaligned accesses. + + CPUs with h/w unaligned access support: + Loongson-2K2000/2K3000/3A5000/3C5000/3D5000. + + CPUs without h/w unaligned access support: + Loongson-2K500/2K1000. + + This option is enabled by default to make the kernel be able to run + on all LoongArch systems. But you can disable it manually if you want + to run kernel only on systems with h/w unaligned access support in + order to optimise for performance. + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 1688503b6597..be120ea442f6 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -106,7 +106,11 @@ endif # This is required to get dwarf unwinding tables into .debug_frame # instead of .eh_frame so we don't discard them. 
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+ifdef CONFIG_ARCH_STRICT_ALIGN
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+else
+KBUILD_CFLAGS += $(call cc-option,-mno-strict-align)
+endif
 
 KBUILD_LDFLAGS	+= -m $(ld-emul)
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index ffd701d5dffc..dbf8767d89c4 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -7,12 +7,14 @@ extra-y		:= head.o vmlinux.lds
 
 obj-y		+= cpu-probe.o elf.o entry.o genex.o idle.o irq.o \
 		   process.o ptrace.o reset.o setup.o signal.o io.o \
-		   syscall.o time.o topology.o traps.o unaligned.o \
-		   switch.o cacheinfo.o cmpxchg.o vdso.o
+		   syscall.o time.o topology.o traps.o switch.o \
+		   cacheinfo.o cmpxchg.o vdso.o
 
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI)		+= efi.o
 
+obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
+
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index a766522db6ce..591f268e1b38 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -381,9 +381,14 @@ int no_unaligned_warning __read_mostly = 1;	/* Only 1 warning by default */
 
 asmlinkage void noinstr do_ale(struct pt_regs *regs)
 {
-	unsigned int *pc;
 	irqentry_state_t state = irqentry_enter(regs);
 
+#ifndef CONFIG_ARCH_STRICT_ALIGN
+	die_if_kernel("Kernel ale access", regs);
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
+#else
+	unsigned int *pc;
+
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
 
 	/*
@@ -407,8 +412,8 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
 sigbus:
 	die_if_kernel("Kernel ale access", regs);
 	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
-
 out:
+#endif
 	irqentry_exit(regs, state);
 }
 
-- 
Gitee

From ae8f71dfc00b77f4a5ed1409f32ac14662d0c615 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 14 Mar 2023 16:36:40 +0800
Subject: [PATCH 110/241] LoongArch: Make WriteCombine configurable for
 ioremap()

LoongArch maintains cache coherency in hardware, but when paired with
LS7A chipsets the WUC attribute (Weak-ordered UnCached, which is
similar to WriteCombine) is out of the scope of the cache coherency
mechanism for PCIe devices (this is a PCIe protocol violation, which
may be fixed in newer chipsets).

This means WUC can only be used for write-only memory regions now, so
this option is disabled by default, making WUC silently fallback to SUC
for ioremap(). You can enable this option if the kernel is ensured to
run on hardware without this bug.

Kernel parameter writecombine=on/off can be used to override the
Kconfig option.

Change-Id: I64a86fe65222e37fe743bc945e005a52a87c46f8
Suggested-by: WANG Xuerui
Reviewed-by: WANG Xuerui
Signed-off-by: Huacai Chen
---
 .../admin-guide/kernel-parameters.rst         |  1 +
 .../admin-guide/kernel-parameters.txt         |  6 ++++++
 arch/loongarch/Kconfig                        | 16 ++++++++++++++
 arch/loongarch/include/asm/io.h               |  4 +++-
 arch/loongarch/kernel/setup.c                 | 21 +++++++++++++++++++
 5 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst
index 6d421694d98e..7700d6a1cd25 100644
--- a/Documentation/admin-guide/kernel-parameters.rst
+++ b/Documentation/admin-guide/kernel-parameters.rst
@@ -115,6 +115,7 @@ parameter is applicable::
 	KVM	Kernel Virtual Machine support is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
+	LOONGARCH LoongArch architecture is enabled.
 	LOOP	Loopback device support is enabled.
 	M68k	M68k architecture is enabled.
 			These options have more detailed description inside of
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 82ae0b5b9a62..81041c381313 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6102,6 +6102,12 @@
 			When enabled, memory and cache locality will be
 			impacted.
 
+	writecombine=	[LOONGARCH] Control the MAT (Memory Access Type) of
+			ioremap_wc().
+
+			on  - Enable writecombine, use WUC for ioremap_wc()
+			off - Disable writecombine, use SUC for ioremap_wc()
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 756c1361d87b..0917c35b77fc 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -682,6 +682,22 @@ config ARCH_IOREMAP
 	  protection support. However, you can enable LoongArch DMW-based
 	  ioremap() for better performance.
 
+config ARCH_WRITECOMBINE
+	bool "Enable WriteCombine (WUC) for ioremap()"
+	help
+	  LoongArch maintains cache coherency in hardware, but when paired
+	  with LS7A chipsets the WUC attribute (Weak-ordered UnCached, which
+	  is similar to WriteCombine) is out of the scope of cache coherency
+	  mechanism for PCIe devices (this is a PCIe protocol violation, which
+	  may be fixed in newer chipsets).
+
+	  This means WUC can only be used for write-only memory regions now, so
+	  this option is disabled by default, making WUC silently fallback to
+	  SUC for ioremap(). You can enable this option if the kernel is ensured
+	  to run on hardware without this bug.
+
+	  You can override this setting via writecombine=on/off boot parameter.
+ config ARCH_STRICT_ALIGN bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT default y diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index da80f037ef97..5b12b061d571 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -54,8 +54,10 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, * @offset: bus address of the memory * @size: size of the resource to map */ +extern pgprot_t pgprot_wc; + #define ioremap_wc(offset, size) \ - ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC)) + ioremap_prot((offset), (size), pgprot_val(pgprot_wc)) #define ioremap_cache(offset, size) \ ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 4d5f61a1ce39..bd570c402d24 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -77,6 +77,27 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add memblock_add(start, size); } +#ifdef CONFIG_ARCH_WRITECOMBINE +pgprot_t pgprot_wc = PAGE_KERNEL_WUC; +#else +pgprot_t pgprot_wc = PAGE_KERNEL_SUC; +#endif + +EXPORT_SYMBOL(pgprot_wc); + +static int __init setup_writecombine(char *p) +{ + if (!strcmp(p, "on")) + pgprot_wc = PAGE_KERNEL_WUC; + else if (!strcmp(p, "off")) + pgprot_wc = PAGE_KERNEL_SUC; + else + pr_warn("Unknown writecombine setting \"%s\".\n", p); + + return 0; +} +early_param("writecombine", setup_writecombine); + static int usermem __initdata; static int __init early_parse_mem(char *p) -- Gitee From fff9dfdb55f1e8b8f087b25d7d5cc5d729fd9b13 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 2 Mar 2023 09:29:02 +0800 Subject: [PATCH 111/241] LoongArch: Fix probing of the CRC32 feature Not all LoongArch processors support CRC32 instructions. This feature is indicated by CPUCFG1.CRC32 (Bit25) but it is wrongly defined in the previous versions of the ISA manual (and so does in loongarch.h). The CRC32 feature is set unconditionally now, so fix it. BTW, expose the CRC32 feature in /proc/cpuinfo. 
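
With the bit probed correctly, consumers can gate on the feature
instead of assuming the instructions exist. A hedged sketch of such a
check (the init function is hypothetical):

  #include <linux/errno.h>
  #include <linux/init.h>
  #include <asm/cpu-features.h>

  static int __init demo_crc32_init(void)
  {
          /* Bail out on CPUs whose CPUCFG1.CRC32 (bit 25) is clear */
          if (!cpu_has_crc32)
                  return -ENODEV;

          /* ... safe to use the crc.w.{b,h,w,d}.w instructions here ... */
          return 0;
  }
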
Change-Id: I834b0966ee8d2053586b55603fd5adff3783e2b7 Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 1 + arch/loongarch/include/asm/cpu.h | 40 ++++++++++++---------- arch/loongarch/include/asm/loongarchregs.h | 2 +- arch/loongarch/kernel/cpu-probe.c | 7 +++- arch/loongarch/kernel/proc.c | 1 + 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 866ca1a01d81..3ba07f23f773 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -38,6 +38,7 @@ #define cpu_has_fpu cpu_opt(LOONGARCH_CPU_FPU) #define cpu_has_lsx cpu_opt(LOONGARCH_CPU_LSX) #define cpu_has_lasx cpu_opt(LOONGARCH_CPU_LASX) +#define cpu_has_crc32 cpu_opt(LOONGARCH_CPU_CRC32) #define cpu_has_complex cpu_opt(LOONGARCH_CPU_COMPLEX) #define cpu_has_crypto cpu_opt(LOONGARCH_CPU_CRYPTO) #define cpu_has_lvz cpu_opt(LOONGARCH_CPU_LVZ) diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 443d8ee2f187..5ff57a1620df 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -78,25 +78,26 @@ enum cpu_type_enum { #define CPU_FEATURE_FPU 3 /* CPU has FPU */ #define CPU_FEATURE_LSX 4 /* CPU has LSX (128-bit SIMD) */ #define CPU_FEATURE_LASX 5 /* CPU has LASX (256-bit SIMD) */ -#define CPU_FEATURE_COMPLEX 6 /* CPU has Complex instructions */ -#define CPU_FEATURE_CRYPTO 7 /* CPU has Crypto instructions */ -#define CPU_FEATURE_LVZ 8 /* CPU has Virtualization extension */ -#define CPU_FEATURE_LBT_X86 9 /* CPU has X86 Binary Translation */ -#define CPU_FEATURE_LBT_ARM 10 /* CPU has ARM Binary Translation */ -#define CPU_FEATURE_LBT_MIPS 11 /* CPU has MIPS Binary Translation */ -#define CPU_FEATURE_TLB 12 /* CPU has TLB */ -#define CPU_FEATURE_CSR 13 /* CPU has CSR */ -#define CPU_FEATURE_WATCH 14 /* CPU has watchpoint registers */ -#define CPU_FEATURE_VINT 15 /* CPU has vectored interrupts */ -#define CPU_FEATURE_CSRIPI 16 /* CPU has CSR-IPI */ -#define CPU_FEATURE_EXTIOI 17 /* CPU has EXT-IOI */ -#define CPU_FEATURE_PREFETCH 18 /* CPU has prefetch instructions */ -#define CPU_FEATURE_PMP 19 /* CPU has perfermance counter */ -#define CPU_FEATURE_SCALEFREQ 20 /* CPU supports cpufreq scaling */ -#define CPU_FEATURE_FLATMODE 21 /* CPU has flat mode */ -#define CPU_FEATURE_EIODECODE 22 /* CPU has extioi int pin decode mode */ -#define CPU_FEATURE_GUESTID 23 /* CPU has GuestID feature */ -#define CPU_FEATURE_HYPERVISOR 24 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_CRC32 6 /* CPU has CRC32 instructions */ +#define CPU_FEATURE_COMPLEX 7 /* CPU has Complex instructions */ +#define CPU_FEATURE_CRYPTO 8 /* CPU has Crypto instructions */ +#define CPU_FEATURE_LVZ 9 /* CPU has Virtualization extension */ +#define CPU_FEATURE_LBT_X86 10 /* CPU has X86 Binary Translation */ +#define CPU_FEATURE_LBT_ARM 11 /* CPU has ARM Binary Translation */ +#define CPU_FEATURE_LBT_MIPS 12 /* CPU has MIPS Binary Translation */ +#define CPU_FEATURE_TLB 13 /* CPU has TLB */ +#define CPU_FEATURE_CSR 14 /* CPU has CSR */ +#define CPU_FEATURE_WATCH 15 /* CPU has watchpoint registers */ +#define CPU_FEATURE_VINT 16 /* CPU has vectored interrupts */ +#define CPU_FEATURE_CSRIPI 17 /* CPU has CSR-IPI */ +#define CPU_FEATURE_EXTIOI 18 /* CPU has EXT-IOI */ +#define CPU_FEATURE_PREFETCH 19 /* CPU has prefetch instructions */ +#define CPU_FEATURE_PMP 20 /* CPU has perfermance counter */ +#define CPU_FEATURE_SCALEFREQ 21 /* CPU supports 
cpufreq scaling */ +#define CPU_FEATURE_FLATMODE 22 /* CPU has flat mode */ +#define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ +#define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ +#define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -104,6 +105,7 @@ enum cpu_type_enum { #define LOONGARCH_CPU_FPU BIT_ULL(CPU_FEATURE_FPU) #define LOONGARCH_CPU_LSX BIT_ULL(CPU_FEATURE_LSX) #define LOONGARCH_CPU_LASX BIT_ULL(CPU_FEATURE_LASX) +#define LOONGARCH_CPU_CRC32 BIT_ULL(CPU_FEATURE_CRC32) #define LOONGARCH_CPU_COMPLEX BIT_ULL(CPU_FEATURE_COMPLEX) #define LOONGARCH_CPU_CRYPTO BIT_ULL(CPU_FEATURE_CRYPTO) #define LOONGARCH_CPU_LVZ BIT_ULL(CPU_FEATURE_LVZ) diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index 3b2610a0af6d..c526c53e4cf7 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -117,7 +117,7 @@ static inline u32 read_cpucfg(u32 reg) #define CPUCFG1_EP BIT(22) #define CPUCFG1_RPLV BIT(23) #define CPUCFG1_HUGEPG BIT(24) -#define CPUCFG1_IOCSRBRD BIT(25) +#define CPUCFG1_CRC32 BIT(25) #define CPUCFG1_MSGINT BIT(26) #define LOONGARCH_CPUCFG2 0x2 diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cae5dca8ca8c..2f6249dc1397 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -95,13 +95,18 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; - elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32; + elf_hwcap = HWCAP_LOONGARCH_CPUCFG; config = read_cpucfg(LOONGARCH_CPUCFG1); if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; } + if (config & CPUCFG1_CRC32) { + c->options |= LOONGARCH_CPU_CRC32; + elf_hwcap |= HWCAP_LOONGARCH_CRC32; + } + config = read_cpucfg(LOONGARCH_CPUCFG2); if (config & CPUCFG2_LAM) { diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index b12a1f21f864..d70e611bd7e7 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -76,6 +76,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_fpu) seq_printf(m, " fpu"); if (cpu_has_lsx) seq_printf(m, " lsx"); if (cpu_has_lasx) seq_printf(m, " lasx"); + if (cpu_has_crc32) seq_printf(m, " crc32"); if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); if (cpu_has_lvz) seq_printf(m, " lvz"); -- Gitee From 505920e82fe411250b20a13bc5ffcda7c25449c7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 31 Mar 2023 10:08:19 +0800 Subject: [PATCH 112/241] LoongArch: Fix build error if CONFIG_SUSPEND is not set We can see the following build error on LoongArch if CONFIG_SUSPEND is not set: ld: drivers/acpi/sleep.o: in function 'acpi_pm_prepare': sleep.c:(.text+0x2b8): undefined reference to 'loongarch_wakeup_start' Here is the call trace: acpi_pm_prepare() __acpi_pm_prepare() acpi_sleep_prepare() acpi_get_wakeup_address() loongarch_wakeup_start() Root cause: loongarch_wakeup_start() is defined in arch/loongarch/power/ suspend_asm.S which is only built under CONFIG_SUSPEND. In order to fix the build error, just let acpi_get_wakeup_address() return 0 if CONFIG_ SUSPEND is not set. 
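
An equivalent spelling of this fix, shown only for comparison and not
what the patch below uses, leans on IS_ENABLED() plus dead-code
elimination (the kernel is always built with optimization, so the
unresolved reference in the statically-false branch is dropped):

  static inline unsigned long acpi_get_wakeup_address(void)
  {
          if (IS_ENABLED(CONFIG_SUSPEND)) {
                  extern void loongarch_wakeup_start(void);

                  return (unsigned long)loongarch_wakeup_start;
          }

          return 0UL;
  }
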
Change-Id: I6be5413585959b41b86a82bbe763523a970e2cc7
Fixes: 366bb35a8e48 ("LoongArch: Add suspend (ACPI S3) support")
Reviewed-by: WANG Xuerui
Reported-by: Randy Dunlap
Link: https://lore.kernel.org/all/11215033-fa3c-ecb1-2fc0-e9aeba47be9b@infradead.org/
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/acpi.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 0a503df02ba2..7b4c1c6e66b2 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -45,8 +45,11 @@ extern void loongarch_suspend_enter(void);
 
 static inline unsigned long acpi_get_wakeup_address(void)
 {
+#ifdef CONFIG_SUSPEND
 	extern void loongarch_wakeup_start(void);
 	return (unsigned long)loongarch_wakeup_start;
+#endif
+	return 0UL;
 }
 
 #endif /* _ASM_LOONGARCH_ACPI_H */
-- 
Gitee

From de9dd6b5fcb1c13081d1368863495cac8182e551 Mon Sep 17 00:00:00 2001
From: Qing Zhang
Date: Tue, 4 Apr 2023 16:41:44 +0800
Subject: [PATCH 113/241] LoongArch: Fix _CONST64_(x) as unsigned

Addresses should all be of unsigned type to avoid unnecessary
conversions.

Change-Id: If1b737e23666fd61434655e6239d3077d094236c
Signed-off-by: Qing Zhang
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/addrspace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index f121e76d7e3f..6132318e428e 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -66,9 +66,9 @@ extern unsigned long vm_map_base;
 #define _ATYPE32_	int
 #define _ATYPE64_	__s64
 #ifdef CONFIG_64BIT
-#define _CONST64_(x)	x ## L
+#define _CONST64_(x)	x ## UL
 #else
-#define _CONST64_(x)	x ## LL
+#define _CONST64_(x)	x ## ULL
 #endif
 #endif
-- 
Gitee

From c10ac5feb7d85e840904d2a96959bc383dccdb55 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 1 Mar 2023 16:24:34 +0800
Subject: [PATCH 114/241] LoongArch: Mark 3 symbol exports as non-GPL

vm_map_base, empty_zero_page and invalid_pmd_table could be accessed
widely by some out-of-tree non-GPL but important file systems or
drivers (e.g. OpenZFS). Let's use EXPORT_SYMBOL() instead of
EXPORT_SYMBOL_GPL() to export them, so as to avoid build errors.

1, Details about vm_map_base:

This is a LoongArch-specific symbol and may be referenced through
macros PCI_IOBASE, VMALLOC_START and VMALLOC_END.

2, Details about empty_zero_page:

As it stands today, only 3 architectures export empty_zero_page as a
GPL symbol: IA64, LoongArch and MIPS. LoongArch gets the GPL export by
inheriting from MIPS, and the MIPS export was first introduced in
commit 497d2adcbf50b ("[MIPS] Export empty_zero_page for sake of the
ext4 module."). The IA64 export was similar: commit a7d57ecf4216e
("[IA64] Export three symbols for module use") did so for kvm.

In both IA64 and MIPS, the export of empty_zero_page was done for
satisfying some in-kernel component built as module (kvm and ext4
respectively), and given its reasonably low-level nature, GPL is a
reasonable choice. But looking at the bigger picture it is evident most
other architectures do not regard it as GPL, so in effect the symbol
probably should not be treated as such, in favor of consistency.

3, Details about invalid_pmd_table:

Keep consistency with invalid_pte_table and make it possible to be used
by some modules.
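
To make the failure mode concrete, here is a skeleton of the kind of
out-of-tree, non-GPL module that motivates the change (entirely
hypothetical, loosely modelled on the OpenZFS case):

  #include <linux/module.h>
  #include <linux/printk.h>
  #include <asm/pgtable.h>

  static int __init demo_init(void)
  {
          /* VMALLOC_START expands to an expression involving
           * vm_map_base, so a GPL-only export prevents a module
           * with an incompatible license from linking against it. */
          pr_info("vmalloc area starts at 0x%lx\n", VMALLOC_START);
          return 0;
  }

  static void __exit demo_exit(void)
  {
  }

  module_init(demo_init);
  module_exit(demo_exit);
  MODULE_LICENSE("CDDL");	/* not GPL-compatible, as with OpenZFS */
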
Change-Id: I39e709e6c4143ebb77847591793cb06d770c1b29
Reviewed-by: WANG Xuerui
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/cpu-probe.c | 2 +-
 arch/loongarch/mm/init.c          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 2f6249dc1397..a44fe7d33500 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -61,7 +61,7 @@ static inline void set_elf_platform(int cpu, const char *plat)
 
 /* MAP BASE */
 unsigned long vm_map_base;
-EXPORT_SYMBOL_GPL(vm_map_base);
+EXPORT_SYMBOL(vm_map_base);
 
 static void cpu_probe_addrbits(struct cpuinfo_loongarch *c)
 {
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 19e356f4cd6c..69449219e2a9 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -43,7 +43,7 @@
  * don't have to care about aliases on other CPUs.
  */
 unsigned long empty_zero_page, zero_page_mask;
-EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(zero_page_mask);
 
 void setup_zero_pages(void)
@@ -417,7 +417,7 @@ pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
 pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
-EXPORT_SYMBOL_GPL(invalid_pmd_table);
+EXPORT_SYMBOL(invalid_pmd_table);
 #endif
 pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
 EXPORT_SYMBOL(invalid_pte_table);
-- 
Gitee

From 64a99ad0d62f481600daaff1c722a62ed8e904c1 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 4 Apr 2023 20:58:09 +0800
Subject: [PATCH 115/241] LoongArch: module: set section addresses to 0x0

These got*, plt* and .text.ftrace_trampoline sections specified for
LoongArch have non-zero addresses. Non-zero section addresses in a
relocatable ELF would confuse GDB when it tries to compute the section
offsets and it ends up printing wrong symbol addresses. Therefore, set
them to zero, which mirrors the change in commit 5d8591bc0fbaeb6ded
("arm64 module: set plt* section addresses to 0x0").

Change-Id: I869841e874d220ce80002cbb25a0e23931550c9a
Cc: stable@vger.kernel.org
Signed-off-by: Chong Qiao
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/module.lds.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index a3d1bc0fcc72..0739a26c46c0 100644
--- a/arch/loongarch/include/asm/module.lds.h
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -2,7 +2,7 @@
 /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */
 SECTIONS {
 	. = ALIGN(4);
-	.got : { BYTE(0) }
-	.plt : { BYTE(0) }
-	.plt.idx : { BYTE(0) }
+	.got 0 : { BYTE(0) }
+	.plt 0 : { BYTE(0) }
+	.plt.idx 0 : { BYTE(0) }
 }
-- 
Gitee

From bc2ea7ffc21048114ab820ff3a69c14a2e115539 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Tue, 21 Mar 2023 14:29:14 +0800
Subject: [PATCH 116/241] LoongArch: Check unwind_error() in arch_stack_walk()

We can see the following messages with CONFIG_PROVE_LOCKING=y on
LoongArch:

  BUG: MAX_STACK_TRACE_ENTRIES too low!
  turning off the locking correctness validator.

This is because stack_trace_save() returns a big value after calling
arch_stack_walk(), here is the call trace:

  save_trace()
    stack_trace_save()
      arch_stack_walk()
        stack_trace_consume_entry()

arch_stack_walk() should return immediately if unwind_next_frame()
failed, no need to do the useless loops to increase the value of c->len
in stack_trace_consume_entry(), then we can fix the above problem.
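
The effect is visible through the plain stacktrace API: before this fix
the count returned below could be inflated by junk frames, which is
exactly what exhausted lockdep's shared entry pool. A small consumer
sketch (the function itself is hypothetical):

  #include <linux/kernel.h>
  #include <linux/printk.h>
  #include <linux/stacktrace.h>

  static void demo_dump_callers(void)
  {
          unsigned long entries[16];
          unsigned int i, nr;

          /* skip 1 frame: this function itself */
          nr = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
          for (i = 0; i < nr; i++)
                  pr_info("  %pS\n", (void *)entries[i]);
  }
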
Change-Id: If5b7f3cf7988af7babd8bcc6c72077144284122e Reported-by: Guenter Roeck Link: https://lore.kernel.org/all/8a44ad71-68d2-4926-892f-72bfc7a67e2a@roeck-us.net/ Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/stacktrace.c | 2 +- arch/loongarch/kernel/unwind_guess.c | 1 + arch/loongarch/kernel/unwind_prologue.c | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index f1a67285680a..17fde4891397 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -63,7 +63,7 @@ static void save_context_stack(struct task_struct *tsk, regs->regs[22] = 0; for (unwind_start(&state, tsk, regs); - !unwind_done(&state); unwind_next_frame(&state)) { + !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !fn(trace, addr)) return; diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index a3c47a9e2d8c..9c5fea318bdf 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -44,6 +44,7 @@ bool unwind_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); + state->error = true; return false; } EXPORT_SYMBOL_GPL(unwind_next_frame); diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 687644e0994f..4fe1f4de4135 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -149,7 +149,7 @@ bool unwind_next_frame(struct unwind_state *state) regs = (struct pt_regs *)info->next_sp; pc = regs->csr_era; if (user_mode(regs) || !__kernel_text_address(pc)) - return false; + goto out; state->pc = pc; state->sp = regs->regs[3]; @@ -171,6 +171,8 @@ bool unwind_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); +out: + state->error = true; return false; } EXPORT_SYMBOL_GPL(unwind_next_frame); -- Gitee From bb132d03cb6460bec76658879da4bd9b276a80db Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 31 Mar 2023 16:13:52 +0800 Subject: [PATCH 117/241] LoongArch: Clean up plat_swiotlb_setup() related code After commit c78c43fe7d42 ("LoongArch: Use acpi_arch_dma_setup() and remove ARCH_HAS_PHYS_TO_DMA"), plat_swiotlb_setup() has been deleted, so clean up the related code. 
Change-Id: I3b33259e2b8bcd1e9264628cff21da20c84f0938 Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/bootinfo.h | 1 - arch/loongarch/kernel/setup.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h index 2c1b4200a6ab..14b4b425a513 100644 --- a/arch/loongarch/include/asm/bootinfo.h +++ b/arch/loongarch/include/asm/bootinfo.h @@ -21,7 +21,6 @@ extern void early_init(void); extern void init_environ(void); extern void platform_init(void); extern void plat_mem_setup(void); -extern void plat_swiotlb_setup(void); extern int __init init_numa_memory(void); struct loongson_board_info { diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index bd570c402d24..8597c627d8c2 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -224,8 +224,8 @@ static void __init arch_mem_init(char **cmdline_p) /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate - * low memory as small as possible before plat_swiotlb_setup(), so - * make sparse_init() using top-down allocation. + * low memory as small as possible before swiotlb_init(), so make + * sparse_init() using top-down allocation. */ memblock_set_bottom_up(false); sparse_init(); -- Gitee From ebc381dd714efe9087bc40e529387430fd6395b1 Mon Sep 17 00:00:00 2001 From: Enze Li Date: Wed, 12 Apr 2023 10:55:25 +0800 Subject: [PATCH 118/241] LoongArch: Replace hard-coded values in comments with VALEN According to LoongArch documentation [1], CSR.PGDL and CSR.PGDH are concerned with the VA's MSB which is VALEN-1 instead of always being 47. Fix comments to avoid misleading others. [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#page-global-directory-base-address-for-lower-half-address-space Change-Id: I4e58451204e5c2e1df788dfd105af631aa712c17 Reviewed-by: WANG Xuerui Signed-off-by: Enze Li Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarchregs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index c526c53e4cf7..474806e0c866 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -423,9 +423,9 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ASID_ASID_WIDTH 10 #define CSR_ASID_ASID (_ULCAST_(0x3ff) << CSR_ASID_ASID_SHIFT) -#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[47] = 0 */ +#define LOONGARCH_CSR_PGDL 0x19 /* Page table base address when VA[VALEN-1] = 0 */ -#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[47] = 1 */ +#define LOONGARCH_CSR_PGDH 0x1a /* Page table base address when VA[VALEN-1] = 1 */ #define LOONGARCH_CSR_PGD 0x1b /* Page table base */ -- Gitee From 7681d003cf112aa413ee4f1f5172d3b6c19a58cb Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 4 Mar 2023 14:52:53 +0800 Subject: [PATCH 119/241] LoongArch: Provide kernel fpu functions Provide kernel_fpu_begin()/kernel_fpu_end() to allow the kernel itself to use fpu. They can be used by some other kernel components, e.g., the AMDGPU graphic driver for DCN. 
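
A hedged usage sketch (hypothetical caller; note the translation unit
doing the float math must be built with hard-float codegen enabled via
per-file CFLAGS, the way amdgpu's DC code is, since the kernel proper
is compiled soft-float):

  #include <asm/fpu.h>

  static int demo_percent(int num, int den)
  {
          int pct;

          kernel_fpu_begin();	/* no sleeping from here ... */
          pct = (int)((float)num * 100.0f / (float)den);	/* den != 0 */
          kernel_fpu_end();	/* ... to here */

          return pct;
  }

Keeping every FP operation strictly between the two calls matters: a
float value live across kernel_fpu_end() could sit in an FP register
that the exit path hands back to the interrupted context.
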
Change-Id: I4ffd07b2f281b83e6e48ba3013c92331846f6892 Reported-by: WANG Xuerui Tested-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/fpu.h | 3 +++ arch/loongarch/kernel/Makefile | 2 +- arch/loongarch/kernel/kfpu.c | 43 ++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/kernel/kfpu.c diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 4d81e3b6e908..9889772c7eb3 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -22,6 +22,9 @@ struct sigcontext; +extern void kernel_fpu_begin(void); +extern void kernel_fpu_end(void); + extern void _init_fpu(unsigned int); extern void _save_fp(struct loongarch_fpu *); extern void _restore_fp(struct loongarch_fpu *); diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index dbf8767d89c4..07c76dc298b1 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-$(CONFIG_CPU_HAS_FPU) += fpu.o +obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c new file mode 100644 index 000000000000..5c46ae8c6cac --- /dev/null +++ b/arch/loongarch/kernel/kfpu.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +void kernel_fpu_begin(void) +{ + preempt_disable(); + + WARN_ON(this_cpu_read(in_kernel_fpu)); + + this_cpu_write(in_kernel_fpu, true); + + if (!is_fpu_owner()) + enable_fpu(); + else + _save_fp(¤t->thread.fpu); + + write_fcsr(LOONGARCH_FCSR0, 0); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + WARN_ON(!this_cpu_read(in_kernel_fpu)); + + if (!is_fpu_owner()) + disable_fpu(); + else + _restore_fp(¤t->thread.fpu); + + this_cpu_write(in_kernel_fpu, false); + + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); -- Gitee From b19309d103f4a46fb3d3695df86b2cd58d902278 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 21 Jul 2023 23:33:57 +0800 Subject: [PATCH 120/241] LoongArch: Allow usage of LSX/LASX in the kernel Allow usage of LSX/LASX in the kernel by extending kernel_fpu_begin() and kernel_fpu_end(). Change-Id: I7783243be206b60bf52c3d92d155cf8958b10c67 Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kfpu.c | 55 +++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index 5c46ae8c6cac..ec5b28e570c9 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -8,19 +8,40 @@ #include #include +static unsigned int euen_mask = CSR_EUEN_FPEN; + +/* + * The critical section between kernel_fpu_begin() and kernel_fpu_end() + * is non-reentrant. It is the caller's responsibility to avoid reentrance. + * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example. 
+ */ static DEFINE_PER_CPU(bool, in_kernel_fpu); +static DEFINE_PER_CPU(unsigned int, euen_current); void kernel_fpu_begin(void) { + unsigned int *euen_curr; + preempt_disable(); WARN_ON(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); + euen_curr = this_cpu_ptr(&euen_current); - if (!is_fpu_owner()) - enable_fpu(); + *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN); + +#ifdef CONFIG_CPU_HAS_LASX + if (*euen_curr & CSR_EUEN_LASXEN) + _save_lasx(¤t->thread.fpu); + else +#endif +#ifdef CONFIG_CPU_HAS_LSX + if (*euen_curr & CSR_EUEN_LSXEN) + _save_lsx(¤t->thread.fpu); else +#endif + if (*euen_curr & CSR_EUEN_FPEN) _save_fp(¤t->thread.fpu); write_fcsr(LOONGARCH_FCSR0, 0); @@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin); void kernel_fpu_end(void) { + unsigned int *euen_curr; + WARN_ON(!this_cpu_read(in_kernel_fpu)); - if (!is_fpu_owner()) - disable_fpu(); + euen_curr = this_cpu_ptr(&euen_current); + +#ifdef CONFIG_CPU_HAS_LASX + if (*euen_curr & CSR_EUEN_LASXEN) + _restore_lasx(¤t->thread.fpu); else +#endif +#ifdef CONFIG_CPU_HAS_LSX + if (*euen_curr & CSR_EUEN_LSXEN) + _restore_lsx(¤t->thread.fpu); + else +#endif + if (*euen_curr & CSR_EUEN_FPEN) _restore_fp(¤t->thread.fpu); + *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN); + this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); + +static int __init init_euen_mask(void) +{ + if (cpu_has_lsx) + euen_mask |= CSR_EUEN_LSXEN; + + if (cpu_has_lasx) + euen_mask |= CSR_EUEN_LASXEN; + + return 0; +} +arch_initcall(init_euen_mask); -- Gitee From e1ff246fcfda6b667b7d4b1b46177d8d6f4d6f69 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Tue, 4 Apr 2023 16:43:08 +0800 Subject: [PATCH 121/241] LoongArch: Add ARCH_HAS_FORTIFY_SOURCE selection FORTIFY_SOURCE could detect various overflows at compile and run time. ARCH_HAS_FORTIFY_SOURCE means that the architecture can be built and run with CONFIG_FORTIFY_SOURCE. So select it in LoongArch. See more about this feature from commit 6974f0c4555e285 ("include/linux/ string.h: add the option of fortified string.h functions"). Change-Id: I3df04edba6eeef15f3c8d2b2523882ece7ecc4c7 Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0917c35b77fc..27c10a4cee53 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -6,6 +6,7 @@ config LOONGARCH select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_PTE_SPECIAL if !32BIT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_INLINE_READ_LOCK if !PREEMPTION -- Gitee From 631f02f8e5417272ad5d46af3dea46b3baaec2c9 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 25 Feb 2023 22:20:31 +0800 Subject: [PATCH 122/241] LoongArch: kdump: Add single kernel image implementation This feature depends on the kernel being relocatable. Enable using single kernel image for kdump, and then no longer need to build two kernels (production kernel and capture kernel share a single kernel image). Also enable CONFIG_CRASH_DUMP in loongson3_defconfig. 
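
For orientation, the packed relocation-table format consumed by
do_relocations() below is unchanged: each 32-bit word carries the
relocation type in its top byte and the word offset of the patch site
in its low 24 bits. Decoding one entry, as a sketch (rp is an assumed
cursor name, the rest matches the code below):

  u32 r = *rp;			/* one entry from the relocation table */
  int type = (r >> 24) & 0xff;	/* R_LARCH_32 / R_LARCH_64 / R_LARCH_MARK_LA */
  u32 *loc = (u32 *)(kbase_new + ((r & 0x00ffffff) << 2));
  /* ... then adjust the value at 'loc' by 'offset' according to 'type' */
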
Change-Id: I8c455b9e2313c86d70634eb198d710e649bb7660 Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 18 +----- arch/loongarch/Makefile | 4 -- arch/loongarch/boot/compressed/decompress.c | 4 +- arch/loongarch/boot/compressed/head.S | 25 +++++--- arch/loongarch/include/asm/kexec.h | 3 +- arch/loongarch/include/asm/stackframe.h | 9 +++ arch/loongarch/kernel/head.S | 23 ++++--- arch/loongarch/kernel/machine_kexec.c | 6 +- arch/loongarch/kernel/relocate.c | 64 +++++++++++++------ arch/loongarch/kernel/relocate_kernel.S | 7 +- arch/loongarch/power/suspend_asm.S | 5 +- drivers/firmware/efi/libstub/loongarch-stub.c | 8 +-- 12 files changed, 109 insertions(+), 67 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 27c10a4cee53..cf8c42ff78fc 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -478,11 +478,9 @@ config RANDOMIZE_BASE_MAX_OFFSET help When kASLR is active, this provides the maximum offset that will be applied to the kernel image. It should be set according to the - amount of physical RAM available in the target system minus - PHYSICAL_START and must be a power of 2. + amount of physical RAM available in the target system. - This is limited by the size of KPRANGE1, 256Mb on 32-bit or 1Gb with - 64-bit. The default is 16Mb. + This is limited by the size of the lower address memory, 256MB. config NODES_SHIFT int @@ -656,6 +654,7 @@ config KEXEC config CRASH_DUMP bool "Build kdump crash kernel" + select RELOCATABLE help Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels which are @@ -665,17 +664,6 @@ config CRASH_DUMP For more details see Documentation/admin-guide/kdump/kdump.rst -config PHYSICAL_START - hex "Physical address where the kernel is loaded" - default "0x90000000a0000000" - depends on CRASH_DUMP - help - This gives the XKPRANGE address where the kernel is loaded. - If you plan to use kernel for capturing the crash dump change - this value to start of the reserved region (the "X" value as - specified in the "crashkernel=YM@XM" command line boot parameter - passed to the panic-ed kernel). 
- config ARCH_IOREMAP bool "Enable LoongArch DMW-based ioremap()" help diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index be120ea442f6..5d07a4f0e260 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -85,10 +85,6 @@ endif # include arch/loongarch/Kbuild.platforms -ifdef CONFIG_PHYSICAL_START -load-y = $(CONFIG_PHYSICAL_START) -endif - entry-y = $(shell $(objtree)/arch/loongarch/tools/elf-entry vmlinux) drivers-$(CONFIG_PCI) += arch/loongarch/pci/ diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c index e3076445d2f4..41d51d5acbec 100644 --- a/arch/loongarch/boot/compressed/decompress.c +++ b/arch/loongarch/boot/compressed/decompress.c @@ -71,7 +71,7 @@ void error(char *x) #include "../../../../lib/decompress_unzstd.c" #endif -void decompress_kernel(unsigned long boot_heap_start) +void decompress_kernel(unsigned long boot_heap_start, long kdump_reloc_offset) { unsigned long zimage_start, zimage_size; @@ -96,7 +96,7 @@ void decompress_kernel(unsigned long boot_heap_start) /* Decompress the kernel with according algorithm */ __decompress((char *)zimage_start, zimage_size, 0, 0, - (void *)VMLINUX_LOAD_ADDRESS_ULL, 0, 0, error); + (void *)VMLINUX_LOAD_ADDRESS_ULL + kdump_reloc_offset, 0, 0, error); puts("Now, booting the kernel...\n"); } diff --git a/arch/loongarch/boot/compressed/head.S b/arch/loongarch/boot/compressed/head.S index ad34d02ccf66..b2f676c5b4fc 100644 --- a/arch/loongarch/boot/compressed/head.S +++ b/arch/loongarch/boot/compressed/head.S @@ -14,6 +14,7 @@ SYM_CODE_START(start) move s0, a0 move s1, a1 move s2, a2 + move s3, a3 /* for kdump */ /* Config Direct Mapping */ li.d t0, CSR_DMW0_INIT @@ -22,26 +23,34 @@ SYM_CODE_START(start) csrwr t0, LOONGARCH_CSR_DMWIN1 /* Clear BSS */ - la.abs a0, _edata - la.abs a2, _end + la.pcrel a0, _edata + la.pcrel a2, _end 1: st.d zero, a0, 0 addi.d a0, a0, 8 bne a2, a0, 1b - la.abs a0, .heap /* heap address */ - la.abs sp, .stack + 8192 /* stack address */ + la.pcrel a0, .heap /* heap address */ + la.pcrel sp, .stack + 8192 /* stack address */ - la.pcrel ra, 2f + la.pcrel a0, .heap /* heap address */ + move a1, a3 /* kdump relocate offset */ + bnez a3, 2f + li.w a1, 0 + li.w s3, 0 +2: + la.pcrel ra, 3f la.pcrel t4, decompress_kernel jirl zero, t4, 0 -2: +3: move a0, s0 move a1, s1 move a2, s2 + move a3, s3 PTR_LI t4, KERNEL_ENTRY + add.d t4, t4, a3 jirl zero, t4, 0 -3: - b 3b +4: + b 4b SYM_CODE_END(start) .comm .heap,BOOT_HEAP_SIZE,4 diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h index cf95cd3eb2de..5a32604c8047 100644 --- a/arch/loongarch/include/asm/kexec.h +++ b/arch/loongarch/include/asm/kexec.h @@ -45,7 +45,8 @@ typedef void (*do_kexec_t)(unsigned long efi_boot, unsigned long cmdline_ptr, unsigned long systable_ptr, unsigned long start_addr, - unsigned long first_ind_entry); + unsigned long first_ind_entry, + unsigned long kdump_reloc_offset); struct kimage; extern const unsigned char relocate_new_kernel[]; diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index f9ed30ab1a1f..1ed681be9c22 100644 --- a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -36,6 +37,14 @@ cfi_restore \reg \offset \docfi .endm +/* Jump to the runtime virtual address. 
*/ + .macro JUMP_VIRT_ADDR temp1 temp2 + li.d \temp1, CACHE_BASE + pcaddi \temp2, 0 + or \temp1, \temp1, \temp2 + jirl zero, \temp1, 0xc + .endm + .macro BACKUP_T0T1 csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 322862040776..908a225b190d 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -27,11 +27,8 @@ SYM_CODE_START(kernel_entry) # kernel entry point li.d t0, CSR_DMW1_INIT # CA, PLV0, 0x9000 xxxx xxxx xxxx csrwr t0, LOONGARCH_CSR_DMWIN1 - /* We might not get launched at the address the kernel is linked to, - so we jump there. */ - la.abs t0, 0f - jirl zero, t0, 0 -0: + JUMP_VIRT_ADDR t0, t1 + /* Enable PG */ li.w t0, 0xb0 # PLV=0, IE=0, PG=1 csrwr t0, LOONGARCH_CSR_CRMD @@ -67,14 +64,23 @@ SYM_CODE_START(kernel_entry) # kernel entry point set_saved_sp sp, t0, t1 #ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_CRASH_DUMP + beqz a3, 1f + move a0, a3 + + /* Apply the relocations for kdump */ + bl relocate_kdump_kernel + b 2f +#endif +1: /* Copy kernel and apply the relocations */ bl relocate_kernel +2: /* Repoint the sp into the new kernel image */ PTR_LI sp, (_THREAD_SIZE - 32 - PT_SIZE) PTR_ADD sp, sp, tp set_saved_sp sp, t0, t1 - PTR_ADDI sp, sp, -4 * SZREG # init stack pointer /* * relocate_kernel returns the entry point either @@ -101,9 +107,8 @@ SYM_CODE_START(smpboot_entry) li.d t0, CSR_DMW1_INIT # CA, PLV0 csrwr t0, LOONGARCH_CSR_DMWIN1 - la.abs t0, 0f - jirl zero, t0, 0 -0: + JUMP_VIRT_ADDR t0, t1 + /* Enable PG */ li.w t0, 0xb0 # PLV=0, IE=0, PG=1 csrwr t0, LOONGARCH_CSR_CRMD diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index 2dcb9e003657..a370b61058f3 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -38,6 +38,7 @@ static unsigned long cmdline_ptr; static unsigned long systable_ptr; static unsigned long start_addr; static unsigned long first_ind_entry; +static unsigned long kdump_reloc_offset; static void kexec_image_info(const struct kimage *kimage) { @@ -131,7 +132,7 @@ void kexec_reboot(void) #endif do_kexec = (void *)reboot_code_buffer; - do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry); + do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry, kdump_reloc_offset); unreachable(); } @@ -195,6 +196,8 @@ void crash_smp_send_stop(void) unsigned long timeout; static int cpus_stopped; + kdump_reloc_offset = crashk_res.start; + /* * This function can be called twice in panic path, but obviously * we should execute this only once. 
@@ -290,6 +293,7 @@ void machine_kexec(struct kimage *image) pr_notice("EFI boot flag 0x%lx\n", efi_boot); pr_notice("Command line at 0x%lx\n", cmdline_ptr); pr_notice("System table at 0x%lx\n", systable_ptr); + pr_notice("Kdump relocation offset 0x%lx\n", kdump_reloc_offset); pr_notice("We will call new kernel at 0x%lx\n", start_addr); pr_notice("Bye ...\n"); diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index ba04dd1b1bab..d2933c4f0856 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -64,36 +64,34 @@ static void __init apply_r_loongarch_mark_la_rel(u32 *loc_new, long offset) lu52id->reg2i12_format.simmediate = (dest >> 52) & 0xfff; } -static void (*reloc_handlers_rel[]) (u32 *, long) __initdata = { - [R_LARCH_32] = apply_r_loongarch_32_rel, - [R_LARCH_64] = apply_r_loongarch_64_rel, - [R_LARCH_MARK_LA] = apply_r_loongarch_mark_la_rel, -}; - -static int __init do_relocations(void *kbase_old, void *kbase_new, long offset) +static int __init do_relocations(void *kbase_new, long offset, u32 *rstart, u32 *rend) { int type; u32 *r; u32 *loc_new; - u32 *loc_orig; - for (r = _relocation_start; r < _relocation_end; r++) { + for (r = rstart; r < rend; r++) { /* Sentinel for last relocation */ if (*r == 0) break; type = (*r >> 24) & 0xff; - loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2)); - loc_new = RELOCATED(loc_orig); + loc_new = (void *)(kbase_new + ((*r & 0x00ffffff) << 2)); - if (reloc_handlers_rel[type] == NULL) { - /* Unsupported relocation */ - pr_err("Unhandled relocation type %d at 0x%pK\n", - type, loc_orig); + switch (type) { + case R_LARCH_32: + apply_r_loongarch_32_rel(loc_new, offset); + break; + case R_LARCH_64: + apply_r_loongarch_64_rel(loc_new, offset); + break; + case R_LARCH_MARK_LA: + apply_r_loongarch_mark_la_rel(loc_new, offset); + break; + default: + pr_err("Unhandled relocation type %d\n", type); return -ENOEXEC; } - - reloc_handlers_rel[type](loc_new, offset); } return 0; @@ -231,7 +229,7 @@ void *__init relocate_kernel(void) memcpy(loc_new, &_text, kernel_length); /* Perform relocations on the new kernel */ - res = do_relocations(&_text, loc_new, offset); + res = do_relocations(loc_new, offset, _relocation_start, _relocation_end); if (res < 0) goto out; @@ -259,6 +257,36 @@ void *__init relocate_kernel(void) return kernel_entry; } +void *__init relocate_kdump_kernel(long offset) +{ +#ifdef CONFIG_CRASH_DUMP + void *loc_new; + int res = 1; + u32 *rstart, *rend; + /* Default to original kernel entry point */ + void *kernel_entry = start_kernel; + + rstart = RELOCATED((unsigned long)_relocation_start - (unsigned long)&_text + + VMLINUX_LOAD_ADDRESS); + rend = RELOCATED((unsigned long)_relocation_end - (unsigned long)&_text + + VMLINUX_LOAD_ADDRESS); + + loc_new = (void *)(offset + VMLINUX_LOAD_ADDRESS); + + /* Perform relocations on the new kernel */ + res = do_relocations(loc_new, offset, rstart, rend); + if (res < 0) + return NULL; + + /* Return the new kernel's entry point */ + kernel_entry = RELOCATED((unsigned long)start_kernel - (unsigned long)&_text + + VMLINUX_LOAD_ADDRESS); + + return kernel_entry; +#endif + return NULL; +} + /* * Show relocation information on panic. 
*/ diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S index 2bbcacb0ea92..3a4cf7426891 100644 --- a/arch/loongarch/kernel/relocate_kernel.S +++ b/arch/loongarch/kernel/relocate_kernel.S @@ -21,6 +21,7 @@ SYM_CODE_START(relocate_new_kernel) * a2: System table pointer for the new kernel * a3: Start address to jump to after relocation * a4: Pointer to the current indirection page entry + * a5: Kdump kernel relocate offset */ move s0, a4 @@ -80,9 +81,11 @@ done: /* * Jump to the new kernel, - * make sure the values of a0, a1, a2 and a3 are not changed. + * make sure the values of a0, a1, a2, a3 and a5 are not changed. */ - jr a3 + move t0, a3 + move a3, a5 + jr t0 SYM_CODE_END(relocate_new_kernel) #ifdef CONFIG_SMP diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S index 06ea5c48eb68..f002b28f50ea 100644 --- a/arch/loongarch/power/suspend_asm.S +++ b/arch/loongarch/power/suspend_asm.S @@ -82,9 +82,8 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL) li.d t0, CSR_DMW1_INIT # CA, PLV0 csrwr t0, LOONGARCH_CSR_DMWIN1 - la.abs t0, 0f - jr t0 -0: + JUMP_VIRT_ADDR t0, t1 + la.pcrel t0, acpi_saved_sp ld.d sp, t0, 0 SETUP_WAKEUP diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index d27dea9c940e..1aefb84d1fc8 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -12,10 +12,10 @@ #define BOOT_HEAP_SIZE 0x400000 typedef void __noreturn (*kernel_entry_t)(bool efi, unsigned long cmdline, - unsigned long systab); + unsigned long systab, long kdump_reloc_offset); extern long kernel_entaddr; -extern void decompress_kernel(unsigned long boot_heap_start); +extern void decompress_kernel(unsigned long boot_heap_start, long kdump_reloc_offset); static unsigned char efi_heap[BOOT_HEAP_SIZE]; static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID; @@ -66,7 +66,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, csr_write64(CSR_DMW0_INIT, LOONGARCH_CSR_DMWIN0); csr_write64(CSR_DMW1_INIT, LOONGARCH_CSR_DMWIN1); - decompress_kernel((unsigned long)efi_heap); + decompress_kernel((unsigned long)efi_heap, 0); kernel_entry = (kernel_entry_t)kernel_entaddr; return EFI_SUCCESS; @@ -118,5 +118,5 @@ efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, priv.runtime_entry_count * desc_size, desc_size, desc_ver, priv.runtime_map); - kernel_entry(true, (unsigned long)cmdline_ptr, (unsigned long)efi_system_table); + kernel_entry(true, (unsigned long)cmdline_ptr, (unsigned long)efi_system_table, 0); } -- Gitee From 49d72048a51874d1d6b3ac37dc01c146138cee3c Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Thu, 23 Feb 2023 19:06:31 +0800 Subject: [PATCH 123/241] LoongArch: kdump: Add crashkernel=YM handling When the kernel crashkernel parameter is specified with just a size, we are supposed to allocate a region from RAM to store the crashkernel, "crashkernel=512M" would be recommended for kdump. Fix this by lifting similar code from x86, importing it to LoongArch with LoongArch specific parameters added. We allocate the crashkernel region from the first 4GB of physical memory (because SWIOTLB should be allocated below 4GB). However, LoongArch currently does not implement crashkernel_low and crashkernel_high the same as x86. When X is not specified, crash_base defaults to 0 (crashkernel=YM@XM). 
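
The memblock call being relied on is memblock_phys_alloc_range(size,
align, start, end), which returns the physical base on success and 0 on
failure. The two cases, sketched with illustrative sizes:

  phys_addr_t base;

  /* "crashkernel=512M": memblock picks any 2MB-aligned base below 4GB */
  base = memblock_phys_alloc_range(SZ_512M, SZ_2M, SZ_2M, SZ_4G);

  /* "crashkernel=512M@256M": only the exact window can satisfy it */
  base = memblock_phys_alloc_range(SZ_512M, SZ_2M, SZ_256M, SZ_256M + SZ_512M);
  if (!base)
          pr_warn("crashkernel reservation failed\n");
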
Change-Id: Ie0fce9658dca454387ffa64ff328c19d2c38bfbd
Signed-off-by: Youling Tang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/setup.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 8597c627d8c2..fe659a14c898 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -172,11 +172,14 @@ void __init arch_reserve_vmcore(void)
 #endif
 }

+/* 2MB alignment for crash kernel regions */
+#define CRASH_ALIGN	SZ_2M
+#define CRASH_ADDR_MAX	SZ_4G
+
 void __init arch_parse_crashkernel(void)
 {
 #ifdef CONFIG_KEXEC
	int ret;
-	unsigned long long start;
	unsigned long long total_mem;
	unsigned long long crash_base, crash_size;

@@ -185,8 +188,13 @@ void __init arch_parse_crashkernel(void)
	if (ret < 0 || crash_size <= 0)
		return;

-	start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size);
-	if (start != crash_base) {
+	if (crash_base <= 0) {
+		crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_MAX);
+		if (!crash_base) {
+			pr_warn("crashkernel reservation failed - No suitable area found.\n");
+			return;
+		}
+	} else if (!memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size)) {
		pr_warn("Invalid memory region reserved for crash kernel\n");
		return;
	}
-- 
Gitee

From 937a60c028b582d77bebb2a17f8e8d79d2eb685f Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 11 Apr 2023 17:42:37 +0800
Subject: [PATCH 124/241] LoongArch: Enable PG when wakeup from suspend

Some firmwares don't enable PG when waking up from suspend, so do it in
the kernel. This improves the kernel's compatibility with such firmware
at boot.

Change-Id: Ib0e8a08efa449077929896033f0a91b1f7259fd6
Signed-off-by: Baoqi Zhang
Signed-off-by: Huacai Chen
---
 arch/loongarch/power/suspend_asm.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S
index f002b28f50ea..49de8527f9bb 100644
--- a/arch/loongarch/power/suspend_asm.S
+++ b/arch/loongarch/power/suspend_asm.S
@@ -84,6 +84,10 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL)

	JUMP_VIRT_ADDR	t0, t1

+	/* Enable PG */
+	li.w		t0, 0xb0	# PLV=0, IE=0, PG=1
+	csrwr		t0, LOONGARCH_CSR_CRMD
+
	la.pcrel	t0, acpi_saved_sp
	ld.d		sp, t0, 0
	SETUP_WAKEUP
-- 
Gitee

From 07885217a31b3bd3b4953739d91239d69c188b65 Mon Sep 17 00:00:00 2001
From: Hongchen Zhang
Date: Wed, 24 May 2023 15:41:32 +0800
Subject: [PATCH 125/241] LoongArch: Let pmd_present() return true when splitting pmd

When we split a pmd into ptes, pmd_present() and pmd_trans_huge() should
return true, otherwise it would be treated as a swap pmd. This is the
same as what arm64 does in commit b65399f6111b ("arm64/mm: Change THP
helpers to comply with generic MM semantics"); we also add a new bit
named _PAGE_PRESENT_INVALID for LoongArch.
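To make the failure mode concrete, a minimal sketch (not the exact
generic-MM call chain; handle_swap_pmd() is a placeholder name):

  pmd_t pmd = pmd_mkinvalid(*pmdp); /* split in progress, _PAGE_PRESENT cleared */
  if (!pmd_present(pmd))            /* was false for a splitting huge pmd ... */
          handle_swap_pmd(pmd);     /* ... so it was mistaken for a swap pmd */

With _PAGE_PRESENT_INVALID set by pmd_mkinvalid() and tested by
pmd_present(), a splitting pmd is now correctly reported as present.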
Change-Id: I0e6d396ea817c6b7bc8fb7dce2b894c2c4419df4
Signed-off-by: Hongchen Zhang
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/pgtable-64.h   | 2 +-
 arch/loongarch/include/asm/pgtable-bits.h | 2 ++
 arch/loongarch/include/asm/pgtable.h      | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h
index fe15f72dc79a..33ec860c368d 100644
--- a/arch/loongarch/include/asm/pgtable-64.h
+++ b/arch/loongarch/include/asm/pgtable-64.h
@@ -198,7 +198,7 @@ static inline int pmd_bad(pmd_t pmd)
 static inline int pmd_present(pmd_t pmd)
 {
	if (unlikely(pmd_val(pmd) & _PAGE_HUGE))
-		return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE));
+		return !!(pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PRESENT_INVALID));

	return pmd_val(pmd) != (unsigned long)invalid_pte_table;
 }
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index e6f4f1c3d85f..780bec355066 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -21,12 +21,14 @@
 #define	_PAGE_HGLOBAL_SHIFT	12 /* HGlobal is a PMD bit */
 #define	_PAGE_PFN_SHIFT		12
 #define	_PAGE_PFN_END_SHIFT	48
+#define	_PAGE_PRESENT_INVALID_SHIFT 60
 #define	_PAGE_NO_READ_SHIFT	61
 #define	_PAGE_NO_EXEC_SHIFT	62
 #define	_PAGE_RPLV_SHIFT	63

 /* Used by software */
 #define _PAGE_PRESENT		(_ULCAST_(1) << _PAGE_PRESENT_SHIFT)
+#define _PAGE_PRESENT_INVALID	(_ULCAST_(1) << _PAGE_PRESENT_INVALID_SHIFT)
 #define _PAGE_WRITE		(_ULCAST_(1) << _PAGE_WRITE_SHIFT)
 #define _PAGE_ACCESSED		(_ULCAST_(1) << _PAGE_ACCESSED_SHIFT)
 #define _PAGE_MODIFIED		(_ULCAST_(1) << _PAGE_MODIFIED_SHIFT)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 45ef7f09fcdd..6eb5a124409f 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -291,6 +291,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)

 static inline pmd_t pmd_mkinvalid(pmd_t pmd)
 {
+	pmd_val(pmd) |= _PAGE_PRESENT_INVALID;
	pmd_val(pmd) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE);

	return pmd;
-- 
Gitee

From fa981a5f2901e1d471bd35e43c4de6c7c7099eed Mon Sep 17 00:00:00 2001
From: Qi Hu
Date: Thu, 8 Jun 2023 10:27:38 +0800
Subject: [PATCH 126/241] LoongArch: Fix the write_fcsr() macro

The "write_fcsr()" macro uses the wrong positions for val and dest in
asm. Fix it!

Change-Id: Ifa9e0af37e4f5c62e11404f6e1c87d32dbb0c01c
Reported-by: Miao HAO
Signed-off-by: Qi Hu
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/loongarchregs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h
index 474806e0c866..28477de2989f 100644
--- a/arch/loongarch/include/asm/loongarchregs.h
+++ b/arch/loongarch/include/asm/loongarchregs.h
@@ -1481,7 +1481,7 @@ __BUILD_CSR_OP(tlbidx)
 #define write_fcsr(dest, val) \
 do { \
	__asm__ __volatile__( \
-	"	movgr2fcsr	%0, "__stringify(dest)"	\n" \
+	"	movgr2fcsr	"__stringify(dest)", %0	\n" \
	: : "r" (val)); \
 } while (0)

-- 
Gitee

From 0b0a27d5deffb4d362586f147ed884dd7d430d87 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 18 May 2023 12:19:35 +0800
Subject: [PATCH 127/241] LoongArch: Fix perf event id calculation

LoongArch PMCFG has a 10-bit event id rather than an 8-bit one, so fix it.
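The difference matters for raw event ids above 0xff. A sketch, assuming
M_PERFCTL_EVENT() masks the low 10 bits of its argument:

  unsigned int old_id = config & 0xff;           /* 0x1a5 -> 0xa5: wrong event */
  unsigned int new_id = M_PERFCTL_EVENT(config); /* 0x1a5 -> 0x1a5: preserved */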
Change-Id: Ia8ca2402bda5b2e6b78817506647cba17e7d6efe
Cc: stable@vger.kernel.org
Signed-off-by: Jun Yi
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/perf_event.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
index ff3c35d0c27d..49174d08c36a 100644
--- a/arch/loongarch/kernel/perf_event.c
+++ b/arch/loongarch/kernel/perf_event.c
@@ -272,7 +272,7 @@ static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);

	/* Make sure interrupt enabled. */
-	cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+	cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base) |
		(evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;

	cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();
@@ -585,7 +585,7 @@ static struct pmu pmu = {

 static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
 {
-	return (pev->event_id & 0xff);
+	return M_PERFCTL_EVENT(pev->event_id);
 }

 static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
@@ -840,9 +840,9 @@ static void resume_local_counters(void)

 static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
 {
-	/* currently most cores have 7-bit event numbers */
-	unsigned int raw_id = config & 0xff;
-	unsigned int base_id = raw_id & 0x7f;
+	/* currently most cores have 10-bit event numbers */
+	unsigned int raw_id = M_PERFCTL_EVENT(config);
+	unsigned int base_id = M_PERFCTL_EVENT(raw_id);

	switch (current_cpu_data.cputype) {
	case CPU_LOONGSON64:
-- 
Gitee

From 0ffb3bba20ae17ec8c9d2d3e573bd18348a4ce08 Mon Sep 17 00:00:00 2001
From: Immad Mir
Date: Sun, 14 May 2023 12:13:46 +0530
Subject: [PATCH 128/241] LoongArch: Fix debugfs_create_dir() error checking

debugfs_create_dir() returns an ERR_PTR in case of an error, and the
correct way of checking it is to use the IS_ERR_OR_NULL inline function
rather than a simple NULL comparison. This patch fixes the issue.

Change-Id: I32a5d09d402d817ba29c395206c87cd986a97506
Suggested-By: Ivan Orlov
Signed-off-by: Immad Mir
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/unaligned.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
index bdff825d29ef..85fae3d2d71a 100644
--- a/arch/loongarch/kernel/unaligned.c
+++ b/arch/loongarch/kernel/unaligned.c
@@ -485,7 +485,7 @@ static int __init debugfs_unaligned(void)
	struct dentry *d;

	d = debugfs_create_dir("loongarch", NULL);
-	if (!d)
+	if (IS_ERR_OR_NULL(d))
		return -ENOMEM;

	debugfs_create_u32("unaligned_instructions_user",
-- 
Gitee

From 6373ad6bdc64a5ea3dc20569e1360797cccdfd4e Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Tue, 20 Jun 2023 11:06:34 +0300
Subject: [PATCH 129/241] LoongArch: Delete unnecessary debugfs checking

Debugfs functions are not supposed to be checked for errors. This is
sort of unusual but it is described in the comments for the
debugfs_create_dir() function. Also debugfs_create_dir() can never
return NULL.
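This works because the debugfs_create_*() helpers accept an ERR_PTR
parent and quietly do nothing with it, so the idiomatic pattern is simply
(illustrative):

  struct dentry *d = debugfs_create_dir("loongarch", NULL);

  /* No error check needed: if d is an ERR_PTR, this call is a no-op. */
  debugfs_create_u32("unaligned_instructions_user", S_IRUGO, d,
                     &unaligned_instructions_user);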
Change-Id: Ib14613b55a8100b87a5d2be3638a5fed8914e4c6 Reviewed-by: WANG Xuerui Signed-off-by: Dan Carpenter Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unaligned.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c index 85fae3d2d71a..3abf163dda05 100644 --- a/arch/loongarch/kernel/unaligned.c +++ b/arch/loongarch/kernel/unaligned.c @@ -485,8 +485,6 @@ static int __init debugfs_unaligned(void) struct dentry *d; d = debugfs_create_dir("loongarch", NULL); - if (IS_ERR_OR_NULL(d)) - return -ENOMEM; debugfs_create_u32("unaligned_instructions_user", S_IRUGO, d, &unaligned_instructions_user); -- Gitee From 55b9bd7ca8069fac9b7f4c346c5be8fc76770a7d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:14 +0800 Subject: [PATCH 130/241] LoongArch: Clean up the architectural interrupt definitions While interrupts are assigned ECodes `64 + interrupt number`, all existing use sites of interrupt numbers want the 64 subtracted. Re-arrange the definitions so that the actual interrupt number is used everywhere, and make EXCCODE_INT_END inclusive as it is more intuitive that way. While at it, according to the asm/loongarch.h definitions, the total number of architectural interrupts should be 14, but various other places indicate otherwise (13 or 15). Those places have been adjusted to 14 as well for consistency. Change-Id: I862b0ce941c496fc858ff755de3d213c495cefaf Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarchregs.h | 47 ++++++++++++---------- arch/loongarch/kernel/traps.c | 2 +- drivers/irqchip/irq-loongarch-cpu.c | 6 +-- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index 28477de2989f..b32d419bd57e 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -311,8 +311,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ECFG_VS_WIDTH 3 #define CSR_ECFG_VS (_ULCAST_(0x7) << CSR_ECFG_VS_SHIFT) #define CSR_ECFG_IM_SHIFT 0 -#define CSR_ECFG_IM_WIDTH 13 -#define CSR_ECFG_IM (_ULCAST_(0x1fff) << CSR_ECFG_IM_SHIFT) +#define CSR_ECFG_IM_WIDTH 14 +#define CSR_ECFG_IM (_ULCAST_(0x3fff) << CSR_ECFG_IM_SHIFT) #define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ #define CSR_ESTAT_ESUBCODE_SHIFT 22 @@ -322,8 +322,8 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define CSR_ESTAT_EXC_WIDTH 6 #define CSR_ESTAT_EXC (_ULCAST_(0x3f) << CSR_ESTAT_EXC_SHIFT) #define CSR_ESTAT_IS_SHIFT 0 -#define CSR_ESTAT_IS_WIDTH 15 -#define CSR_ESTAT_IS (_ULCAST_(0x7fff) << CSR_ESTAT_IS_SHIFT) +#define CSR_ESTAT_IS_WIDTH 14 +#define CSR_ESTAT_IS (_ULCAST_(0x3fff) << CSR_ESTAT_IS_SHIFT) #define LOONGARCH_CSR_ERA 0x6 /* ERA */ @@ -1087,7 +1087,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg) #define ECFGF_IPI (_ULCAST_(1) << ECFGB_IPI) #define ECFGF(hwirq) (_ULCAST_(1) << hwirq) -#define ESTATF_IP 0x00001fff +#define ESTATF_IP 0x00003fff #define LOONGARCH_IOCSR_FEATURES 0x8 #define IOCSRF_TEMP BIT_ULL(0) @@ -1418,23 +1418,26 @@ __BUILD_CSR_OP(tlbidx) #define EXCSUBCODE_GCHC 1 /* Hardware caused */ #define EXCCODE_SE 25 /* Security */ -#define EXCCODE_INT_START 64 -#define EXCCODE_SIP0 64 -#define EXCCODE_SIP1 65 -#define EXCCODE_IP0 66 -#define EXCCODE_IP1 67 -#define EXCCODE_IP2 68 -#define EXCCODE_IP3 69 -#define EXCCODE_IP4 70 -#define EXCCODE_IP5 71 -#define EXCCODE_IP6 72 -#define 
EXCCODE_IP7 73 -#define EXCCODE_PMC 74 /* Performance Counter */ -#define EXCCODE_TIMER 75 -#define EXCCODE_IPI 76 -#define EXCCODE_NMI 77 -#define EXCCODE_INT_END 78 -#define EXCCODE_INT_NUM (EXCCODE_INT_END - EXCCODE_INT_START) +/* Interrupt numbers */ +#define INT_SWI0 0 /* Software Interrupts */ +#define INT_SWI1 1 +#define INT_HWI0 2 /* Hardware Interrupts */ +#define INT_HWI1 3 +#define INT_HWI2 4 +#define INT_HWI3 5 +#define INT_HWI4 6 +#define INT_HWI5 7 +#define INT_HWI6 8 +#define INT_HWI7 9 +#define INT_PCOV 10 /* Performance Counter Overflow */ +#define INT_TI 11 /* Timer */ +#define INT_IPI 12 +#define INT_NMI 13 + +/* ExcCodes corresponding to interrupts */ +#define EXCCODE_INT_NUM (INT_NMI + 1) +#define EXCCODE_INT_START 64 +#define EXCCODE_INT_END (EXCCODE_INT_START + EXCCODE_INT_NUM - 1) /* FPU register names */ #define LOONGARCH_FCSR0 $r0 diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 591f268e1b38..fd32f3ac2143 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -854,7 +854,7 @@ void __init trap_init(void) long i; /* Set interrupt vector handler */ - for (i = EXCCODE_INT_START; i < EXCCODE_INT_END; i++) + for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++) set_handler(i * VECSIZE, handle_vint, VECSIZE); set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index ea4841ffc470..743015d0e26f 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -44,17 +44,17 @@ static void handle_cpu_irq(struct pt_regs *regs) int get_ipi_irq(void) { - return irq_create_mapping(irq_domain, EXCCODE_IPI - EXCCODE_INT_START); + return irq_create_mapping(irq_domain, INT_IPI); } int get_pmc_irq(void) { - return irq_create_mapping(irq_domain, EXCCODE_PMC - EXCCODE_INT_START); + return irq_create_mapping(irq_domain, INT_PCOV); } int get_timer_irq(void) { - return irq_create_mapping(irq_domain, EXCCODE_TIMER - EXCCODE_INT_START); + return irq_create_mapping(irq_domain, INT_TI); } static int loongarch_cpu_intc_map(struct irq_domain *d, unsigned int irq, -- Gitee From 4c19d3cf064410e133477cc2b7f351842a207a7f Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:15 +0800 Subject: [PATCH 131/241] LoongArch: Define regular names for BCE/WATCH/HVC/GSPR exceptions Define them according to the ISA manual, in order to enable matching the sub-exceptions for humanization purposes later. Change-Id: Ie424b8ff1572017d9c37e69b2ebe22570928ff89 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarchregs.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index b32d419bd57e..24f450063aac 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -1397,7 +1397,7 @@ __BUILD_CSR_OP(tlbidx) #define EXSUBCODE_ADEF 0 /* Fetch Instruction */ #define EXSUBCODE_ADEM 1 /* Access Memory*/ #define EXCCODE_ALE 9 /* Unalign Access */ -#define EXCCODE_OOB 10 /* Out of bounds */ +#define EXCCODE_BCE 10 /* Bounds Check Error */ #define EXCCODE_SYS 11 /* System call */ #define EXCCODE_BP 12 /* Breakpoint */ #define EXCCODE_INE 13 /* Inst. 
Not Exist */ @@ -1408,11 +1408,13 @@ __BUILD_CSR_OP(tlbidx) #define EXCCODE_FPE 18 /* Floating Point Exception */ #define EXCSUBCODE_FPE 0 /* Floating Point Exception */ #define EXCSUBCODE_VFPE 1 /* Vector Exception */ -#define EXCCODE_WATCH 19 /* Watch address reference */ +#define EXCCODE_WATCH 19 /* WatchPoint Exception */ + #define EXCSUBCODE_WPEF 0 /* ... on Instruction Fetch */ + #define EXCSUBCODE_WPEM 1 /* ... on Memory Accesses */ #define EXCCODE_BTDIS 20 /* Binary Trans. Disabled */ #define EXCCODE_BTE 21 /* Binary Trans. Exception */ -#define EXCCODE_PSI 22 /* Guest Privileged Error */ -#define EXCCODE_HYP 23 /* Hypercall */ +#define EXCCODE_GSPR 22 /* Guest Privileged Error */ +#define EXCCODE_HVC 23 /* Hypercall */ #define EXCCODE_GCM 24 /* Guest CSR modified */ #define EXCSUBCODE_GCSC 0 /* Software caused */ #define EXCSUBCODE_GCHC 1 /* Hardware caused */ -- Gitee From d8a2b91e67c5c490cff9fe5b595b11da292bcf1d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:16 +0800 Subject: [PATCH 132/241] LoongArch: Print GPRs with ABI names when showing registers Show PC (CSR.ERA) in place of $zero, and also show the syscall restart flag (conveniently stuffed in regs[0]) if non-zero. Change-Id: Ibbe770050ba8c0377ff04f221d596fad8c75628f Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 36 ++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index fd32f3ac2143..e4a7da242844 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -166,22 +166,32 @@ static void __show_regs(const struct pt_regs *regs) const int field = 2 * sizeof(unsigned long); unsigned int excsubcode; unsigned int exccode; - int i; show_regs_print_info(KERN_DEFAULT); - /* - * Saved main processor registers - */ - for (i = 0; i < 32; ) { - if ((i % 4) == 0) - printk("$%2d :", i); - pr_cont(" %0*lx", field, regs->regs[i]); - - i++; - if ((i % 4) == 0) - pr_cont("\n"); - } + /* Print saved GPRs except $zero (substituting with PC/ERA) */ +#define GPR_FIELD(x) field, regs->regs[x] + printk("pc %0*lx ra %0*lx tp %0*lx sp %0*lx\n", + field, regs->csr_era, GPR_FIELD(1), GPR_FIELD(2), GPR_FIELD(3)); + printk("a0 %0*lx a1 %0*lx a2 %0*lx a3 %0*lx\n", + GPR_FIELD(4), GPR_FIELD(5), GPR_FIELD(6), GPR_FIELD(7)); + printk("a4 %0*lx a5 %0*lx a6 %0*lx a7 %0*lx\n", + GPR_FIELD(8), GPR_FIELD(9), GPR_FIELD(10), GPR_FIELD(11)); + printk("t0 %0*lx t1 %0*lx t2 %0*lx t3 %0*lx\n", + GPR_FIELD(12), GPR_FIELD(13), GPR_FIELD(14), GPR_FIELD(15)); + printk("t4 %0*lx t5 %0*lx t6 %0*lx t7 %0*lx\n", + GPR_FIELD(16), GPR_FIELD(17), GPR_FIELD(18), GPR_FIELD(19)); + printk("t8 %0*lx u0 %0*lx s9 %0*lx s0 %0*lx\n", + GPR_FIELD(20), GPR_FIELD(21), GPR_FIELD(22), GPR_FIELD(23)); + printk("s1 %0*lx s2 %0*lx s3 %0*lx s4 %0*lx\n", + GPR_FIELD(24), GPR_FIELD(25), GPR_FIELD(26), GPR_FIELD(27)); + printk("s5 %0*lx s6 %0*lx s7 %0*lx s8 %0*lx\n", + GPR_FIELD(28), GPR_FIELD(29), GPR_FIELD(30), GPR_FIELD(31)); + + /* The slot for $zero is reused as the syscall restart flag */ + if (regs->regs[0]) + printk("syscall restart flag: %0*lx\n", GPR_FIELD(0)); +#undef GPR_FIELD /* * Saved csr registers -- Gitee From ea499a3ff7250b01393d4324bb8e2f5da2b894c5 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:17 +0800 Subject: [PATCH 133/241] LoongArch: Print symbol info for $ra and CSR.ERA only for kernel-mode contexts Otherwise the addresses wouldn't make sense at all. 
While at it, align the "map keys" to maintain right-alignment with the "estat:" line too; also swap the ERA and ra lines so all CSRs are shown together. Change-Id: If60342416490b8a913739641cb1f5c5654b2238c Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e4a7da242844..062464e34797 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -191,16 +191,19 @@ static void __show_regs(const struct pt_regs *regs) /* The slot for $zero is reused as the syscall restart flag */ if (regs->regs[0]) printk("syscall restart flag: %0*lx\n", GPR_FIELD(0)); + + if (user_mode(regs)) { + printk(" ra: %0*lx\n", GPR_FIELD(1)); + printk(" ERA: %0*lx\n", field, regs->csr_era); + } else { + printk(" ra: %0*lx %pS\n", GPR_FIELD(1), (void *) regs->regs[1]); + printk(" ERA: %0*lx %pS\n", field, regs->csr_era, (void *) regs->csr_era); + } #undef GPR_FIELD /* * Saved csr registers */ - printk("era : %0*lx %pS\n", field, regs->csr_era, - (void *) regs->csr_era); - printk("ra : %0*lx %pS\n", field, regs->regs[1], - (void *) regs->regs[1]); - printk("CSR crmd: %08lx ", regs->csr_crmd); printk("CSR prmd: %08lx ", regs->csr_prmd); printk("CSR euen: %08lx ", regs->csr_euen); -- Gitee From 3bafb0cf9b6f92e8f06cca76f2c506da6dba71c4 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:18 +0800 Subject: [PATCH 134/241] LoongArch: Fix format of CSR lines during show_regs() Use uppercase CSR names throughout for consistency with the manual wording, and right-align the keys. The "CSR" part is inferrable from context, hence dropped for more horizontal space. Change-Id: I37e451741dfd65306e2511897259eb9c05bcaab9 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 062464e34797..adc848e7447a 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -201,16 +201,12 @@ static void __show_regs(const struct pt_regs *regs) } #undef GPR_FIELD - /* - * Saved csr registers - */ - printk("CSR crmd: %08lx ", regs->csr_crmd); - printk("CSR prmd: %08lx ", regs->csr_prmd); - printk("CSR euen: %08lx ", regs->csr_euen); - printk("CSR ecfg: %08lx ", regs->csr_ecfg); - printk("CSR estat: %08lx ", regs->csr_estat); - - pr_cont("\n"); + /* Print saved important CSRs */ + printk(" CRMD: %08lx\n", regs->csr_crmd); + printk(" PRMD: %08lx\n", regs->csr_prmd); + printk(" EUEN: %08lx\n", regs->csr_euen); + printk(" ECFG: %08lx\n", regs->csr_ecfg); + printk("ESTAT: %08lx\n", regs->csr_estat); exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT; -- Gitee From 0cee104d0e5aec1cc0b38f2ae130b0b2fda6736e Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:19 +0800 Subject: [PATCH 135/241] LoongArch: Humanize the CRMD line when showing registers Example output looks like: [ xx.xxxxxx] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) Some initial machinery for this pretty-printing format has been included in this patch as well. 
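For reference, the example value decodes as follows (bit layout per
CSR.CRMD):

  0xb0 = 0b10110000
    PLV  (bits 1:0) = 0 -> PLV0
    IE   (bit 2)    = 0 -> -IE
    DA   (bit 3)    = 0 -> -DA
    PG   (bit 4)    = 1 -> +PG
    DACF (bits 6:5) = 1 -> DACF=CC
    DACM (bits 8:7) = 1 -> DACM=CC
    WE   (bit 9)    = 0 -> -WE

This is the same 0xb0 that the wakeup code earlier in this series loads
into CRMD to run with paging enabled at PLV0.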
Change-Id: I53528856d0efe4864aa00f4c1a785bbbeb08c12e Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 51 ++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index adc848e7447a..be03a03e8de6 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -3,6 +3,7 @@ * Author: Huacai Chen * Copyright (C) 2020 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -161,6 +162,54 @@ static void show_code(unsigned int *pc, bool user) pr_cont("\n"); } +static void print_bool_fragment(const char *key, unsigned long val, bool first) +{ + /* e.g. "+PG", "-DA" */ + pr_cont("%s%c%s", first ? "" : " ", val ? '+' : '-', key); +} + +static void print_plv_fragment(const char *key, int val) +{ + /* e.g. "PLV0", "PPLV3" */ + pr_cont("%s%d", key, val); +} + +static void print_memory_type_fragment(const char *key, unsigned long val) +{ + const char *humanized_type; + + switch (val) { + case 0: + humanized_type = "SUC"; + break; + case 1: + humanized_type = "CC"; + break; + case 2: + humanized_type = "WUC"; + break; + default: + pr_cont(" %s=Reserved(%lu)", key, val); + return; + } + + /* e.g. " DATM=WUC" */ + pr_cont(" %s=%s", key, humanized_type); +} + +static void print_crmd(unsigned long x) +{ + printk(" CRMD: %08lx (", x); + print_plv_fragment("PLV", (int) FIELD_GET(CSR_CRMD_PLV, x)); + print_bool_fragment("IE", FIELD_GET(CSR_CRMD_IE, x), false); + print_bool_fragment("DA", FIELD_GET(CSR_CRMD_DA, x), false); + print_bool_fragment("PG", FIELD_GET(CSR_CRMD_PG, x), false); + print_memory_type_fragment("DACF", FIELD_GET(CSR_CRMD_DACF, x)); + print_memory_type_fragment("DACM", FIELD_GET(CSR_CRMD_DACM, x)); + print_bool_fragment("WE", FIELD_GET(CSR_CRMD_WE, x), false); + pr_cont(")\n"); +} + static void __show_regs(const struct pt_regs *regs) { const int field = 2 * sizeof(unsigned long); @@ -202,7 +251,7 @@ static void __show_regs(const struct pt_regs *regs) #undef GPR_FIELD /* Print saved important CSRs */ - printk(" CRMD: %08lx\n", regs->csr_crmd); + print_crmd(regs->csr_crmd); printk(" PRMD: %08lx\n", regs->csr_prmd); printk(" EUEN: %08lx\n", regs->csr_euen); printk(" ECFG: %08lx\n", regs->csr_ecfg); -- Gitee From 52bcc9090197d11e753da63b25352731051f133d Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:20 +0800 Subject: [PATCH 136/241] LoongArch: Humanize the PRMD line when showing registers Example output looks like: [ xx.xxxxxx] PRMD: 00000004 (PPLV0 +PIE -PWE) Change-Id: Ibc68df83c3b2645168a1cd792c1cb35106d6cc3b Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index be03a03e8de6..a328a9ea3a41 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -210,6 +210,15 @@ static void print_crmd(unsigned long x) pr_cont(")\n"); } +static void print_prmd(unsigned long x) +{ + printk(" PRMD: %08lx (", x); + print_plv_fragment("PPLV", (int) FIELD_GET(CSR_PRMD_PPLV, x)); + print_bool_fragment("PIE", FIELD_GET(CSR_PRMD_PIE, x), false); + print_bool_fragment("PWE", FIELD_GET(CSR_PRMD_PWE, x), false); + pr_cont(")\n"); +} + static void __show_regs(const struct pt_regs *regs) { const int field = 2 * sizeof(unsigned long); @@ -252,7 +261,7 @@ static void __show_regs(const struct pt_regs *regs) /* Print 
saved important CSRs */ print_crmd(regs->csr_crmd); - printk(" PRMD: %08lx\n", regs->csr_prmd); + print_prmd(regs->csr_prmd); printk(" EUEN: %08lx\n", regs->csr_euen); printk(" ECFG: %08lx\n", regs->csr_ecfg); printk("ESTAT: %08lx\n", regs->csr_estat); -- Gitee From c11cf2f00f4da4e6c250dc705d1fbc46a190d136 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:21 +0800 Subject: [PATCH 137/241] LoongArch: Humanize the EUEN line when showing registers Example output looks like: [ xx.xxxxxx] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) Change-Id: Ifa7badc6b0b1a4ee9c816954c5cdb6408cf8842f Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index a328a9ea3a41..d7650896d418 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -219,6 +219,16 @@ static void print_prmd(unsigned long x) pr_cont(")\n"); } +static void print_euen(unsigned long x) +{ + printk(" EUEN: %08lx (", x); + print_bool_fragment("FPE", FIELD_GET(CSR_EUEN_FPEN, x), true); + print_bool_fragment("SXE", FIELD_GET(CSR_EUEN_LSXEN, x), false); + print_bool_fragment("ASXE", FIELD_GET(CSR_EUEN_LASXEN, x), false); + print_bool_fragment("BTE", FIELD_GET(CSR_EUEN_LBTEN, x), false); + pr_cont(")\n"); +} + static void __show_regs(const struct pt_regs *regs) { const int field = 2 * sizeof(unsigned long); @@ -262,7 +272,7 @@ static void __show_regs(const struct pt_regs *regs) /* Print saved important CSRs */ print_crmd(regs->csr_crmd); print_prmd(regs->csr_prmd); - printk(" EUEN: %08lx\n", regs->csr_euen); + print_euen(regs->csr_euen); printk(" ECFG: %08lx\n", regs->csr_ecfg); printk("ESTAT: %08lx\n", regs->csr_estat); -- Gitee From 05d342fb32ad17959d510c46eb1f9a575ae1f03a Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:22 +0800 Subject: [PATCH 138/241] LoongArch: Humanize the ECFG line when showing registers Example output looks like: [ xx.xxxxxx] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) Change-Id: If8552304b140b339a4b0c72224706d33134749dc Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index d7650896d418..e5a960f7503f 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -197,6 +197,12 @@ static void print_memory_type_fragment(const char *key, unsigned long val) pr_cont(" %s=%s", key, humanized_type); } +static void print_intr_fragment(const char *key, unsigned long val) +{ + /* e.g. 
"LIE=0-1,3,5-7" */ + pr_cont("%s=%*pbl", key, EXCCODE_INT_NUM, &val); +} + static void print_crmd(unsigned long x) { printk(" CRMD: %08lx (", x); @@ -229,6 +235,13 @@ static void print_euen(unsigned long x) pr_cont(")\n"); } +static void print_ecfg(unsigned long x) +{ + printk(" ECFG: %08lx (", x); + print_intr_fragment("LIE", FIELD_GET(CSR_ECFG_IM, x)); + pr_cont(" VS=%d)\n", (int) FIELD_GET(CSR_ECFG_VS, x)); +} + static void __show_regs(const struct pt_regs *regs) { const int field = 2 * sizeof(unsigned long); @@ -273,7 +286,7 @@ static void __show_regs(const struct pt_regs *regs) print_crmd(regs->csr_crmd); print_prmd(regs->csr_prmd); print_euen(regs->csr_euen); - printk(" ECFG: %08lx\n", regs->csr_ecfg); + print_ecfg(regs->csr_ecfg); printk("ESTAT: %08lx\n", regs->csr_estat); exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; -- Gitee From 3a3cd1bb23cdd5e3adbcd1c6aceeef786b46bd8f Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:23 +0800 Subject: [PATCH 139/241] LoongArch: Humanize the ESTAT line when showing registers Example output looks like: [ xx.xxxxxx] ESTAT: 00001000 [INT] (IS=12 ECode=0 EsubCode=0) Change-Id: If953513e53866f576d08a29c7cbc1343a4b18c52 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 82 ++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index e5a960f7503f..ae669e2726e9 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -242,11 +242,83 @@ static void print_ecfg(unsigned long x) pr_cont(" VS=%d)\n", (int) FIELD_GET(CSR_ECFG_VS, x)); } +static const char *humanize_exc_name(unsigned int ecode, unsigned int esubcode) +{ + /* + * LoongArch users and developers are probably more familiar with + * those names found in the ISA manual, so we are going to print out + * the latter. This will require some mapping. + */ + switch (ecode) { + case EXCCODE_RSV: return "INT"; + case EXCCODE_TLBL: return "PIL"; + case EXCCODE_TLBS: return "PIS"; + case EXCCODE_TLBI: return "PIF"; + case EXCCODE_TLBM: return "PME"; + case EXCCODE_TLBNR: return "PNR"; + case EXCCODE_TLBNX: return "PNX"; + case EXCCODE_TLBPE: return "PPI"; + case EXCCODE_ADE: + switch (esubcode) { + case EXSUBCODE_ADEF: return "ADEF"; + case EXSUBCODE_ADEM: return "ADEM"; + } + break; + case EXCCODE_ALE: return "ALE"; + case EXCCODE_BCE: return "BCE"; + case EXCCODE_SYS: return "SYS"; + case EXCCODE_BP: return "BRK"; + case EXCCODE_INE: return "INE"; + case EXCCODE_IPE: return "IPE"; + case EXCCODE_FPDIS: return "FPD"; + case EXCCODE_LSXDIS: return "SXD"; + case EXCCODE_LASXDIS: return "ASXD"; + case EXCCODE_FPE: + switch (esubcode) { + case EXCSUBCODE_FPE: return "FPE"; + case EXCSUBCODE_VFPE: return "VFPE"; + } + break; + case EXCCODE_WATCH: + switch (esubcode) { + case EXCSUBCODE_WPEF: return "WPEF"; + case EXCSUBCODE_WPEM: return "WPEM"; + } + break; + case EXCCODE_BTDIS: return "BTD"; + case EXCCODE_BTE: return "BTE"; + case EXCCODE_GSPR: return "GSPR"; + case EXCCODE_HVC: return "HVC"; + case EXCCODE_GCM: + switch (esubcode) { + case EXCSUBCODE_GCSC: return "GCSC"; + case EXCSUBCODE_GCHC: return "GCHC"; + } + break; + /* + * The manual did not mention the EXCCODE_SE case, but print out it + * nevertheless. 
+ */ + case EXCCODE_SE: return "SE"; + } + + return "???"; +} + +static void print_estat(unsigned long x) +{ + unsigned int ecode = FIELD_GET(CSR_ESTAT_EXC, x); + unsigned int esubcode = FIELD_GET(CSR_ESTAT_ESUBCODE, x); + + printk("ESTAT: %08lx [%s] (", x, humanize_exc_name(ecode, esubcode)); + print_intr_fragment("IS", FIELD_GET(CSR_ESTAT_IS, x)); + pr_cont(" ECode=%d EsubCode=%d)\n", (int) ecode, (int) esubcode); +} + static void __show_regs(const struct pt_regs *regs) { const int field = 2 * sizeof(unsigned long); - unsigned int excsubcode; - unsigned int exccode; + unsigned int exccode = FIELD_GET(CSR_ESTAT_EXC, regs->csr_estat); show_regs_print_info(KERN_DEFAULT); @@ -287,11 +359,7 @@ static void __show_regs(const struct pt_regs *regs) print_prmd(regs->csr_prmd); print_euen(regs->csr_euen); print_ecfg(regs->csr_ecfg); - printk("ESTAT: %08lx\n", regs->csr_estat); - - exccode = ((regs->csr_estat) & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; - excsubcode = ((regs->csr_estat) & CSR_ESTAT_ESUBCODE) >> CSR_ESTAT_ESUBCODE_SHIFT; - printk("ExcCode : %x (SubCode %x)\n", exccode, excsubcode); + print_estat(regs->csr_estat); if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE) printk("BadVA : %0*lx\n", field, regs->csr_badvaddr); -- Gitee From 2f11340e372490cb7ee21a783a0bfc03685196d6 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Wed, 26 Apr 2023 15:45:24 +0800 Subject: [PATCH 140/241] LoongArch: Tweak the BADV and CPUCFG.PRID lines in show_regs() Use ISA manual names for BADV and CPUCFG.PRID lines in show_regs(), for stylistic consistency with the other lines already touched. While at it, also include current CPU's full name in show_regs() output. It may be more helpful for developers looking at the resulting dumps, because multiple distinct CPU models may share the same PRID. Not having this info available may hide problems only found on some but not all of the models sharing one specific PRID. Change-Id: I92eb7dabd90ba269dd0cb7d4b23a2dd406b5d872 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index ae669e2726e9..bc69d83e1a2c 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -362,10 +362,10 @@ static void __show_regs(const struct pt_regs *regs) print_estat(regs->csr_estat); if (exccode >= EXCCODE_TLBL && exccode <= EXCCODE_ALE) - printk("BadVA : %0*lx\n", field, regs->csr_badvaddr); + printk(" BADV: %0*lx\n", field, regs->csr_badvaddr); - printk("PrId : %08x (%s)\n", read_cpucfg(LOONGARCH_CPUCFG0), - cpu_family_string()); + printk(" PRID: %08x (%s, %s)\n", read_cpucfg(LOONGARCH_CPUCFG0), + cpu_family_string(), cpu_full_name_string()); } void show_regs(struct pt_regs *regs) -- Gitee From b77d92d1fd6d842c2a5aefc13e1c22d308a5cc09 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Sun, 23 Apr 2023 01:57:05 +0800 Subject: [PATCH 141/241] LoongArch: Relay BCE exceptions to userland as SIGSEGV with si_code=SEGV_BNDERR SEGV_BNDERR was introduced initially for supporting the Intel MPX, but fell into disuse after the MPX support was removed. The LoongArch bounds-checking instructions behave very differently than MPX, but overall the interface is still kind of suitable for conveying the information to userland when bounds-checking assertions trigger, so we wouldn't have to invent more UAPI. 
Specifically, when the BCE triggers, a SEGV_BNDERR is sent to userland, with si_addr set to the out-of-bounds address or value (in asrt{gt,le}'s case), and one of si_lower or si_upper set to the configured bound depending on the faulting instruction. The other bound is set to either 0 or ULONG_MAX to resemble a range with both lower and upper bounds. Note that it is possible to have si_addr == si_lower in case of a failing asrtgt or {ld,st}gt, because those instructions test for strict greater-than relationship. This should not pose a problem for userland, though, because the faulting PC is available for the application to associate back to the exact instruction for figuring out the expectation. Example exception context generated by a faulting `asrtgt.d t0, t1` (assert t0 > t1 or BCE) with t0=100 and t1=200: > pc 00005555558206a4 ra 00007ffff2d854fc tp 00007ffff2f2f180 sp 00007ffffbf9fb80 > a0 0000000000000002 a1 00007ffffbf9fce8 a2 00007ffffbf9fd00 a3 00007ffff2ed4558 > a4 0000000000000000 a5 00007ffff2f044c8 a6 00007ffffbf9fce0 a7 fffffffffffff000 > t0 0000000000000064 t1 00000000000000c8 t2 00007ffffbfa2d5e t3 00007ffff2f12aa0 > t4 00007ffff2ed6158 t5 00007ffff2ed6158 t6 000000000000002e t7 0000000003d8f538 > t8 0000000000000005 u0 0000000000000000 s9 0000000000000000 s0 00007ffffbf9fce8 > s1 0000000000000002 s2 0000000000000000 s3 00007ffff2f2c038 s4 0000555555820610 > s5 00007ffff2ed5000 s6 0000555555827e38 s7 00007ffffbf9fd00 s8 0000555555827e38 > ra: 00007ffff2d854fc > ERA: 00005555558206a4 > CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) > PRMD: 00000007 (PPLV3 +PIE -PWE) > EUEN: 00000000 (-FPE -SXE -ASXE -BTE) > ECFG: 0007181c (LIE=2-4,11-12 VS=7) > ESTAT: 000a0000 [BCE] (IS= ECode=10 EsubCode=0) > PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) Change-Id: If9acc779f187a6d7aeccaf5e495cd2bb5f031822 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/kernel/genex.S | 1 + arch/loongarch/kernel/traps.c | 92 +++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 03a058c48a5f..37d1cedb6707 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -73,6 +73,7 @@ SYM_FUNC_END(except_vec_cex) BUILD_HANDLER ade ade badv BUILD_HANDLER ale ale badv + BUILD_HANDLER bce bce none BUILD_HANDLER bp bp none BUILD_HANDLER fpe fpe fcsr BUILD_HANDLER fpu fpu none diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index bc69d83e1a2c..61da45c12dea 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,7 @@ extern asmlinkage void handle_ade(void); extern asmlinkage void handle_ale(void); +extern asmlinkage void handle_bce(void); extern asmlinkage void handle_sys(void); extern asmlinkage void handle_bp(void); extern asmlinkage void handle_ri(void); @@ -598,6 +600,95 @@ static void bug_handler(struct pt_regs *regs) } } +asmlinkage void noinstr do_bce(struct pt_regs *regs) +{ + bool user = user_mode(regs); + unsigned long era = exception_era(regs); + u64 badv = 0, lower = 0, upper = ULONG_MAX; + union loongarch_instruction insn; + irqentry_state_t state = irqentry_enter(regs); + + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_enable(); + + current->thread.trap_nr = read_csr_excode(); + + die_if_kernel("Bounds check error in kernel code", regs); + + /* + * Pull out the address that failed bounds checking, and the lower 
/ + * upper bound, by minimally looking at the faulting instruction word + * and reading from the correct register. + */ + if (__get_inst(&insn.word, (u32 *)era, user)) + goto bad_era; + + switch (insn.reg3_format.opcode) { + case asrtled_op: + if (insn.reg3_format.rd != 0) + break; /* not asrtle */ + badv = regs->regs[insn.reg3_format.rj]; + upper = regs->regs[insn.reg3_format.rk]; + break; + + case asrtgtd_op: + if (insn.reg3_format.rd != 0) + break; /* not asrtgt */ + badv = regs->regs[insn.reg3_format.rj]; + lower = regs->regs[insn.reg3_format.rk]; + break; + + case ldleb_op: + case ldleh_op: + case ldlew_op: + case ldled_op: + case stleb_op: + case stleh_op: + case stlew_op: + case stled_op: + case fldles_op: + case fldled_op: + case fstles_op: + case fstled_op: + badv = regs->regs[insn.reg3_format.rj]; + upper = regs->regs[insn.reg3_format.rk]; + break; + + case ldgtb_op: + case ldgth_op: + case ldgtw_op: + case ldgtd_op: + case stgtb_op: + case stgth_op: + case stgtw_op: + case stgtd_op: + case fldgts_op: + case fldgtd_op: + case fstgts_op: + case fstgtd_op: + badv = regs->regs[insn.reg3_format.rj]; + lower = regs->regs[insn.reg3_format.rk]; + break; + } + + force_sig_bnderr((void __user *)badv, (void __user *)lower, (void __user *)upper); + +out: + if (regs->csr_prmd & CSR_PRMD_PIE) + local_irq_disable(); + + irqentry_exit(regs, state); + return; + +bad_era: + /* + * Cannot pull out the instruction word, hence cannot provide more + * info than a regular SIGSEGV in this case. + */ + force_sig(SIGSEGV); + goto out; +} + asmlinkage void noinstr do_bp(struct pt_regs *regs) { bool user = user_mode(regs); @@ -1017,6 +1108,7 @@ void __init trap_init(void) set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); + set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE); set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); -- Gitee From c63002be4ee74eae5a8d1087997158234e72d4b8 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Sun, 25 Jun 2023 17:56:40 +0800 Subject: [PATCH 142/241] LoongArch: Make the CPUCFG&CSR ops simple aliases of compiler built-ins In addition to less visual clutter, this also makes Clang happy regarding the const-ness of arguments. In the original approach, all Clang gets to see is the incoming arguments whose const-ness cannot be proven without first being inlined; so Clang errors out here while GCC is fine. While at it, tweak several printk format strings because the return type of csr_read64 becomes effectively unsigned long, instead of unsigned long long. 
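A minimal reproduction of the const-ness problem that the aliasing avoids
(a sketch of the compiler behavior described above):

  static inline u64 csr_read64_fn(u32 reg)
  {
          return __csrrd_d(reg);  /* Clang: 'reg' is not provably constant
                                     before inlining, so this is rejected */
  }

  #define csr_read64(reg) __csrrd_d(reg)  /* the macro keeps the constant
                                             visible at each call site */

  u64 era = csr_read64(LOONGARCH_CSR_ERA);  /* fine for both GCC and Clang */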
Change-Id: I6848d8f65a20affe874801ea83bcc588341a78e2 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarchregs.h | 63 ++++------------------ arch/loongarch/kernel/traps.c | 2 +- arch/loongarch/lib/dump_tlb.c | 6 +-- 3 files changed, 15 insertions(+), 56 deletions(-) diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index 24f450063aac..e6accca330f1 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -60,10 +60,7 @@ __asm__(".macro parse_r var r\n\t" #undef _IFC_REG /* CPUCFG */ -static inline u32 read_cpucfg(u32 reg) -{ - return __cpucfg(reg); -} +#define read_cpucfg(reg) __cpucfg(reg) #endif /* !__ASSEMBLY__ */ @@ -206,56 +203,18 @@ static inline u32 read_cpucfg(u32 reg) #ifndef __ASSEMBLY__ /* CSR */ -static __always_inline u32 csr_read32(u32 reg) -{ - return __csrrd_w(reg); -} - -static __always_inline u64 csr_read64(u32 reg) -{ - return __csrrd_d(reg); -} - -static __always_inline void csr_write32(u32 val, u32 reg) -{ - __csrwr_w(val, reg); -} - -static __always_inline void csr_write64(u64 val, u32 reg) -{ - __csrwr_d(val, reg); -} - -static __always_inline u32 csr_xchg32(u32 val, u32 mask, u32 reg) -{ - return __csrxchg_w(val, mask, reg); -} - -static __always_inline u64 csr_xchg64(u64 val, u64 mask, u32 reg) -{ - return __csrxchg_d(val, mask, reg); -} +#define csr_read32(reg) __csrrd_w(reg) +#define csr_read64(reg) __csrrd_d(reg) +#define csr_write32(val, reg) __csrwr_w(val, reg) +#define csr_write64(val, reg) __csrwr_d(val, reg) +#define csr_xchg32(val, mask, reg) __csrxchg_w(val, mask, reg) +#define csr_xchg64(val, mask, reg) __csrxchg_d(val, mask, reg) /* IOCSR */ -static __always_inline u32 iocsr_read32(u32 reg) -{ - return __iocsrrd_w(reg); -} - -static __always_inline u64 iocsr_read64(u32 reg) -{ - return __iocsrrd_d(reg); -} - -static __always_inline void iocsr_write32(u32 val, u32 reg) -{ - __iocsrwr_w(val, reg); -} - -static __always_inline void iocsr_write64(u64 val, u32 reg) -{ - __iocsrwr_d(val, reg); -} +#define iocsr_read32(reg) __iocsrrd_w(reg) +#define iocsr_read64(reg) __iocsrrd_d(reg) +#define iocsr_write32(val, reg) __iocsrwr_w(val, reg) +#define iocsr_write64(val, reg) __iocsrwr_d(val, reg) #endif /* !__ASSEMBLY__ */ diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 61da45c12dea..58d55cee8fcd 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -993,7 +993,7 @@ asmlinkage void cache_parity_error(void) /* For the moment, report the problem and hang. 
*/ printk("Cache error exception:\n"); printk("csr_merrctl == %08x\n", csr_read32(LOONGARCH_CSR_MERRCTL)); - printk("csr_merrera == %016llx\n", csr_read64(LOONGARCH_CSR_MERRERA)); + printk("csr_merrera == %016lx\n", csr_read64(LOONGARCH_CSR_MERRERA)); panic("Can't handle the cache error!"); } diff --git a/arch/loongarch/lib/dump_tlb.c b/arch/loongarch/lib/dump_tlb.c index 3b8c77bcdd9a..6a0e1c04ba91 100644 --- a/arch/loongarch/lib/dump_tlb.c +++ b/arch/loongarch/lib/dump_tlb.c @@ -16,9 +16,9 @@ void dump_tlb_regs(void) pr_info("Index : 0x%0x\n", read_csr_tlbidx()); pr_info("PageSize : 0x%0x\n", read_csr_pagesize()); - pr_info("EntryHi : 0x%0*llx\n", field, read_csr_entryhi()); - pr_info("EntryLo0 : 0x%0*llx\n", field, read_csr_entrylo0()); - pr_info("EntryLo1 : 0x%0*llx\n", field, read_csr_entrylo1()); + pr_info("EntryHi : 0x%0*lx\n", field, read_csr_entryhi()); + pr_info("EntryLo0 : 0x%0*lx\n", field, read_csr_entrylo0()); + pr_info("EntryLo1 : 0x%0*lx\n", field, read_csr_entrylo1()); } static void dump_tlb(int first, int last) -- Gitee From 5dc00a98567f0d9f2e65d024128c97651101487a Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Sun, 25 Jun 2023 17:56:41 +0800 Subject: [PATCH 143/241] LoongArch: Simplify the invtlb wrappers The invtlb instruction has been supported by upstream LoongArch toolchains from day one, so ditch the raw opcode trickery and just use plain inline asm for it. While at it, also make the invtlb asm statements barriers, for proper modeling of the side effects. The functions are also marked as __always_inline instead of just "inline", because they cannot work at all if not inlined: the op argument will not be compile-time const in that case, thus failing to satisfy the "i" constraint. The signature of the other more specific invtlb wrappers contain unused arguments right now, but these are not removed right away in order for the patch to be focused. In the meantime, assertions are added to ensure no accidental misuse happens before the refactor. (The more specific wrappers cannot re-use the generic invtlb wrapper, because the ISA manual says $zero shall be used in case a particular op does not take the respective argument: re-using the generic wrapper would mean losing control over the register usage.) 
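Usage is unchanged, but op must now be a compile-time constant so that it
can be encoded through the "i" constraint. A sketch (the op name is taken
from the enum in this file):

  invtlb_all(INVTLB_CURRENT_ALL, 0, 0);  /* ok: constant op, zero operands */
  invtlb_all(op_from_variable, 0, 0);    /* no longer compiles */

Passing a non-zero info/addr to the more specific wrappers now trips the
BUILD_BUG_ON() assertions instead of being silently ignored.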
Change-Id: Ibce2b55274d95c9cfcaf7ae3ba9fc0b2476c0e95
Signed-off-by: WANG Xuerui
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/tlb.h | 43 ++++++++++++++------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h
index a77aa5905472..cf760b74d489 100644
--- a/arch/loongarch/include/asm/tlb.h
+++ b/arch/loongarch/include/asm/tlb.h
@@ -88,52 +88,47 @@ enum invtlb_ops {
	INVTLB_GID_ADDR = 0x16,
 };

-/*
- * invtlb op info addr
- * (0x1 << 26) | (0x24 << 20) | (0x13 << 15) |
- * (addr << 10) | (info << 5) | op
- */
-static inline void invtlb(u32 op, u32 info, u64 addr)
+static __always_inline void invtlb(u32 op, u32 info, u64 addr)
 {
	__asm__ __volatile__(
-		"parse_r addr,%0\n\t"
-		"parse_r info,%1\n\t"
-		".word ((0x6498000) | (addr << 10) | (info << 5) | %2)\n\t"
-		:
-		: "r"(addr), "r"(info), "i"(op)
+		"invtlb %0, %1, %2\n\t"
		:
+		: "i"(op), "r"(info), "r"(addr)
+		: "memory"
		);
 }

-static inline void invtlb_addr(u32 op, u32 info, u64 addr)
+static __always_inline void invtlb_addr(u32 op, u32 info, u64 addr)
 {
+	BUILD_BUG_ON(!__builtin_constant_p(info) || info != 0);
	__asm__ __volatile__(
-		"parse_r addr,%0\n\t"
-		".word ((0x6498000) | (addr << 10) | (0 << 5) | %1)\n\t"
-		:
-		: "r"(addr), "i"(op)
+		"invtlb %0, $zero, %1\n\t"
		:
+		: "i"(op), "r"(addr)
+		: "memory"
		);
 }

-static inline void invtlb_info(u32 op, u32 info, u64 addr)
+static __always_inline void invtlb_info(u32 op, u32 info, u64 addr)
 {
+	BUILD_BUG_ON(!__builtin_constant_p(addr) || addr != 0);
	__asm__ __volatile__(
-		"parse_r info,%0\n\t"
-		".word ((0x6498000) | (0 << 10) | (info << 5) | %1)\n\t"
-		:
-		: "r"(info), "i"(op)
+		"invtlb %0, %1, $zero\n\t"
		:
+		: "i"(op), "r"(info)
+		: "memory"
		);
 }

-static inline void invtlb_all(u32 op, u32 info, u64 addr)
+static __always_inline void invtlb_all(u32 op, u32 info, u64 addr)
 {
+	BUILD_BUG_ON(!__builtin_constant_p(info) || info != 0);
+	BUILD_BUG_ON(!__builtin_constant_p(addr) || addr != 0);
	__asm__ __volatile__(
-		".word ((0x6498000) | (0 << 10) | (0 << 5) | %0)\n\t"
+		"invtlb %0, $zero, $zero\n\t"
		:
		: "i"(op)
-		:
+		: "memory"
		);
 }
-- 
Gitee

From 1b9dc0aa6eede1bb71505437a46a9ce77cbd6f3f Mon Sep 17 00:00:00 2001
From: WANG Xuerui
Date: Sun, 25 Jun 2023 17:56:42 +0800
Subject: [PATCH 144/241] LoongArch: Tweak CFLAGS for Clang compatibility

Now that the arch code is mostly ready for LLVM/Clang consumption, it is
time to re-organize the CFLAGS a little to actually enable the LLVM
build. Namely, all -G0 switches from CFLAGS are removed, and
-mexplicit-relocs and -mdirect-extern-access are now wrapped with
cc-option (with the related asm/percpu.h definition guarded against
toolchain combos that are known to not work).

A build with !RELOCATABLE && !MODULE is confirmed working within a QEMU
environment; support for the two features is currently blocked on
LLVM/Clang, and will come later.

Why -G0 can be removed:

In GCC, -G stands for "small data threshold", which instructs the
compiler to put data smaller than the specified threshold in a dedicated
"small data" section (called .sdata on LoongArch and several other
arches).

However, benefiting from this would require ABI cooperation, which is
not the case for LoongArch; and current GCC behaves the same whether -G0
(equal to disabling this optimization) is given or not. So, remove -G0
from CFLAGS altogether for one less thing to care about. This also
benefits LLVM/Clang compatibility where the -G switch is not supported.
Why -mexplicit-relocs can now be conditionally applied without regressions: Originally -mexplicit-relocs is unconditionally added to CFLAGS in case of CONFIG_AS_HAS_EXPLICIT_RELOCS, because not having it (i.e. old GCC + new binutils) would not work: modules will have R_LARCH_ABS_* relocs inside, but given the rarity of such toolchain combo in the wild, it may not be worthwhile to support it, so support for such relocs in modules were not added back when explicit relocs support was upstreamed, and -mexplicit-relocs is unconditionally added to fail the build early. Now that Clang compatibility is desired, given Clang is behaving like -mexplicit-relocs from day one but without support for the CLI flag, we must ensure the flag is not passed in case of Clang. However, explicit compiler flavor checks can be more brittle than feature detection: in this case what actually matters is support for __attribute__((model)) when building modules. Given neither older GCC nor current Clang support this attribute, probing for the attribute support and #error'ing out would allow proper UX without checking for Clang, and also automatically work when Clang support for the attribute is to be added in the future. Why -mdirect-extern-access is now conditionally applied: This is actually a nice-to-have optimization that can reduce GOT accesses, but not having it is harmless either. Because Clang does not support the option currently, but might do so in the future, conditional application via cc-option ensures compatibility with both current and future Clang versions. Change-Id: Ic7d9cc7a14c5e3fe78855f88bea9cf171f1a1890 Suggested-by: Xi Ruoyao # cc-option changes Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 21 +++++++++++++-------- arch/loongarch/include/asm/percpu.h | 6 +++++- arch/loongarch/vdso/Makefile | 2 +- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 5d07a4f0e260..05108d4380c2 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -40,8 +40,8 @@ all-$(CONFIG_EFI_STUB) += vmlinuz.efi vmlinuz.efi: vmlinuz -cflags-y += -G0 -pipe -msoft-float -LDFLAGS_vmlinux += -G0 -static -n -nostdlib +cflags-y += -pipe -msoft-float +LDFLAGS_vmlinux += -static -n -nostdlib # When the assembler supports explicit relocation hint, we must use it. # GCC may have -mexplicit-relocs off by default if it was built with an old @@ -50,13 +50,18 @@ LDFLAGS_vmlinux += -G0 -static -n -nostdlib # When the assembler does not supports explicit relocation hint, we can't use # it. Disable it if the compiler supports it. # -# If you've seen "unknown reloc hint" message building the kernel and you are -# now wondering why "-mexplicit-relocs" is not wrapped with cc-option: the -# combination of a "new" assembler and "old" compiler is not supported. Either -# upgrade the compiler or downgrade the assembler. +# The combination of a "new" assembler and "old" GCC is not supported, given +# the rarity of this combo and the extra complexity needed to make it work. +# Either upgrade the compiler or downgrade the assembler; the build will error +# out if it is the case (by probing for the model attribute; all supported +# compilers in this case would have support). +# +# Also, -mdirect-extern-access is useful in case of building with explicit +# relocs, for avoiding unnecessary GOT accesses. It is harmless to not have +# support though. 
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
-cflags-y			+= -mexplicit-relocs
-KBUILD_CFLAGS_KERNEL		+= -mdirect-extern-access
+cflags-y			+= $(call cc-option,-mexplicit-relocs)
+KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-mdirect-extern-access)
 else
 cflags-y			+= $(call cc-option,-mno-explicit-relocs)
 KBUILD_AFLAGS_KERNEL		+= -Wa,-mla-global-with-pcrel
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index fd0556726418..7d0861554c29 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -13,7 +13,11 @@
  * loaded. Tell the compiler this fact when using explicit relocs.
  */
 #if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS)
-#define PER_CPU_ATTRIBUTES __attribute__((model("extreme")))
+# if __has_attribute(model)
+#  define PER_CPU_ATTRIBUTES __attribute__((model("extreme")))
+# else
+#  error compiler support for the model attribute is necessary when a recent assembler is used
+# endif
 #endif

 /* Use r21 for fast access */
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index 22bc504eb8e7..9bf343ceeed2 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -22,7 +22,7 @@ endif

 cflags-vdso := $(ccflags-vdso) \
	$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-	-O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
+	-O2 -g -fno-strict-aliasing -fno-common -fno-builtin \
	-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
	$(call cc-option, -fno-asynchronous-unwind-tables) \
	$(call cc-option, -fno-stack-protector)
-- 
Gitee

From ef9c9871b571f8265b87ddfec1fd53fff803d02e Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sun, 9 Jul 2023 17:59:52 +0800
Subject: [PATCH 145/241] LoongArch: Fix module relocation error with binutils 2.41

Binutils 2.41 enables linker relaxation by default, but the kernel
module loader doesn't support that, so disable it. Otherwise we get such
an error when loading modules: "Unknown relocation type 102".

Change-Id: I9f670da0ea4b31a495a6599541f06fc416daf2ab
Signed-off-by: Huacai Chen
---
 arch/loongarch/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 05108d4380c2..cf9d1ea86182 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -62,6 +62,8 @@ LDFLAGS_vmlinux	+= -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y			+= $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-mdirect-extern-access)
+KBUILD_AFLAGS_MODULE		+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_CFLAGS_MODULE		+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
 else
 cflags-y			+= $(call cc-option,-mno-explicit-relocs)
 KBUILD_AFLAGS_KERNEL		+= -Wa,-mla-global-with-pcrel
-- 
Gitee

From 11fa6496a330a3b365bd774ac643803dfd632d28 Mon Sep 17 00:00:00 2001
From: Chenguang Zhao
Date: Thu, 27 Jul 2023 12:21:25 +0800
Subject: [PATCH 146/241] LoongArch: BPF: Enable bpf_probe_read{, str}() on LoongArch

Currently nettrace does not work on LoongArch due to missing
bpf_probe_read{,str}() support, with the error message:

  ERROR: failed to load kprobe-based eBPF
  ERROR: failed to load kprobe-based bpf

According to commit 0ebeea8ca8a4d1d ("bpf: Restrict bpf_probe_read{, str}()
only to archs where they work"), we only need to select
CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE to add said support,
because LoongArch does have non-overlapping address ranges for kernel
and userspace.
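For context, a minimal kprobe-type program of the kind nettrace loads (an
illustrative sketch using libbpf conventions, assuming <bpf/bpf_helpers.h>
and <bpf/bpf_tracing.h>; not nettrace's actual code):

  SEC("kprobe/ip_rcv")
  int trace_ip_rcv(struct pt_regs *ctx)
  {
          struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM1(ctx);
          unsigned int len = 0;

          /* This helper is only available on architectures selecting
           * ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE. */
          bpf_probe_read(&len, sizeof(len), &skb->len);
          return 0;
  }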
Change-Id: Iab1ddc9c67ad4a3626f6c43a141a3bf7c8d11360
Signed-off-by: Chenguang Zhao
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index cf8c42ff78fc..97126fcac790 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -7,6 +7,7 @@ config LOONGARCH
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PTE_SPECIAL if !32BIT
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_INLINE_READ_LOCK if !PREEMPTION
--
Gitee

From 681cca25a1fbf2ad940cd78dacbdf38470fa335f Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 18 Jul 2023 14:31:06 +0800
Subject: [PATCH 147/241] LoongArch: Cleanup __builtin_constant_p() checking
 for cpu_has_*

In the current configuration, cpu_has_lsx and cpu_has_lasx cannot be
constants. So clean up the __builtin_constant_p() checking to reduce the
complexity.

Change-Id: I9979baaa8a085e69726abf84554413738d24b7f6
Reviewed-by: WANG Xuerui
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/fpu.h | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index 9889772c7eb3..21c65fd4625b 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -225,15 +225,8 @@ static inline void restore_lsx(struct task_struct *t)

 static inline void init_lsx_upper(void)
 {
-	/*
-	 * Check cpu_has_lsx only if it's a constant. This will allow the
-	 * compiler to optimise out code for CPUs without LSX without adding
-	 * an extra redundant check for CPUs with LSX.
-	 */
-	if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
-		return;
-
-	_init_lsx_upper();
+	if (cpu_has_lsx)
+		_init_lsx_upper();
 }

 static inline void restore_lsx_upper(struct task_struct *t)
@@ -301,7 +294,7 @@ static inline void restore_lasx_upper(struct task_struct *t) {}

 static inline int thread_lsx_context_live(void)
 {
-	if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
+	if (!cpu_has_lsx)
 		return 0;

 	return test_thread_flag(TIF_LSX_CTX_LIVE);
@@ -309,7 +302,7 @@ static inline int thread_lsx_context_live(void)

 static inline int thread_lasx_context_live(void)
 {
-	if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
+	if (!cpu_has_lasx)
 		return 0;

 	return test_thread_flag(TIF_LASX_CTX_LIVE);
--
Gitee

From 4bed31cf10493a3cddd23bab305a02b35fd28f97 Mon Sep 17 00:00:00 2001
From: WANG Xuerui
Date: Sun, 6 Aug 2023 18:36:20 +0800
Subject: [PATCH 148/241] LoongArch: Replace -ffreestanding with finer-grained
 -fno-builtin's

As explained by Nick in the original issue: the kernel usually does a good
job of providing library helpers that have similar semantics to their
ordinary userspace libc equivalents, but -ffreestanding disables such
libcall optimization and other related features in the compiler, which can
lead to unexpected things such as CONFIG_FORTIFY_SOURCE not working (!).

However, due to the desire for better control over unaligned accesses with
respect to CONFIG_ARCH_STRICT_ALIGN, and also for avoiding the GCC bug
https://gcc.gnu.org/PR109465, we still want to disable optimizations for
the memory libcalls (memcpy, memmove and memset for now). Use finer-grained
-fno-builtin-* toggles to achieve this without losing source fortification
and other libcall optimizations.
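To make the tradeoff concrete, a small illustrative snippet (not from this
patch; the struct and sizes are made up). With builtin recognition the
compiler may expand a fixed-size memcpy() inline, possibly using word-sized
(unaligned) accesses; under -fno-builtin-memcpy it emits a plain call to
the kernel's own memcpy() instead, while the remaining string.h helpers
keep their builtin status, so FORTIFY_SOURCE keeps working:

  #include <linux/string.h>

  struct hdr { unsigned char raw[12]; };

  static void copy_hdr(struct hdr *dst, const void *src)
  {
          /* with -fno-builtin-memcpy: a real call to memcpy(),
           * not an inline expansion with unaligned loads */
          memcpy(dst->raw, src, sizeof(dst->raw));
  }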
Change-Id: I1ab73f976e878c13ea2a593137cd8533b3970846
Closes: https://github.com/ClangBuiltLinux/linux/issues/1897
Reported-by: Nathan Chancellor
Suggested-by: Nick Desaulniers
Signed-off-by: WANG Xuerui
Signed-off-by: Huacai Chen
---
 arch/loongarch/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index cf9d1ea86182..92f0d21d690c 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -76,7 +76,7 @@ ifeq ($(CONFIG_RELOCATABLE),y)
 LDFLAGS_vmlinux += --emit-relocs
 endif

-cflags-y += -ffreestanding
+cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset

 #
 # Kernel compression
--
Gitee

From a97fb482a83545fe51f7ab7fc860b3825ac71824 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 16 Aug 2023 17:50:08 +0800
Subject: [PATCH 149/241] LoongArch: Put the body of play_dead() into
 arch_cpu_idle_dead()

The initial aim is to silence the following objtool warning:

  arch/loongarch/kernel/process.o: warning: objtool: arch_cpu_idle_dead() falls through to next function start_thread()

According to tools/objtool/Documentation/objtool.txt, this is because the
last instruction of arch_cpu_idle_dead() is a call to a noreturn function
play_dead(). One simple way to silence the warning is to add the noreturn
function play_dead() to objtool's hard-coded global_noreturns array, that
is to say, just put "NORETURN(play_dead)" into tools/objtool/noreturns.h;
that works well. But play_dead() is only defined once and only called by
arch_cpu_idle_dead(), so an alternative is to put the body of play_dead()
into its caller arch_cpu_idle_dead() and remove play_dead() entirely,
which also avoids the overhead of the function call.
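A condensed sketch of the resulting shape (illustrative only; the real
body, shown in the smp.c hunk below, does the Loongson-specific mailbox
polling rather than this placeholder loop):

  void __noreturn arch_cpu_idle_dead(void)
  {
          /* formerly the body of play_dead(): park this CPU for good */
          local_irq_disable();
          for (;;)
                  cpu_relax();    /* placeholder for the real wait loop */
  }

With __noreturn on the merged function itself, objtool no longer sees a
call that appears to fall through into start_thread().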
Change-Id: Ib64c4168866a0e50f6aee40b4410f421592b58d3
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/smp.h | 2 --
 arch/loongarch/kernel/process.c  | 7 -------
 arch/loongarch/loongson64/smp.c  | 2 +-
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index d638b88e8744..bafbdd305378 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -123,8 +123,6 @@ static inline void __cpu_die(unsigned int cpu)
 {
 	mp_ops->cpu_die(cpu);
 }
-
-extern void play_dead(void);
 #endif

 static inline void arch_send_call_function_single_ipi(int cpu)
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 11ef4d8806a2..49d2e825f29b 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -49,13 +49,6 @@
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);

-#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
-{
-	play_dead();
-}
-#endif
-
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);

diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c
index d8762e8ed215..79b7669eb333 100644
--- a/arch/loongarch/loongson64/smp.c
+++ b/arch/loongarch/loongson64/smp.c
@@ -344,7 +344,7 @@ static void loongson3_cpu_die(unsigned int cpu)
 	mb();
 }

-void play_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
 {
 	register long cpuid, core, node, count;
 	register void *addr, *base;
--
Gitee

From af6b628651b8c1b52014388266063363e23b00bf Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Fri, 25 Aug 2023 19:16:30 +0800
Subject: [PATCH 150/241] LoongArch: Ensure FP/SIMD registers in the core dump
 file are up to date

This is a port of commit 379eb01c21795edb4c ("riscv: Ensure the value of
FP registers in the core dump file is up to date").

The values of FP/SIMD registers in the core dump file come from the
thread.fpu. However, the kernel saves the FP/SIMD registers only before
scheduling out the process. If no process switch happens during the
exception handling, the kernel will not have a chance to save the latest
values of the FP/SIMD registers, so their values in the core dump file may
be incorrect. To solve this problem, force fpr_get()/simd_get() to save
the FP/SIMD registers into the thread.fpu if the target task equals the
current task.
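The invariant of the fix, as a sketch (the names mirror the patch;
membuf_write() is the generic regset copy-out primitive):

  static int fpr_get_sketch(struct task_struct *target, struct membuf to)
  {
          /* flush live FP/SIMD state back into target->thread.fpu;
           * this is a no-op unless target == current (see the fpu.h
           * hunk below) */
          save_fpu_regs(target);

          return membuf_write(&to, target->thread.fpu.fpr,
                              sizeof(target->thread.fpu.fpr));
  }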
Change-Id: Ie5c5e6eddeb609cb3a16d2bc237613a35b551e57
Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/fpu.h | 22 ++++++++++++++++++----
 arch/loongarch/kernel/ptrace.c   |  4 ++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index 21c65fd4625b..6dca64890074 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -180,16 +180,30 @@ static inline void restore_fp(struct task_struct *tsk)
 		_restore_fp(&tsk->thread.fpu);
 }

-static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
+static inline void save_fpu_regs(struct task_struct *tsk)
 {
+	unsigned int euen;
+
 	if (tsk == current) {
 		preempt_disable();
-		if (is_fpu_owner())
+
+		euen = csr_read32(LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+		if (euen & CSR_EUEN_LASXEN)
+			_save_lasx(&current->thread.fpu);
+		else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+		if (euen & CSR_EUEN_LSXEN)
+			_save_lsx(&current->thread.fpu);
+		else
+#endif
+		if (euen & CSR_EUEN_FPEN)
 			_save_fp(&current->thread.fpu);
+
 		preempt_enable();
 	}
-
-	return tsk->thread.fpu.fpr;
 }

 static inline int is_simd_owner(void)
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index e12d7febdc97..d41670fd5cf7 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -139,6 +139,8 @@ static int fpr_get(struct task_struct *target,
 {
 	int r;

+	save_fpu_regs(target);
+
 	if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
 		r = gfpr_get(target, &to);
 	else
@@ -270,6 +272,8 @@ static int simd_get(struct task_struct *target,
 {
 	const unsigned int wr_size = NUM_FPU_REGS * regset->size;

+	save_fpu_regs(target);
+
 	if (!tsk_used_math(target)) {
 		/* The task hasn't used FP or LSX, fill with 0xff */
 		copy_pad_fprs(target, regset, &to, 0);
--
Gitee

From ba28ca4d2984a35b3e15a5cbb3a5ddbc8036ef4b Mon Sep 17 00:00:00 2001
From: Hongchen Zhang
Date: Wed, 6 Sep 2023 17:20:19 +0800
Subject: [PATCH 151/241] LoongArch: mm: Add p?d_leaf() definitions

When running the LTP test, case ksm06 caused a panic at:

  break_ksm_pmd_entry
    -> pmd_leaf (huge page table, but returns false)
      -> pte_present (panic)

The reason is that pmd_leaf() is not defined, so, like commit
501b81046701 ("mips: mm: add p?d_leaf() definitions"), add p?d_leaf()
definitions for LoongArch.

Change-Id: Ibe19b234b1843200b2dc8572f4d6514523616835
Fixes: 09cfefb7fa70 ("LoongArch: Add memory management")
Cc: stable@vger.kernel.org
Acked-by: David Hildenbrand
Signed-off-by: Hongchen Zhang
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 6eb5a124409f..583278ce9a8e 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -326,6 +326,9 @@ static inline long pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */

+#define pmd_leaf(pmd)	((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud)	((pud_val(pud) & _PAGE_HUGE) != 0)
+
 /*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
--
Gitee

From 9a9ed956297c37f9abd522b0d11f3e77d41356fc Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Mon, 28 Aug 2023 22:46:37 +0800
Subject: [PATCH 152/241] LoongArch: Remove shm_align_mask and use SHMLBA
 instead

Both shm_align_mask and SHMLBA are meant to avoid cache aliasing.
But they are inconsistent: shm_align_mask is (PAGE_SIZE - 1) while SHMLBA
is SZ_64K, and PAGE_SIZE is not always equal to SZ_64K. This may cause
problems when shmat() is called twice. Fix this problem by removing
shm_align_mask and using SHMLBA (strictly SHMLBA - 1) instead.

Change-Id: I9accfefa6d9d6a8f2ee1f80ba2b6c9aa825139b2
Reported-by: Jiantao Shan
Signed-off-by: Huacai Chen
---
 arch/loongarch/mm/cache.c |  1 -
 arch/loongarch/mm/mmap.c  | 13 ++++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c
index d68423998210..51899ad6bf68 100644
--- a/arch/loongarch/mm/cache.c
+++ b/arch/loongarch/mm/cache.c
@@ -153,5 +153,4 @@ void cpu_cache_init(void)
 	current_cpu_data.cache_leaves_present = leaf;

 	current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
-	shm_align_mask = PAGE_SIZE - 1;
 }
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index ad0bd39100ce..6e6ac5c21414 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -8,12 +8,11 @@
 #include
 #include

-unsigned long shm_align_mask = PAGE_SIZE - 1;	/* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK	(SHMLBA - 1)

-#define COLOUR_ALIGN(addr, pgoff)				\
-	((((addr) + shm_align_mask) & ~shm_align_mask) +	\
-	 (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff)				\
+	((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)		\
+	 + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))

 enum mmap_allocation_direction {UP, DOWN};

@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 		 * cache aliasing constraints.
 		 */
 		if ((flags & MAP_SHARED) &&
-		    ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+		    ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
 			return -EINVAL;
 		return addr;
 	}
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
 	}

 	info.length = len;
-	info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+	info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
 	info.align_offset = pgoff << PAGE_SHIFT;

 	if (dir == DOWN) {
--
Gitee

From 05c4958d510adec487b4528a2972043854fe76cc Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 12 Jul 2023 11:16:21 +0800
Subject: [PATCH 153/241] LoongArch: Code improvements in function
 pcpu_populate_pte()

Do some code improvements in function pcpu_populate_pte():

1. Add memory allocation failure handling;
2. Replace pgd_populate() with p4d_populate(), which will be useful if
   there are four-level page tables (see the sketch below).
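As the sketch for item 2 (condensed from the hunk below): with three-level
page tables the p4d is folded into the pgd, so both spellings behave
identically today, but only the p4d form keeps addressing the level
actually being walked once a fourth level exists:

  p4d_t *p4d = p4d_offset(pgd_offset_k(addr), addr); /* folded: == pgd */

  if (p4d_none(*p4d)) {
          pud_t *pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);

          if (!pud)                         /* item 1: handle failure */
                  panic("%s: Failed to allocate memory\n", __func__);
          p4d_populate(&init_mm, p4d, pud); /* item 2: correct level */
  }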
Change-Id: Ibd45e0583e195ed73c737a5b2ae5547758e1358b
Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
---
 arch/loongarch/loongson64/numa.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c
index d875aeded754..5f3a28534617 100644
--- a/arch/loongarch/loongson64/numa.c
+++ b/arch/loongarch/loongson64/numa.c
@@ -80,32 +80,34 @@ static void __init pcpu_populate_pte(unsigned long addr)
 	pmd_t *pmd;

 	if (p4d_none(*p4d)) {
-		pud_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		pgd_populate(&init_mm, pgd, new);
+		pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pud)
+			panic("%s: Failed to allocate memory\n", __func__);
+		p4d_populate(&init_mm, p4d, pud);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 #endif
 	}

 	pud = pud_offset(p4d, addr);
 	if (pud_none(*pud)) {
-		pmd_t *new;
-
-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		pud_populate(&init_mm, pud, new);
+		pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pmd)
+			panic("%s: Failed to allocate memory\n", __func__);
+		pud_populate(&init_mm, pud, pmd);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+		pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
 #endif
 	}

 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd)) {
-		pte_t *new;
+		pte_t *pte;

-		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pte)
+			panic("%s: Failed to allocate memory\n", __func__);
+		pmd_populate_kernel(&init_mm, pmd, pte);
 	}
 }
--
Gitee

From 9eec76110498279aa81becf8f4d381a6c5aafe8b Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Wed, 12 Jul 2023 11:16:22 +0800
Subject: [PATCH 154/241] LoongArch: mm: Introduce unified function
 populate_kernel_pte()

Functions pcpu_populate_pte() and fixmap_pte() are similar: both populate
one page of the kernel address space. And there is confusion between pgd
and p4d in the function fixmap_pte(); for example, pgd_none() always
returns zero there. This patch introduces a unified function
populate_kernel_pte() and then replaces pcpu_populate_pte() and
fixmap_pte().
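To see the pgd/p4d confusion concretely: with a folded p4d, the generic
header hardwires the pgd-level check to false (paraphrased below), so the
old "if (pgd_none(*pgd))" branch in fixmap_pte() could never trigger; the
none-check has to be made at the p4d level, which populate_kernel_pte()
now does:

  /* include/asm-generic/pgtable-nop4d.h, paraphrased */
  static inline int pgd_none(pgd_t pgd)
  {
          return 0;       /* a folded pgd entry can never be "none" */
  }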
Change-Id: I8e492c9cc2319e5e15e62e07233266134a13371f Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgalloc.h | 1 + arch/loongarch/loongson64/numa.c | 36 +---------------------- arch/loongarch/mm/init.c | 43 ++++++++++++++-------------- 3 files changed, 23 insertions(+), 57 deletions(-) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index e94de7f25366..146a61e6f4a1 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -101,4 +101,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) #endif /* __PAGETABLE_PUD_FOLDED */ +extern pte_t * __init populate_kernel_pte(unsigned long addr); #endif /* _ASM_PGALLOC_H */ diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c index 5f3a28534617..74b1ec696771 100644 --- a/arch/loongarch/loongson64/numa.c +++ b/arch/loongarch/loongson64/numa.c @@ -74,41 +74,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size) static void __init pcpu_populate_pte(unsigned long addr) { - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d = p4d_offset(pgd, addr); - pud_t *pud; - pmd_t *pmd; - - if (p4d_none(*p4d)) { - pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!pud) - panic("%s: Failed to allocate memory\n", __func__); - p4d_populate(&init_mm, p4d, pud); -#ifndef __PAGETABLE_PUD_FOLDED - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); -#endif - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!pmd) - panic("%s: Failed to allocate memory\n", __func__); - pud_populate(&init_mm, pud, pmd); -#ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); -#endif - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *pte; - - pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - if (!pte) - panic("%s: Failed to allocate memory\n", __func__); - pmd_populate_kernel(&init_mm, pmd, pte); - } + populate_kernel_pte(addr); } void __init setup_per_cpu_areas(void) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index 69449219e2a9..d8f939791116 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -338,43 +338,42 @@ void vmemmap_free(unsigned long start, unsigned long end, } #endif -static pte_t *fixmap_pte(unsigned long addr) +pte_t * __init populate_kernel_pte(unsigned long addr) { - pgd_t *pgd; - p4d_t *p4d; + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud; pmd_t *pmd; - pgd = pgd_offset_k(addr); - p4d = p4d_offset(pgd, addr); - - if (pgd_none(*pgd)) { - pud_t *new __maybe_unused; - - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pgd_populate(&init_mm, pgd, new); + if (p4d_none(*p4d)) { + pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pud) + panic("%s: Failed to allocate memory\n", __func__); + p4d_populate(&init_mm, p4d, pud); #ifndef __PAGETABLE_PUD_FOLDED - pud_init((unsigned long)new, (unsigned long)invalid_pmd_table); + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); #endif } pud = pud_offset(p4d, addr); if (pud_none(*pud)) { - pmd_t *new __maybe_unused; - - new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); - pud_populate(&init_mm, pud, new); + pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pmd) + panic("%s: Failed to allocate memory\n", __func__); + pud_populate(&init_mm, pud, pmd); #ifndef __PAGETABLE_PMD_FOLDED - pmd_init((unsigned long)new, (unsigned long)invalid_pte_table); + 
pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
 #endif
 	}

 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd)) {
-		pte_t *new __maybe_unused;
+	if (!pmd_present(*pmd)) {
+		pte_t *pte;

-		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pte)
+			panic("%s: Failed to allocate memory\n", __func__);
+		pmd_populate_kernel(&init_mm, pmd, pte);
 	}

 	return pte_offset_kernel(pmd, addr);
@@ -388,7 +387,7 @@ void __init __set_fixmap(enum fixed_addresses idx,

 	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

-	ptep = fixmap_pte(addr);
+	ptep = populate_kernel_pte(addr);

 	if (!pte_none(*ptep)) {
 		pte_ERROR(*ptep);
 		return;
--
Gitee

From 80df1405fb0bb54b0ef8e530a7a26c6be0dace4c Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Tue, 5 Sep 2023 16:01:46 +0800
Subject: [PATCH 155/241] LoongArch: Use statically defined zero page rather
 than allocated

On LoongArch systems, only one page is needed for the zero page (there are
no cache synonyms), and there is no COLOR_ZERO_PAGE, so zero_page_mask is
useless and the macro __HAVE_COLOR_ZERO_PAGE is not necessary.

Like other popular architectures, it is simpler to define the zero page in
the kernel BSS segment rather than allocate it dynamically.

Change-Id: I61014d275828d8ce3b8c76666b42c7613fd1414f
Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/mmzone.h  |  2 --
 arch/loongarch/include/asm/pgtable.h |  7 ++-----
 arch/loongarch/loongson64/numa.c     |  1 -
 arch/loongarch/mm/init.c             | 28 +---------------------------
 4 files changed, 3 insertions(+), 35 deletions(-)

diff --git a/arch/loongarch/include/asm/mmzone.h b/arch/loongarch/include/asm/mmzone.h
index db106f92dd9f..1be0073189fd 100644
--- a/arch/loongarch/include/asm/mmzone.h
+++ b/arch/loongarch/include/asm/mmzone.h
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];

 #define NODE_DATA(nid)	(node_data[(nid)])

-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 583278ce9a8e..7b37b0da6f10 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -18,12 +18,9 @@ struct vm_area_struct;
  * for zero-mapped memory areas etc..
  */

-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];

-#define ZERO_PAGE(vaddr) \
-	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)

 extern void paging_init(void);

diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c
index 74b1ec696771..3ad927a46d5b 100644
--- a/arch/loongarch/loongson64/numa.c
+++ b/arch/loongarch/loongson64/numa.c
@@ -432,7 +432,6 @@ void __init mem_init(void)
 {
 	high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
 	memblock_free_all();
-	setup_zero_pages();	/* This comes from node 0 */
 	mem_init_print_info(NULL);
 }

diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index d8f939791116..151144642dd8 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -37,33 +37,8 @@
 #include
 #include

-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed. Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
-	unsigned int order, i;
-	struct page *page;
-
-	order = 0;
-
-	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!empty_zero_page)
-		panic("Oh boy, that early out of memory?");
-
-	page = virt_to_page((void *)empty_zero_page);
-	split_page(page, order);
-	for (i = 0; i < (1 << order); i++, page++)
-		mark_page_reserved(page);
-
-	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}

 void copy_user_highpage(struct page *to, struct page *from,
 	unsigned long vaddr, struct vm_area_struct *vma)
@@ -108,7 +83,6 @@ void __init mem_init(void)
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 	memblock_free_all();
-	setup_zero_pages();	/* Setup zeroed pages. */
 	mem_init_print_info(NULL);
 }
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
--
Gitee

From ed39df67f26dd15cdb697e10f8024f9851b61de9 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Wed, 30 Aug 2023 11:00:30 +0800
Subject: [PATCH 156/241] LoongArch: Define symbol 'fault' as a local label in
 fpu.S

The initial aim is to silence the following objtool warnings:

  arch/loongarch/kernel/fpu.o: warning: objtool: _save_fp_context() falls through to next function fault()
  arch/loongarch/kernel/fpu.o: warning: objtool: _restore_fp_context() falls through to next function fault()
  arch/loongarch/kernel/fpu.o: warning: objtool: _save_lsx_context() falls through to next function fault()
  arch/loongarch/kernel/fpu.o: warning: objtool: _restore_lsx_context() falls through to next function fault()
  arch/loongarch/kernel/fpu.o: warning: objtool: _save_lasx_context() falls through to next function fault()
  arch/loongarch/kernel/fpu.o: warning: objtool: _restore_lasx_context() falls through to next function fault()

Currently, SYM_FUNC_START()/SYM_FUNC_END() defines the symbol 'fault' as
SYM_T_FUNC, which is STT_FUNC; the objtool warnings are generated by the
following code:

tools/objtool/include/objtool/check.h:

  static inline struct symbol *insn_func(struct instruction *insn)
  {
	struct symbol *sym = insn->sym;

	if (sym && sym->type != STT_FUNC)
		sym = NULL;

	return sym;
  }

tools/objtool/check.c:

  static int validate_branch(struct objtool_file *file, struct symbol *func,
			     struct instruction *insn, struct insn_state state)
  {
	...
	if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
	...
		WARN("%s() falls through to next function %s()",
		     func->name, insn_func(insn)->name);
		return 1;
	}
	...
  }

We can see that the fixup can be a local label in the following code:

arch/loongarch/include/asm/asm-extable.h:

  .pushsection	__ex_table, "a";		\
  .balign	4;				\
  .long		((insn) - .);			\
  .long		((fixup) - .);			\
  .short	(type);				\
  .short	(data);				\
  .popsection;

  .macro		_asm_extable, insn, fixup
  __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
  .endm

Like arch/loongarch/lib/*.S, just define the symbol 'fault' as a local
label in fpu.S.
Before:

  $ readelf -s arch/loongarch/kernel/fpu.o | awk -F: /fault/'{print $2}'
  000000000000053c     8 FUNC    GLOBAL DEFAULT    1 fault

After:

  $ readelf -s arch/loongarch/kernel/fpu.o | awk -F: /fault/'{print $2}'
  000000000000053c     0 NOTYPE  LOCAL  DEFAULT    1 .L_fault

Change-Id: Icb3ed79a39c54db2737260bac390a8dbd9e0b164
Co-developed-by: Youling Tang
Signed-off-by: Youling Tang
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/fpu.S | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index 078f2f7aae4a..653a785caa60 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -25,7 +25,7 @@
 	.macro	EX insn, reg, src, offs
 .ex\@:	\insn	\reg, \src, \offs
-	_asm_extable .ex\@, fault
+	_asm_extable .ex\@, .L_fault
 	.endm

 	.macro EX_V insn, reg, src, offs
@@ -35,7 +35,7 @@
 	parse_vr __reg, \reg
 .ex\@:
 	.word __insn << 22 | __offs << 10 | __src << 5 | __reg
-	_asm_extable .ex\@, fault
+	_asm_extable .ex\@, .L_fault
 	.endm

 	.macro EX_XV insn, reg, src, offs
@@ -45,7 +45,7 @@
 	parse_xr __reg, \reg
 .ex\@:
 	.word __insn << 22 | __offs << 10 | __src << 5 | __reg
-	_asm_extable .ex\@, fault
+	_asm_extable .ex\@, .L_fault
 	.endm

 	.macro sc_save_fp base
@@ -529,7 +529,6 @@ SYM_FUNC_START(_restore_lasx_context)
 	jirl	zero, ra, 0
 SYM_FUNC_END(_restore_lasx_context)

-SYM_FUNC_START(fault)
+.L_fault:
 	li.w	a0, -EFAULT	# failure
 	jirl	zero, ra, 0
-SYM_FUNC_END(fault)
--
Gitee

From fbe3a6c04e8052503f45165c1736335e5b4c88cd Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Sat, 16 Sep 2023 17:23:30 +0800
Subject: [PATCH 157/241] LoongArch: Fix some build warnings with W=1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are some build warnings when building the LoongArch kernel with
W=1, as follows; this patch fixes them.
arch/loongarch/kernel/acpi.c:284:13: warning: no previous prototype for ‘acpi_numa_arch_fixup’ [-Wmissing-prototypes] 284 | void __init acpi_numa_arch_fixup(void) {} | ^~~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/time.c:32:13: warning: no previous prototype for ‘constant_timer_interrupt’ [-Wmissing-prototypes] 32 | irqreturn_t constant_timer_interrupt(int irq, void *data) | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/traps.c:496:25: warning: no previous prototype for 'do_fpe' [-Wmissing-prototypes] 496 | asmlinkage void noinstr do_fpe(struct pt_regs *regs | ^~~~~~ arch/loongarch/kernel/traps.c:813:22: warning: variable ‘opcode’ set but not used [-Wunused-but-set-variable] 813 | unsigned int opcode; | ^~~~~~ arch/loongarch/kernel/syscall.c:21:40: warning: initialized field overwritten [-Woverride-init] 21 | #define __SYSCALL(nr, call) [nr] = (call), | ^ arch/loongarch/kernel/syscall.c:40:14: warning: no previous prototype for ‘do_syscall’ [-Wmissing-prototypes] 40 | void noinstr do_syscall(struct pt_regs *regs) | ^~~~~~~~~~ arch/loongarch/kernel/smp.c:502:17: warning: no previous prototype for ‘start_secondary’ [-Wmissing-prototypes] 502 | asmlinkage void start_secondary(void) | ^~~~~~~~~~~~~~~ arch/loongarch/kernel/process.c:309:15: warning: no previous prototype for ‘arch_align_stack’ [-Wmissing-prototypes] 309 | unsigned long arch_align_stack(unsigned long sp) | ^~~~~~~~~~~~~~~~ arch/loongarch/kernel/topology.c:13:5: warning: no previous prototype for ‘arch_register_cpu’ [-Wmissing-prototypes] 13 | int arch_register_cpu(int cpu) | ^~~~~~~~~~~~~~~~~ arch/loongarch/kernel/topology.c:27:6: warning: no previous prototype for ‘arch_unregister_cpu’ [-Wmissing-prototypes] 27 | void arch_unregister_cpu(int cpu) | ^~~~~~~~~~~~~~~~~~~ arch/loongarch/kernel/module-sections.c:103:5: warning: no previous prototype for ‘module_frob_arch_sections’ [-Wmissing-prototypes] 103 | int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/loongarch/mm/hugetlbpage.c:56:5: warning: no previous prototype for ‘is_aligned_hugepage_range’ [-Wmissing-prototypes] 56 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | ^~~~~~~~~~~~~~~~~~~~~~~~~ Change-Id: I4b55b3579f0f07cea48618d1899e4b101838a854 Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/exception.h | 45 +++++++++++++++++++++++++ arch/loongarch/include/asm/smp.h | 1 + arch/loongarch/kernel/Makefile | 4 +++ arch/loongarch/kernel/acpi.c | 1 - arch/loongarch/kernel/module-sections.c | 1 + arch/loongarch/kernel/process.c | 1 + arch/loongarch/kernel/signal.c | 3 +- arch/loongarch/kernel/smp.c | 3 ++ arch/loongarch/kernel/syscall.c | 1 + arch/loongarch/kernel/time.c | 2 +- arch/loongarch/kernel/topology.c | 3 ++ arch/loongarch/kernel/traps.c | 25 +++----------- arch/loongarch/mm/fault.c | 2 +- arch/loongarch/mm/hugetlbpage.c | 12 ------- arch/loongarch/mm/ioremap.c | 1 + arch/loongarch/mm/tlb.c | 2 +- 16 files changed, 70 insertions(+), 37 deletions(-) create mode 100644 arch/loongarch/include/asm/exception.h diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h new file mode 100644 index 000000000000..af74a3fdcad1 --- /dev/null +++ b/arch/loongarch/include/asm/exception.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_EXCEPTION_H +#define __ASM_EXCEPTION_H + +#include +#include + +void show_registers(struct pt_regs *regs); + +asmlinkage void cache_parity_error(void); +asmlinkage void noinstr 
do_ade(struct pt_regs *regs); +asmlinkage void noinstr do_ale(struct pt_regs *regs); +asmlinkage void noinstr do_bce(struct pt_regs *regs); +asmlinkage void noinstr do_bp(struct pt_regs *regs); +asmlinkage void noinstr do_ri(struct pt_regs *regs); +asmlinkage void noinstr do_fpu(struct pt_regs *regs); +asmlinkage void noinstr do_fpe(struct pt_regs *regs, unsigned long fcsr); +asmlinkage void noinstr do_lsx(struct pt_regs *regs); +asmlinkage void noinstr do_lasx(struct pt_regs *regs); +asmlinkage void noinstr do_lbt(struct pt_regs *regs); +asmlinkage void noinstr do_watch(struct pt_regs *regs); +asmlinkage void noinstr do_syscall(struct pt_regs *regs); +asmlinkage void noinstr do_reserved(struct pt_regs *regs); +asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp); +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address); + +asmlinkage void handle_ade(void); +asmlinkage void handle_ale(void); +asmlinkage void handle_bce(void); +asmlinkage void handle_sys(void); +asmlinkage void handle_bp(void); +asmlinkage void handle_ri(void); +asmlinkage void handle_fpu(void); +asmlinkage void handle_fpe(void); +asmlinkage void handle_lsx(void); +asmlinkage void handle_lasx(void); +asmlinkage void handle_lbt(void); +asmlinkage void handle_watch(void); +asmlinkage void handle_reserved(void); +asmlinkage void handle_vint(void); +asmlinkage void noinstr handle_loongarch_irq(struct pt_regs *regs); + +#endif /* __ASM_EXCEPTION_H */ diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index bafbdd305378..9cba390dfdfe 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -95,6 +95,7 @@ struct secondary_data { extern struct secondary_data cpuboot_data; extern asmlinkage void smpboot_entry(void); +extern asmlinkage void start_secondary(void); extern void calculate_cpu_foreign_map(void); diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 07c76dc298b1..18d04b7fe2a9 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -15,6 +15,10 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o +CFLAGS_module.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) +CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 1263019fc125..994b225f7494 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -435,7 +435,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) pxm, pa->apic_id, node); } -void __init acpi_numa_arch_fixup(void) {} #endif void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 13d9a427325a..9af39d43ba9e 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -6,6 +6,7 @@ #include #include #include +#include Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val) { diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 49d2e825f29b..0ef110c12609 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include 
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 2fd6dc72e1c2..757c0ee61e92 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -694,7 +695,7 @@ void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { int sig; sigset_t set; diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 9e6e41944735..a1648d7addf8 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -264,11 +265,13 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) return 0; } +#ifdef CONFIG_PROFILING /* Not really SMP stuff ... */ int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif static void flush_tlb_all_ipi(void *info) { diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c index 105c370a8c22..58454baa1d59 100644 --- a/arch/loongarch/kernel/syscall.c +++ b/arch/loongarch/kernel/syscall.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 8331b937503b..60f4d4ed52da 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -34,7 +34,7 @@ static void constant_event_handler(struct clock_event_device *dev) { } -irqreturn_t constant_timer_interrupt(int irq, void *data) +static irqreturn_t constant_timer_interrupt(int irq, void *data) { int cpu = smp_processor_id(); struct clock_event_device *cd; diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index ae5f0e5414a3..d5ea7a08487e 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -6,6 +7,8 @@ #include #include +#include + static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 58d55cee8fcd..53b3e51476c9 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -36,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -52,21 +52,6 @@ #include "access-helper.h" -extern asmlinkage void handle_ade(void); -extern asmlinkage void handle_ale(void); -extern asmlinkage void handle_bce(void); -extern asmlinkage void handle_sys(void); -extern asmlinkage void handle_bp(void); -extern asmlinkage void handle_ri(void); -extern asmlinkage void handle_fpu(void); -extern asmlinkage void handle_fpe(void); -extern asmlinkage void handle_lbt(void); -extern asmlinkage void handle_lsx(void); -extern asmlinkage void handle_lasx(void); -extern asmlinkage void handle_reserved(void); -extern asmlinkage void handle_watch(void); -extern asmlinkage void handle_vint(void); - static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -442,8 +427,8 @@ static inline void setup_vint_size(unsigned int size) * happen together with Overflow or Underflow, and `ptrace' can set * any bits. 
*/ -void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr, - struct task_struct *tsk) +static void force_fcsr_sig(unsigned long fcsr, + void __user *fault_addr, struct task_struct *tsk) { int si_code = FPE_FLTUNK; @@ -461,7 +446,7 @@ void force_fcsr_sig(unsigned long fcsr, void __user *fault_addr, force_sig_fault(SIGFPE, si_code, fault_addr); } -int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr) +static int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcsr) { int si_code; struct vm_area_struct *vma; @@ -809,7 +794,7 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs) asmlinkage void noinstr do_ri(struct pt_regs *regs) { int status = SIGILL; - unsigned int opcode = 0; + unsigned int __maybe_unused opcode; unsigned int __user *era = (unsigned int __user *)exception_era(regs); irqentry_state_t state = irqentry_enter(regs); diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 605579b19a00..f7485d93eb88 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -20,11 +20,11 @@ #include #include #include -#include #include #include #include +#include #include #include diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index bfc6f4fd2f98..f214b0bfc1b4 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -50,18 +50,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return (pte_t *) pmd; } -/* - * This function checks for proper alignment of input addr and len parameters. - */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - return 0; -} - int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index ed732b96d0a5..c635a6afc6f7 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -8,6 +8,7 @@ */ #include +#include void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) { diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index dc0c9bb6f252..55d69c42adaf 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -254,7 +254,7 @@ static unsigned long pcpu_handlers[NR_CPUS]; #endif extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; -void setup_tlb_handler(int cpu) +static void setup_tlb_handler(int cpu) { setup_ptwalker(); local_flush_tlb_all(); -- Gitee From 432eb8de17854d22f718a11f430674dace319aba Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 14 Sep 2023 13:34:45 +0300 Subject: [PATCH 158/241] LoongArch: Use _UL() and _ULL() Use _UL() and _ULL() that are provided by const.h. Change-Id: Ie21b57c67ebe3f5b43e1fc135c7c77db4e0b06a4 Signed-off-by: Andy Shevchenko Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/addrspace.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index 6132318e428e..78d620804f72 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -14,7 +14,7 @@ */ #ifndef __ASSEMBLY__ #ifndef PHYS_OFFSET -#define PHYS_OFFSET _AC(0, UL) +#define PHYS_OFFSET _UL(0) #endif extern unsigned long vm_map_base; #endif /* __ASSEMBLY__ */ @@ -38,7 +38,7 @@ extern unsigned long vm_map_base; * Memory above this physical address will be considered highmem. 
 */
 #ifndef HIGHMEM_START
-#define HIGHMEM_START		(_AC(1, UL) << _AC(DMW_PABITS, UL))
+#define HIGHMEM_START		(_UL(1) << _UL(DMW_PABITS))
 #endif

 #define TO_PHYS(x)		(		((x) & TO_PHYS_MASK))
@@ -60,16 +60,16 @@ extern unsigned long vm_map_base;
 #define _ATYPE_
 #define _ATYPE32_
 #define _ATYPE64_
-#define _CONST64_(x)	x
 #else
 #define _ATYPE_		__PTRDIFF_TYPE__
 #define _ATYPE32_	int
 #define _ATYPE64_	__s64
+#endif
+
 #ifdef CONFIG_64BIT
-#define _CONST64_(x)	x ## UL
+#define _CONST64_(x)	_UL(x)
 #else
-#define _CONST64_(x)	x ## ULL
-#endif
+#define _CONST64_(x)	_ULL(x)
 #endif

 /*
--
Gitee

From 499a7c423e42155735928a9533e31963e52d7774 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Tue, 19 Sep 2023 17:07:08 +0800
Subject: [PATCH 159/241] LoongArch: Remove dead code in relocate_new_kernel

The initial aim is to silence the following objtool warning:

  arch/loongarch/kernel/relocate_kernel.o: warning: objtool: relocate_new_kernel+0x74: unreachable instruction

There are two adjacent "b" instructions; the second one is unreachable
dead code, so just remove it.

Change-Id: I480be293eca15027abe7a4cf5ff0088a850897b6
Co-developed-by: Jinyang He
Signed-off-by: Jinyang He
Co-developed-by: Youling Tang
Signed-off-by: Youling Tang
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/relocate_kernel.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index 3a4cf7426891..358f0b0f38ba 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -73,7 +73,6 @@ copy_word:
 	LONG_ADDI	s5, s5, -1
 	beqz		s5, process_entry
 	b		copy_word
-	b		process_entry

 done:
 	ibar		0
--
Gitee

From ecb6137611ce1b3286cbc1385a2cc751bfc47f2f Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Mon, 11 Sep 2023 16:21:17 +0800
Subject: [PATCH 160/241] LoongArch: Set all reserved memblocks on Node#0 at
 initialization

After commit 61167ad5fecdea ("mm: pass nid to reserve_bootmem_region()")
we get a panic if DEFERRED_STRUCT_PAGE_INIT is enabled:

[    0.000000] CPU 0 Unable to handle kernel paging request at virtual address 0000000000002b82, era == 90000000040e3f28, ra == 90000000040e3f18
[    0.000000] Oops[#1]:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.5.0+ #733
[    0.000000] pc 90000000040e3f28 ra 90000000040e3f18 tp 90000000046f4000 sp 90000000046f7c90
[    0.000000] a0 0000000000000001 a1 0000000000200000 a2 0000000000000040 a3 90000000046f7ca0
[    0.000000] a4 90000000046f7ca4 a5 0000000000000000 a6 90000000046f7c38 a7 0000000000000000
[    0.000000] t0 0000000000000002 t1 9000000004b00ac8 t2 90000000040e3f18 t3 90000000040f0800
[    0.000000] t4 00000000000f0000 t5 80000000ffffe07e t6 0000000000000003 t7 900000047fff5e20
[    0.000000] t8 aaaaaaaaaaaaaaab u0 0000000000000018 s9 0000000000000000 s0 fffffefffe000000
[    0.000000] s1 0000000000000000 s2 0000000000000080 s3 0000000000000040 s4 0000000000000000
[    0.000000] s5 0000000000000000 s6 fffffefffe000000 s7 900000000470b740 s8 9000000004ad4000
[    0.000000]    ra: 90000000040e3f18 reserve_bootmem_region+0xec/0x21c
[    0.000000]   ERA: 90000000040e3f28 reserve_bootmem_region+0xfc/0x21c
[    0.000000]  CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
[    0.000000]  PRMD: 00000000 (PPLV0 -PIE -PWE)
[    0.000000]  EUEN: 00000000 (-FPE -SXE -ASXE -BTE)
[    0.000000]  ECFG: 00070800 (LIE=11 VS=7)
[    0.000000] ESTAT: 00010800 [PIL] (IS=11 ECode=1 EsubCode=0)
[    0.000000]  BADV: 0000000000002b82
[    0.000000]  PRID: 0014d000 (Loongson-64bit, Loongson-3A6000)
[    0.000000] Modules linked in:
[
0.000000] Process swapper (pid: 0, threadinfo=(____ptrval____), task=(____ptrval____))
[    0.000000] Stack : 0000000000000000 9000000002eb5430 0000003a00000020 90000000045ccd00
[    0.000000]         900000000470e000 90000000002c1918 0000000000000000 9000000004110780
[    0.000000]         00000000fe6c0000 0000000480000000 9000000004b4e368 9000000004110748
[    0.000000]         0000000000000000 900000000421ca84 9000000004620000 9000000004564970
[    0.000000]         90000000046f7d78 9000000002cc9f70 90000000002c1918 900000000470e000
[    0.000000]         9000000004564970 90000000040bc0e0 90000000046f7d78 0000000000000000
[    0.000000]         0000000000004000 90000000045ccd00 0000000000000000 90000000002c1918
[    0.000000]         90000000002c1900 900000000470b700 9000000004b4df78 9000000004620000
[    0.000000]         90000000046200a8 90000000046200a8 0000000000000000 9000000004218b2c
[    0.000000]         9000000004270008 0000000000000001 0000000000000000 90000000045ccd00
[    0.000000]         ...
[    0.000000] Call Trace:
[    0.000000] [<90000000040e3f28>] reserve_bootmem_region+0xfc/0x21c
[    0.000000] [<900000000421ca84>] memblock_free_all+0x114/0x350
[    0.000000] [<9000000004218b2c>] mm_core_init+0x138/0x3cc
[    0.000000] [<9000000004200e38>] start_kernel+0x488/0x7a4
[    0.000000] [<90000000040df0d8>] kernel_entry+0xd8/0xdc
[    0.000000]
[    0.000000] Code: 02eb21ad 00410f4c 380c31ac <262b818d> 6800b70d 02c1c196 0015001c 57fe4bb1 260002cd

The reason is that an early memblock_reserve() in memblock_init() sets the
node id to MAX_NUMNODES, making NODE_DATA(nid) a NULL dereference in the
call chain reserve_bootmem_region() -> init_reserved_page(). After
memblock_init(), those late calls of memblock_reserve() operate on
subregions of memblock.memory regions. As a result, these reserved regions
will be set to the correct node at the first iteration of
memmap_init_reserved_pages(). So setting all reserved memblocks on Node#0
at initialization can avoid this panic.

Change-Id: I4d9d511131f7fba3ed7d31a2646d032166ba356b
Reported-by: WANG Xuerui
Tested-by: WANG Xuerui
Reviewed-by: WANG Xuerui # with nits addressed
Signed-off-by: Huacai Chen
---
 arch/loongarch/loongson64/mem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/loongson64/mem.c b/arch/loongarch/loongson64/mem.c
index 35a23146252e..44bd3c184dd6 100644
--- a/arch/loongarch/loongson64/mem.c
+++ b/arch/loongarch/loongson64/mem.c
@@ -51,7 +51,6 @@ void __init memblock_init(void)
 	}

 	memblock_set_current_limit(PFN_PHYS(max_low_pfn));
-	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);

 	/* Reserve the first 2MB */
 	memblock_reserve(PHYS_OFFSET, 0x200000);
@@ -59,4 +58,7 @@ void __init memblock_init(void)
 	/* Reserve the kernel text/data/bss */
 	memblock_reserve(__pa_symbol(&_text),
 			 __pa_symbol(&_end) - __pa_symbol(&_text));
+
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.reserved, 0);
 }
--
Gitee

From b5840f3f97c00ff531b274c74de257891a8179e5 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 26 Sep 2023 19:53:49 +0800
Subject: [PATCH 161/241] LoongArch: numa: Fix high_memory calculation

For a 64-bit kernel without HIGHMEM, high_memory is the virtual address of
the highest physical address in the system. But
__va(get_num_physpages() << PAGE_SHIFT) is not what we want for
high_memory, because there may be holes in the physical address space. On
the other hand, max_low_pfn is calculated from memblock_end_of_DRAM(),
which exactly corresponds to the highest physical address, so use it for
the high_memory calculation (see the example below).
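A made-up memory map illustrates the difference (numbers are examples
only):

  /*
   * node 0 RAM : 0x00000000 - 0x0fffffff  (256 MB)
   * hole       : 0x10000000 - 0x1fffffff
   * node 1 RAM : 0x20000000 - 0x2fffffff  (256 MB)
   *
   * get_num_physpages() counts 512 MB of present pages, so the old
   * formula yields __va(0x20000000) -- inside the hole, below the real
   * top of RAM.  max_low_pfn is derived from memblock_end_of_DRAM()
   * and gives the intended __va(0x30000000).
   */
  high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);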
Change-Id: I0138693006556e73ab00ac52306f98c527406ce7
Cc:
Signed-off-by: Chong Qiao
Signed-off-by: Huacai Chen
---
 arch/loongarch/loongson64/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/loongson64/numa.c b/arch/loongarch/loongson64/numa.c
index 3ad927a46d5b..5abf1925ed09 100644
--- a/arch/loongarch/loongson64/numa.c
+++ b/arch/loongarch/loongson64/numa.c
@@ -430,7 +430,7 @@ void __init paging_init(void)

 void __init mem_init(void)
 {
-	high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
+	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 	memblock_free_all();
 	mem_init_print_info(NULL);
 }
--
Gitee

From b69840f4dfa5ab1128a1d33ddf5a1543667fba03 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Tue, 26 Sep 2023 16:10:50 +0800
Subject: [PATCH 162/241] LoongArch: Define relocation types for ABI v2.10

The relocation types from 101 to 109 are used by GNU binutils >= 2.41;
add their definitions so that they can be used in later patches.

Change-Id: Ie674af13f91249ab234cedd2897e98a997b39ea6
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=include/elf/loongarch.h#l230
Cc:
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/elf.h | 9 +++++++++
 arch/loongarch/kernel/module.c   | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 9107fa0dcca3..97427d4fa531 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -116,6 +116,15 @@
 #define R_LARCH_TLS_GD_HI20 98
 #define R_LARCH_32_PCREL 99
 #define R_LARCH_RELAX 100
+#define R_LARCH_DELETE 101
+#define R_LARCH_ALIGN 102
+#define R_LARCH_PCREL20_S2 103
+#define R_LARCH_CFA 104
+#define R_LARCH_ADD6 105
+#define R_LARCH_SUB6 106
+#define R_LARCH_ADD_ULEB128 107
+#define R_LARCH_SUB_ULEB128 108
+#define R_LARCH_64_PCREL 109

 #ifndef ELF_ARCH
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index fcb7881ef9d2..4eefd5d2de87 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -389,7 +389,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v,

 /* The handlers for known reloc types */
 static reloc_rela_handler reloc_rela_handlers[] = {
-	[R_LARCH_NONE ... R_LARCH_RELAX]	= apply_r_larch_error,
+	[R_LARCH_NONE ... R_LARCH_64_PCREL]	= apply_r_larch_error,

 	[R_LARCH_NONE]				= apply_r_larch_none,
 	[R_LARCH_32]				= apply_r_larch_32,
--
Gitee

From fd57faace1841588139654f3135d0bfcffb7995d Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Tue, 26 Sep 2023 16:10:51 +0800
Subject: [PATCH 163/241] LoongArch: Add support for 32_PCREL relocation type

When building and updating the kernel with the latest upstream binutils
and loongson3_defconfig, the module loader fails with:

  kmod: zsmalloc: Unsupport relocation type 99, please add its support.
  kmod: fuse: Unsupport relocation type 99, please add its support.
  kmod: ipmi_msghandler: Unsupport relocation type 99, please add its support.
  kmod: ipmi_msghandler: Unsupport relocation type 99, please add its support.
  kmod: pstore: Unsupport relocation type 99, please add its support.
  kmod: drm_display_helper: Unsupport relocation type 99, please add its support.
  kmod: drm_display_helper: Unsupport relocation type 99, please add its support.
  kmod: drm_display_helper: Unsupport relocation type 99, please add its support.
  kmod: fuse: Unsupport relocation type 99, please add its support.
  kmod: fat: Unsupport relocation type 99, please add its support.
This is because the latest upstream binutils replaces a pair of ADD32 and
SUB32 with 32_PCREL, so add support for the 32_PCREL relocation type.

Change-Id: I1076b17943b1443505a86d1bc6376130fd48753a
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=ecb802d02eeb
Cc:
Co-developed-by: Youling Tang
Signed-off-by: Youling Tang
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/module.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 4eefd5d2de87..7b70d27a5e34 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -374,6 +374,15 @@ static int apply_r_larch_got_pc(struct module *mod,
 	return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type);
 }

+static int apply_r_larch_32_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+				  s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+	ptrdiff_t offset = (void *)v - (void *)location;
+
+	*(u32 *)location = offset;
+	return 0;
+}
+
 /*
  * reloc_handlers_rela() - Apply a particular relocation to a module
  * @mod: the module to apply the reloc to
@@ -403,6 +412,7 @@ static reloc_rela_handler reloc_rela_handlers[] = {
 	[R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U]	= apply_r_larch_sop_imm_field,
 	[R_LARCH_ADD32 ... R_LARCH_SUB64]			= apply_r_larch_add_sub,
 	[R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12]		= apply_r_larch_pcala,
+	[R_LARCH_32_PCREL]					= apply_r_larch_32_pcrel,
 };

 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
--
Gitee

From 99aa03ed8f7824333e1d46757a9eb67eec09dc7a Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Tue, 26 Sep 2023 16:10:52 +0800
Subject: [PATCH 164/241] LoongArch: Add support for 64_PCREL relocation type

When building and updating the kernel with the latest upstream binutils
and loongson3_defconfig, the module loader fails with:

  kmod: zsmalloc: Unknown relocation type 109
  kmod: fuse: Unknown relocation type 109
  kmod: fuse: Unknown relocation type 109
  kmod: radeon: Unknown relocation type 109
  kmod: nf_tables: Unknown relocation type 109
  kmod: nf_tables: Unknown relocation type 109

This is because the latest upstream binutils replaces a pair of ADD64 and
SUB64 with 64_PCREL, so add support for the 64_PCREL relocation type.

Change-Id: Ib03464a49ed8d33c184a6a22d903c689ef1da7a5
Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=ecb802d02eeb
Cc:
Signed-off-by: Tiezhu Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/module.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 7b70d27a5e34..492200917a7a 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -383,6 +383,15 @@ static int apply_r_larch_32_pcrel(struct module *mod, u32 *location, Elf_Addr v,
 	return 0;
 }

+static int apply_r_larch_64_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+				  s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+	ptrdiff_t offset = (void *)v - (void *)location;
+
+	*(u64 *)location = offset;
+	return 0;
+}
+
 /*
  * reloc_handlers_rela() - Apply a particular relocation to a module
  * @mod: the module to apply the reloc to
@@ -413,6 +422,7 @@ static reloc_rela_handler reloc_rela_handlers[] = {
 	[R_LARCH_ADD32 ...
R_LARCH_SUB64]			= apply_r_larch_add_sub,
 	[R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12]		= apply_r_larch_pcala,
 	[R_LARCH_32_PCREL]					= apply_r_larch_32_pcrel,
+	[R_LARCH_64_PCREL]					= apply_r_larch_64_pcrel,
 };

 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
--
Gitee

From 180120baff52dda67150102dd7d6c227c873f912 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang
Date: Mon, 5 Jun 2023 20:33:30 +0800
Subject: [PATCH 165/241] LoongArch: Add support to clone a time namespace

We can see that "Time namespaces are not supported" on LoongArch:

(1) clone3 test

  # cd tools/testing/selftests/clone3 && make && ./clone3
  ...
  # Time namespaces are not supported
  ok 18 # SKIP Skipping clone3() with CLONE_NEWTIME
  # Totals: pass:17 fail:0 xfail:0 xpass:0 skip:1 error:0

(2) timens test

  # cd tools/testing/selftests/timens && make && ./timens
  ...
  1..0 # SKIP Time namespaces are not supported

On LoongArch the current kernel does not support CONFIG_TIME_NS, which
depends on GENERIC_VDSO_TIME_NS; select GENERIC_VDSO_TIME_NS to enable
CONFIG_TIME_NS and build kernel/time/namespace.c.

Additionally, some arch-dependent functions need to be defined for the
timens, such as __arch_get_timens_vdso_data(), arch_get_vdso_data() and
vdso_join_timens(). At the same time, modify the layout of vvar to use one
page for generic vdso data, another page for timens vdso data, and
LOONGARCH_VDSO_DATA_SIZE (which may exceed one page if expanded in the
future) for loongarch vdso data; finally, add the callback function
vvar_fault() and modify stack_top().

With this patch under CONFIG_TIME_NS:

(1) clone3 test

  # cd tools/testing/selftests/clone3 && make && ./clone3
  ...
  ok 18 [739] Result (0) matches expectation (0)
  # Totals: pass:18 fail:0 xfail:0 xpass:0 skip:0 error:0

(2) timens test

  # cd tools/testing/selftests/timens && make && ./timens
  ...
# Totals: pass:10 fail:0 xfail:0 xpass:0 skip:0 error:0 Change-Id: I0260c4a9d69feef42d2577c08a1b7b55c48c39b7 Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/page.h | 1 + .../loongarch/include/asm/vdso/gettimeofday.h | 8 +- arch/loongarch/include/asm/vdso/vdso.h | 32 ++++- arch/loongarch/kernel/process.c | 2 +- arch/loongarch/kernel/vdso.c | 122 +++++++++++++++--- arch/loongarch/vdso/vgetcpu.c | 2 +- 7 files changed, 144 insertions(+), 24 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 97126fcac790..99e7e022bd2d 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -71,6 +71,7 @@ config LOONGARCH select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_TIME_NS select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 64bb53444908..5931fb323627 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -80,6 +80,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) #ifdef CONFIG_FLATMEM diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index 5d2f7c5f0c03..e7a10d62d077 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -96,9 +96,15 @@ static inline bool loongarch_vdso_hres_capable(void) static __always_inline const struct vdso_data *__arch_get_vdso_data(void) { - return get_vdso_data(); + return (const struct vdso_data *)get_vdso_data(); } +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return (const struct vdso_data *)(get_vdso_data() + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE); +} +#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index f7dd21689cb3..567be88d0fa5 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -16,10 +16,33 @@ struct vdso_pcpu_data { struct loongarch_vdso_data { struct vdso_pcpu_data pdata[NR_CPUS]; - struct vdso_data data[CS_BASES]; /* Arch-independent data */ }; -#define VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data)) +/* + * The layout of vvar: + * + * high + * +---------------------+--------------------------+ + * | loongarch vdso data | LOONGARCH_VDSO_DATA_SIZE | + * +---------------------+--------------------------+ + * | time-ns vdso data | PAGE_SIZE | + * +---------------------+--------------------------+ + * | generic vdso data | PAGE_SIZE | + * +---------------------+--------------------------+ + * low + */ +#define LOONGARCH_VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data)) +#define LOONGARCH_VDSO_DATA_PAGES (LOONGARCH_VDSO_DATA_SIZE >> PAGE_SHIFT) + +enum vvar_pages { + VVAR_GENERIC_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_LOONGARCH_PAGES_START, + VVAR_LOONGARCH_PAGES_END = VVAR_LOONGARCH_PAGES_START + LOONGARCH_VDSO_DATA_PAGES - 1, + VVAR_NR_PAGES, +}; + +#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) static inline unsigned long get_vdso_base(void) { @@ -34,10 +57,9 @@ static inline unsigned 
long get_vdso_base(void) return addr; } -static inline const struct vdso_data *get_vdso_data(void) +static inline unsigned long get_vdso_data(void) { - return (const struct vdso_data *)(get_vdso_base() - - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS); + return get_vdso_base() - VVAR_SIZE; } #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 0ef110c12609..3702a80cf2e7 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -298,7 +298,7 @@ unsigned long stack_top(void) /* Space for the VDSO & data page */ top -= PAGE_ALIGN(current->thread.vdso->size); - top -= PAGE_SIZE; + top -= VVAR_SIZE; /* Space to randomize the VDSO base */ if (current->flags & PF_RANDOMIZE) diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 6f31bc071d22..0a7dafc2ad35 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -26,14 +27,21 @@ extern char vdso_start[], vdso_end[]; /* Kernel-provided data used by the VDSO. */ static union { - u8 page[VDSO_DATA_SIZE]; + u8 page[PAGE_SIZE]; + struct vdso_data data[CS_BASES]; +} generic_vdso_data __page_aligned_data; + +static union { + u8 page[LOONGARCH_VDSO_DATA_SIZE]; struct loongarch_vdso_data vdata; } loongarch_vdso_data __page_aligned_data; static struct page *vdso_pages[] = { NULL }; -struct vdso_data *vdso_data = loongarch_vdso_data.vdata.data; +struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; +static struct page *find_timens_vvar_page(struct vm_area_struct *vma); + static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { current->mm->context.vdso = (void *)(new_vma->vm_start); @@ -41,6 +49,43 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struc return 0; } +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + unsigned long pfn; + struct page *timens_page = find_timens_vvar_page(vma); + + switch (vmf->pgoff) { + case VVAR_GENERIC_PAGE_OFFSET: + if (!timens_page) + pfn = sym_to_pfn(vdso_data); + else + pfn = page_to_pfn(timens_page); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace specific + * VVAR is mapped with the VVAR_GENERIC_PAGE_OFFSET and the real + * VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + else + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + case VVAR_LOONGARCH_PAGES_START ... 
VVAR_LOONGARCH_PAGES_END: + pfn = sym_to_pfn(&loongarch_vdso_data) + vmf->pgoff - VVAR_LOONGARCH_PAGES_START; + break; + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + struct loongarch_vdso_info vdso_info = { .vdso = vdso_start, .size = PAGE_SIZE, @@ -51,6 +96,7 @@ struct loongarch_vdso_info vdso_info = { }, .data_mapping = { .name = "[vvar]", + .fault = vvar_fault, }, .offset_sigreturn = vdso_offset_sigreturn, }; @@ -73,6 +119,59 @@ static int __init init_vdso(void) } subsys_initcall(init_vdso); +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a + * task changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + + mmap_read_lock(mm); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, &vdso_info.data_mapping)) + zap_page_range(vma, vma->vm_start, size); + } + mmap_read_unlock(mm); + + return 0; +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + static unsigned long vdso_base(void) { unsigned long base = STACK_TOP; @@ -88,7 +187,7 @@ static unsigned long vdso_base(void) int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { int ret; - unsigned long vvar_size, size, data_addr, vdso_addr; + unsigned long size, data_addr, vdso_addr; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct loongarch_vdso_info *info = current->thread.vdso; @@ -100,32 +199,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * Determine total area size. This includes the VDSO data itself * and the data pages. */ - vvar_size = VDSO_DATA_SIZE; - size = vvar_size + info->size; + size = VVAR_SIZE + info->size; data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(data_addr)) { ret = data_addr; goto out; } - vdso_addr = data_addr + VDSO_DATA_SIZE; - vma = _install_special_mapping(mm, data_addr, vvar_size, - VM_READ | VM_MAYREAD, + vma = _install_special_mapping(mm, data_addr, VVAR_SIZE, + VM_READ | VM_MAYREAD | VM_PFNMAP, &info->data_mapping); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; } - /* Map VDSO data page. */ - ret = remap_pfn_range(vma, data_addr, - virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT, - vvar_size, PAGE_READONLY); - if (ret) - goto out; - - /* Map VDSO code page. 
*/ + vdso_addr = data_addr + VVAR_SIZE; vma = _install_special_mapping(mm, vdso_addr, info->size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, &info->code_mapping); diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c index 43a0078e4418..5f9313ab3cb4 100644 --- a/arch/loongarch/vdso/vgetcpu.c +++ b/arch/loongarch/vdso/vgetcpu.c @@ -21,7 +21,7 @@ static __always_inline int read_cpu_id(void) static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) { - return (struct vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); + return (struct vdso_pcpu_data *)(get_vdso_data() + VVAR_LOONGARCH_PAGES_START * PAGE_SIZE); } int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) -- Gitee From d66b389548019d009484753f706e8d414de325e3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 Jun 2023 13:01:51 +0800 Subject: [PATCH 166/241] LoongArch: Add SMT (Simultaneous Multi-Threading) support Loongson-3A6000 has SMT (Simultaneous Multi-Threading) support: each physical core has two logical cores (threads). This patch adds SMT probing and scheduler support via ACPI PPTT. If SCHED_SMT is enabled, Loongson-3A6000 is treated as 4 cores, 8 threads; if SCHED_SMT is disabled, Loongson-3A6000 is treated as 8 cores, 8 threads. Remove smp_num_siblings to support HMP (Heterogeneous Multi-Processing). Change-Id: Ibd1c9909030e77de63db564966f6225670c5d765 Signed-off-by: Liupu Wang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 8 +++++++ arch/loongarch/include/asm/acpi.h | 9 ++++++++ arch/loongarch/include/asm/cpu-info.h | 1 + arch/loongarch/kernel/acpi.c | 32 +++++++++++++++++++++++++++ arch/loongarch/kernel/proc.c | 1 + arch/loongarch/kernel/smp.c | 17 +++++--------- arch/loongarch/loongson64/smp.c | 8 +++++-- drivers/acpi/Kconfig | 2 +- 8 files changed, 63 insertions(+), 15 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 99e7e022bd2d..362957502d52 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -3,6 +3,7 @@ config LOONGARCH bool default y select ACPI_MCFG if ACPI + select ACPI_PPTT if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI @@ -573,6 +574,13 @@ config EFI_STUB This kernel feature allows a vmlinuz.efi to be loaded directly by EFI firmware without the use of a bootloader. +config SCHED_SMT + bool "SMT scheduler support" + default y + help + Improves scheduler's performance when there are multiple + threads in one physical core.
+ config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 7b4c1c6e66b2..93bc0ebf303b 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -17,6 +17,7 @@ extern int acpi_strict; extern int acpi_disabled; extern int acpi_pci_disabled; extern int acpi_noirq; +extern int pptt_enabled; #define acpi_os_ioremap acpi_os_ioremap void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); @@ -34,6 +35,14 @@ static inline bool acpi_has_cpu_in_madt(void) } extern struct list_head acpi_wakeup_device_list; +extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; + +extern int __init parse_acpi_topology(void); + +static inline u32 get_acpi_id_for_cpu(unsigned int cpu) +{ + return acpi_core_pic[cpu_logical_map(cpu)].processor_id; +} #endif /* !CONFIG_ACPI */ diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index d3bfed781c4f..8757ba20b054 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -55,6 +55,7 @@ struct cpuinfo_loongarch { struct cache_desc cache_leaves[CACHE_LEAVES_MAX]; int core; /* physical core number in package */ int package;/* physical package number */ + int global_id; /* physical global thread number */ int vabits; /* Virtual Address size in bits */ int pabits; /* Physical Address size in bits */ unsigned int ksave_mask; /* Usable KSave mask. */ diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 994b225f7494..ea72ff898d6f 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -33,6 +33,8 @@ u64 acpi_saved_sp; #define PREFIX "ACPI: " +struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; + int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { if (irqp != NULL) @@ -163,6 +165,7 @@ acpi_parse_cpuintc(union acpi_subtable_headers *header, const unsigned long end) return -EINVAL; acpi_table_print_madt_entry(&header->common); + acpi_core_pic[processor->core_id] = *processor; set_processor_mask(processor->core_id, processor->flags); return 0; @@ -331,6 +334,35 @@ static void __init acpi_process_madt(void) } } +int pptt_enabled; + +int __init parse_acpi_topology(void) +{ + int cpu, topology_id; + + for_each_possible_cpu(cpu) { + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) { + pr_warn("Invalid BIOS PPTT\n"); + return -ENOENT; + } + + if (acpi_pptt_cpu_is_thread(cpu) <= 0) + cpu_data[cpu].core = topology_id; + else { + topology_id = find_acpi_cpu_topology(cpu, 1); + if (topology_id < 0) + return -ENOENT; + + cpu_data[cpu].core = topology_id; + } + } + + pptt_enabled = 1; + + return 0; +} + #ifndef CONFIG_SUSPEND int (*acpi_suspend_lowlevel)(void); #else diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index d70e611bd7e7..02f98f2a4d82 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -49,6 +49,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "processor\t\t: %ld\n", n); seq_printf(m, "package\t\t\t: %d\n", cpu_data[n].package); seq_printf(m, "core\t\t\t: %d\n", cpu_data[n].core); + seq_printf(m, "global_id\t\t: %d\n", cpu_data[n].global_id); seq_printf(m, "CPU Family\t\t: %s\n", __cpu_family[n]); seq_printf(m, "Model Name\t\t: %s\n", __cpu_full_name[n]); seq_printf(m, "CPU Revision\t\t: 0x%02x\n", version); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index a1648d7addf8..7dc70cee41d3 100644 
--- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -35,10 +35,6 @@ EXPORT_SYMBOL(__cpu_number_map); int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ EXPORT_SYMBOL(__cpu_logical_map); -/* Number of threads (siblings) per CPU core */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - /* Representing the threads (siblings) of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -69,15 +65,12 @@ static inline void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, &cpu_sibling_setup_map); - if (smp_num_siblings > 1) { - for_each_cpu(i, &cpu_sibling_setup_map) { - if (cpus_are_siblings(cpu, i)) { - cpumask_set_cpu(i, &cpu_sibling_map[cpu]); - cpumask_set_cpu(cpu, &cpu_sibling_map[i]); - } + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_set_cpu(i, &cpu_sibling_map[cpu]); + cpumask_set_cpu(cpu, &cpu_sibling_map[i]); } - } else - cpumask_set_cpu(cpu, &cpu_sibling_map[cpu]); + } } static inline void set_cpu_core_map(int cpu) diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 79b7669eb333..0d76fc9259e8 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -4,6 +4,7 @@ * Copyright (C) 2020 Loongson Technology Corporation Limited */ +#include #include #include #include @@ -219,10 +220,10 @@ static void loongson3_init_secondary(void) numa_add_cpu(cpu); #endif per_cpu(cpu_state, cpu) = CPU_ONLINE; - cpu_data[cpu].core = - cpu_logical_map(cpu) % loongson_sysconf.cores_per_package; cpu_data[cpu].package = cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; + cpu_data[cpu].core = pptt_enabled ? cpu_data[cpu].core : + cpu_logical_map(cpu) % loongson_sysconf.cores_per_package; } static void loongson3_smp_finish(void) @@ -259,8 +260,11 @@ static void __init loongson3_prepare_cpus(unsigned int max_cpus) { int i = 0; + parse_acpi_topology(); + for (i = 0; i < loongson_sysconf.nr_cpus; i++) { set_cpu_present(i, true); + cpu_data[i].global_id = __cpu_logical_map[i]; if (cpu_has_csripi) csr_mail_send(0, __cpu_logical_map[i], 0); diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index b9753969b0db..aff14992598c 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -514,10 +514,10 @@ config ACPI_CONFIGFS if ARM64 source "drivers/acpi/arm64/Kconfig" +endif config ACPI_PPTT bool -endif source "drivers/acpi/pmic/Kconfig" -- Gitee From bf8b8d86de11935170fd1d6606b16cf57ce44847 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 16 May 2023 11:54:40 +0800 Subject: [PATCH 167/241] LoongArch: Support dbar with different hints Traditionally, LoongArch uses "dbar 0" (full completion barrier) for everything. But the full completion barrier is a performance killer, so Loongson-3A6000 and newer processors have made finer granularity hints available: Bit4: ordering or completion (0: completion, 1: ordering) Bit3: barrier for previous read (0: true, 1: false) Bit2: barrier for previous write (0: true, 1: false) Bit1: barrier for succeeding read (0: true, 1: false) Bit0: barrier for succeeding write (0: true, 1: false) Hint 0x700: barrier for "read after read" from the same address, which is needed by LL-SC loops on old models (dbar 0x700 behaves the same as nop if such reordering is disabled on new models). This patch makes use of the various new hints for different kinds of memory barriers. 
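As a worked example of the encoding (an illustration for this log, not new code): the two acquire/release hints introduced in barrier.h below decode as

  or_rw = 0b10100: ordering barrier, previous reads are ordered before
          succeeding reads and writes, i.e. acquire semantics, used by
          ldacq_mb() in __smp_load_acquire();
  orw_w = 0b10010: ordering barrier, previous reads and writes are ordered
          before succeeding writes, i.e. release semantics, used by
          strel_mb() in __smp_store_release().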
It brings performance improvements on the Loongson-3A6000 series, while not affecting the existing models because all variants are treated as 'dbar 0' there. Why override queued_spin_unlock()? After commit 01e3b958efe85a26d9b ("drivers: Remove explicit invocations of mmiowb()") we need a completion barrier in queued_spin_unlock(), but the generic implementation uses smp_store_release(), which only provides an ordering barrier. Change-Id: I45fdeaf351bb3cfd4f7a74f1af36f5fc152de9cc Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/Kbuild | 1 - arch/loongarch/include/asm/barrier.h | 130 +++++++++++-------------- arch/loongarch/include/asm/io.h | 2 +- arch/loongarch/include/asm/qspinlock.h | 18 ++++ arch/loongarch/loongson64/smp.c | 2 +- arch/loongarch/mm/tlbex.S | 6 +- 6 files changed, 78 insertions(+), 81 deletions(-) create mode 100644 arch/loongarch/include/asm/qspinlock.h diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 0c311f3098eb..b032dcabfa11 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h generic-y += qrwlock.h -generic-y += qspinlock.h generic-y += rwsem.h generic-y += segment.h generic-y += user.h diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h index d9e3e1e6b9c2..f33162d4e405 100644 --- a/arch/loongarch/include/asm/barrier.h +++ b/arch/loongarch/include/asm/barrier.h @@ -7,27 +7,56 @@ #include -#define __sync() __asm__ __volatile__("dbar 0" : : :"memory") +/* + * Hint encoding: + * + * Bit4: ordering or completion (0: completion, 1: ordering) + * Bit3: barrier for previous read (0: true, 1: false) + * Bit2: barrier for previous write (0: true, 1: false) + * Bit1: barrier for succeeding read (0: true, 1: false) + * Bit0: barrier for succeeding write (0: true, 1: false) + * + * Hint 0x700: barrier for "read after read" from the same address + */ + +#define DBAR(hint) __asm__ __volatile__("dbar %0 " : : "I"(hint) : "memory") + +#define crwrw 0b00000 +#define cr_r_ 0b00101 +#define c_w_w 0b01010 -#define fast_wmb() __sync() -#define fast_rmb() __sync() -#define fast_mb() __sync() -#define fast_iob() __sync() -#define wbflush() __sync() +#define orwrw 0b10000 +#define or_r_ 0b10101 +#define o_w_w 0b11010 -#define wmb() fast_wmb() -#define rmb() fast_rmb() -#define mb() fast_mb() -#define iob() fast_iob() +#define orw_w 0b10010 +#define or_rw 0b10100 -#define __smp_mb() __asm__ __volatile__("dbar 0" : : :"memory") -#define __smp_rmb() __asm__ __volatile__("dbar 0" : : :"memory") -#define __smp_wmb() __asm__ __volatile__("dbar 0" : : :"memory") +#define c_sync() DBAR(crwrw) +#define c_rsync() DBAR(cr_r_) +#define c_wsync() DBAR(c_w_w) + +#define o_sync() DBAR(orwrw) +#define o_rsync() DBAR(or_r_) +#define o_wsync() DBAR(o_w_w) + +#define ldacq_mb() DBAR(or_rw) +#define strel_mb() DBAR(orw_w) + +#define mb() c_sync() +#define rmb() c_rsync() +#define wmb() c_wsync() +#define iob() c_sync() +#define wbflush() c_sync() + +#define __smp_mb() o_sync() +#define __smp_rmb() o_rsync() +#define __smp_wmb() o_wsync() #ifdef CONFIG_SMP -#define __WEAK_LLSC_MB " dbar 0 \n" +#define __WEAK_LLSC_MB " dbar 0x700 \n" #else -#define __WEAK_LLSC_MB " \n" +#define __WEAK_LLSC_MB " \n" #endif #define __smp_mb__before_atomic() barrier() @@ -61,68 +90,19 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, return mask; } -#define __smp_load_acquire(p) \
-({ \ - union { typeof(*p) __val; char __c[1]; } __u; \ - unsigned long __tmp = 0; \ - compiletime_assert_atomic_type(*p); \ - switch (sizeof(*p)) { \ - case 1: \ - *(__u8 *)__u.__c = *(volatile __u8 *)p; \ - __smp_mb(); \ - break; \ - case 2: \ - *(__u16 *)__u.__c = *(volatile __u16 *)p; \ - __smp_mb(); \ - break; \ - case 4: \ - __asm__ __volatile__( \ - "amor_db.w %[val], %[tmp], %[mem] \n" \ - : [val] "=&r" (*(__u32 *)__u.__c) \ - : [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp) \ - : "memory"); \ - break; \ - case 8: \ - __asm__ __volatile__( \ - "amor_db.d %[val], %[tmp], %[mem] \n" \ - : [val] "=&r" (*(__u64 *)__u.__c) \ - : [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp) \ - : "memory"); \ - break; \ - } \ - (typeof(*p))__u.__val; \ +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + ldacq_mb(); \ + ___p1; \ }) -#define __smp_store_release(p, v) \ -do { \ - union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ - unsigned long __tmp; \ - compiletime_assert_atomic_type(*p); \ - switch (sizeof(*p)) { \ - case 1: \ - __smp_mb(); \ - *(volatile __u8 *)p = *(__u8 *)__u.__c; \ - break; \ - case 2: \ - __smp_mb(); \ - *(volatile __u16 *)p = *(__u16 *)__u.__c; \ - break; \ - case 4: \ - __asm__ __volatile__( \ - "amswap_db.w %[tmp], %[val], %[mem] \n" \ - : [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp) \ - : [val] "r" (*(__u32 *)__u.__c) \ - : ); \ - break; \ - case 8: \ - __asm__ __volatile__( \ - "amswap_db.d %[tmp], %[val], %[mem] \n" \ - : [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp) \ - : [val] "r" (*(__u64 *)__u.__c) \ - : ); \ - break; \ - } \ +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + strel_mb(); \ + WRITE_ONCE(*p, v); \ } while (0) #define __smp_store_mb(p, v) \ diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 5b12b061d571..29481b6fcef3 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -62,7 +62,7 @@ extern pgprot_t pgprot_wc; #define ioremap_cache(offset, size) \ ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL)) -#define mmiowb() asm volatile ("dbar 0" ::: "memory") +#define mmiowb() wmb() /* * String version of I/O memory access operations. 
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h new file mode 100644 index 000000000000..34f43f8ad591 --- /dev/null +++ b/arch/loongarch/include/asm/qspinlock.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_QSPINLOCK_H +#define _ASM_QSPINLOCK_H + +#include + +#define queued_spin_unlock queued_spin_unlock + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + compiletime_assert_atomic_type(lock->locked); + c_sync(); + WRITE_ONCE(lock->locked, 0); +} + +#include + +#endif /* _ASM_QSPINLOCK_H */ diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 0d76fc9259e8..8d7050fdcd25 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -185,7 +185,7 @@ irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) action = ipi_read_clear(cpu_logical_map(cpu)); - smp_mb(); + wbflush(); if (action & SMP_RESCHEDULE) { scheduler_ipi(); diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index b11776de96f7..af2a4e9c97cc 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -189,7 +189,7 @@ tlb_huge_update_load: ertn nopage_tlb_load: - dbar 0 + dbar 0x700 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_0 jr t0 @@ -338,7 +338,7 @@ tlb_huge_update_store: ertn nopage_tlb_store: - dbar 0 + dbar 0x700 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jr t0 @@ -485,7 +485,7 @@ tlb_huge_update_modify: ertn nopage_tlb_modify: - dbar 0 + dbar 0x700 csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jr t0 -- Gitee From d3fdc52b05093fa4d00207ecd34be3f8188faf11 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 15 May 2023 10:59:27 +0800 Subject: [PATCH 168/241] LoongArch: Introduce hardware page table walker Loongson-3A6000 and newer processors have hardware page table walker (PTW) support. PTW handles the fast path of TLBI/TLBL/TLBS/TLBM exceptions in hardware; software only needs to handle the slow path (page faults). BTW, PTW doesn't set _PAGE_MODIFIED for page table entries, so we change pmd_dirty() and pte_dirty() to also check _PAGE_DIRTY for the "dirty" attribute.
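To make the dirty semantics concrete, a minimal sketch (an illustration for this log, not part of the patch; __pte() and WARN_ON() are used informally, the _PAGE_* bits are as defined in the LoongArch headers): an entry dirtied through the PTW fast path carries _PAGE_DIRTY but not _PAGE_MODIFIED, and must still be reported as dirty:

	pte_t sw_dirty = __pte(_PAGE_MODIFIED);	/* dirtied via the software slow path */
	pte_t hw_dirty = __pte(_PAGE_DIRTY);	/* dirtied by the hardware walker */

	/* with this patch, both report dirty */
	WARN_ON(!pte_dirty(sw_dirty) || !pte_dirty(hw_dirty));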
Change-Id: I41f196ea5b5a3c5c9d0a2e57472ef0dc3af3a45a Signed-off-by: Liang Gao Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 2 +- arch/loongarch/include/asm/cpu.h | 2 ++ arch/loongarch/include/asm/loongarchregs.h | 4 ++++ arch/loongarch/include/asm/pgtable.h | 4 ++-- arch/loongarch/include/asm/tlb.h | 3 +++ arch/loongarch/include/uapi/asm/hwcap.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++++ arch/loongarch/kernel/proc.c | 1 + arch/loongarch/mm/tlb.c | 21 +++++++++++++++++---- arch/loongarch/mm/tlbex.S | 21 +++++++++++++++++++++ 10 files changed, 56 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index 3ba07f23f773..03b9cf1453a6 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -60,6 +60,6 @@ #define cpu_has_eiodecode cpu_opt(LOONGARCH_CPU_EIODECODE) #define cpu_has_guestid cpu_opt(LOONGARCH_CPU_GUESTID) #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) - +#define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index 5ff57a1620df..5971faec6d5a 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -98,6 +98,7 @@ enum cpu_type_enum { #define CPU_FEATURE_EIODECODE 23 /* CPU has EXTIOI interrupt pin decode mode */ #define CPU_FEATURE_GUESTID 24 /* CPU has GuestID feature */ #define CPU_FEATURE_HYPERVISOR 25 /* CPU has hypervisor (running in VM) */ +#define CPU_FEATURE_PTW 26 /* CPU has hardware page table walker */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -125,4 +126,5 @@ enum cpu_type_enum { #define LOONGARCH_CPU_EIODECODE BIT_ULL(CPU_FEATURE_EIODECODE) #define LOONGARCH_CPU_GUESTID BIT_ULL(CPU_FEATURE_GUESTID) #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) +#define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index e6accca330f1..aaef3119ebec 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -135,6 +135,7 @@ __asm__(".macro parse_r var r\n\t" #define CPUCFG2_MIPSBT BIT(20) #define CPUCFG2_LSPW BIT(21) #define CPUCFG2_LAM BIT(22) +#define CPUCFG2_PTW BIT(24) #define LOONGARCH_CPUCFG3 0x3 #define CPUCFG3_CCDMA BIT(0) @@ -412,6 +413,9 @@ __asm__(".macro parse_r var r\n\t" #define CSR_PWCTL0_PTBASE (_ULCAST_(0x1f) << CSR_PWCTL0_PTBASE_SHIFT) #define LOONGARCH_CSR_PWCTL1 0x1d /* PWCtl1 */ +#define CSR_PWCTL1_PTW_SHIFT 24 +#define CSR_PWCTL1_PTW_WIDTH 1 +#define CSR_PWCTL1_PTW (_ULCAST_(0x1) << CSR_PWCTL1_PTW_SHIFT) #define CSR_PWCTL1_DIR3WIDTH_SHIFT 18 #define CSR_PWCTL1_DIR3WIDTH_WIDTH 5 #define CSR_PWCTL1_DIR3WIDTH (_ULCAST_(0x1f) << CSR_PWCTL1_DIR3WIDTH_SHIFT) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 7b37b0da6f10..24157f778baa 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -91,7 +91,7 @@ extern pgd_t invalid_pg_dir[]; */ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_MODIFIED; } +static inline int pte_dirty(pte_t pte) 
{ return pte_val(pte) & (_PAGE_DIRTY | _PAGE_MODIFIED); } static inline pte_t pte_mkold(pte_t pte) { @@ -232,7 +232,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) static inline int pmd_dirty(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_MODIFIED); + return !!(pmd_val(pmd) & (_PAGE_DIRTY | _PAGE_MODIFIED)); } static inline pmd_t pmd_mkclean(pmd_t pmd) diff --git a/arch/loongarch/include/asm/tlb.h b/arch/loongarch/include/asm/tlb.h index cf760b74d489..fb0e984d1b28 100644 --- a/arch/loongarch/include/asm/tlb.h +++ b/arch/loongarch/include/asm/tlb.h @@ -168,6 +168,9 @@ extern void handle_tlb_store(void); extern void handle_tlb_modify(void); extern void handle_tlb_refill(void); extern void handle_tlb_protect(void); +extern void handle_tlb_load_ptw(void); +extern void handle_tlb_store_ptw(void); +extern void handle_tlb_modify_ptw(void); extern void dump_tlb_all(void); extern void dump_tlb_regs(void); diff --git a/arch/loongarch/include/uapi/asm/hwcap.h b/arch/loongarch/include/uapi/asm/hwcap.h index 8840b72fa8e8..6955a7cb2c65 100644 --- a/arch/loongarch/include/uapi/asm/hwcap.h +++ b/arch/loongarch/include/uapi/asm/hwcap.h @@ -16,5 +16,6 @@ #define HWCAP_LOONGARCH_LBT_X86 (1 << 10) #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) +#define HWCAP_LOONGARCH_PTW (1 << 13) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index a44fe7d33500..adb21c7e6783 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -137,6 +137,10 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_CRYPTO; elf_hwcap |= HWCAP_LOONGARCH_CRYPTO; } + if (config & CPUCFG2_PTW) { + c->options |= LOONGARCH_CPU_PTW; + elf_hwcap |= HWCAP_LOONGARCH_PTW; + } if (config & CPUCFG2_LVZP) { c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 02f98f2a4d82..9dcd2158a815 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -80,6 +80,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_crc32) seq_printf(m, " crc32"); if (cpu_has_complex) seq_printf(m, " complex"); if (cpu_has_crypto) seq_printf(m, " crypto"); + if (cpu_has_ptw) seq_printf(m, " ptw"); if (cpu_has_lvz) seq_printf(m, " lvz"); if (cpu_has_lbt_x86) seq_printf(m, " lbt_x86"); if (cpu_has_lbt_arm) seq_printf(m, " lbt_arm"); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 55d69c42adaf..2edb1d7fdd74 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -166,6 +166,9 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep int idx; unsigned long flags; + if (cpu_has_ptw) + return; + /* * Handle debugger faulting in for debugee. 
*/ @@ -221,6 +224,9 @@ static void setup_ptwalker(void) pwctl0 = pte_i | pte_w << 5 | pmd_i << 10 | pmd_w << 15 | pud_i << 20 | pud_w << 25; pwctl1 = pgd_i | pgd_w << 6; + if (cpu_has_ptw) + pwctl1 |= CSR_PWCTL1_PTW; + csr_write64(pwctl0, LOONGARCH_CSR_PWCTL0); csr_write64(pwctl1, LOONGARCH_CSR_PWCTL1); csr_write64((long)swapper_pg_dir, LOONGARCH_CSR_PGDH); @@ -263,10 +269,17 @@ static void setup_tlb_handler(int cpu) if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + if (!cpu_has_ptw) { + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); + } else { + set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE); + set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE); + set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE); + set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE); + } set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index af2a4e9c97cc..374baa6d5505 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -195,6 +195,13 @@ nopage_tlb_load: jr t0 SYM_FUNC_END(handle_tlb_load) +SYM_FUNC_START(handle_tlb_load_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la.abs t0, tlb_do_page_fault_0 + jirl zero, t0, 0 +SYM_FUNC_END(handle_tlb_load_ptw) + SYM_FUNC_START(handle_tlb_store) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 @@ -344,6 +351,13 @@ nopage_tlb_store: jr t0 SYM_FUNC_END(handle_tlb_store) +SYM_FUNC_START(handle_tlb_store_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la.abs t0, tlb_do_page_fault_1 + jirl zero, t0, 0 +SYM_FUNC_END(handle_tlb_store_ptw) + SYM_FUNC_START(handle_tlb_modify) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 @@ -491,6 +505,13 @@ nopage_tlb_modify: jr t0 SYM_FUNC_END(handle_tlb_modify) +SYM_FUNC_START(handle_tlb_modify_ptw) + csrwr t0, LOONGARCH_CSR_KS0 + csrwr t1, LOONGARCH_CSR_KS1 + la.abs t0, tlb_do_page_fault_1 + jirl zero, t0, 0 +SYM_FUNC_END(handle_tlb_modify_ptw) + SYM_FUNC_START(handle_tlb_refill) csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD -- Gitee From e3e7235035054921f57662f956023dbab6d5c209 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Fri, 4 Aug 2023 20:46:09 +0800 Subject: [PATCH 169/241] LoongArch: Add SIMD-optimized XOR routines Add LSX and LASX implementations of xor operations, operating on 64 bytes (one L1 cache line) at a time, for a balance between memory utilization and instruction mix. Huacai confirmed that all future LoongArch implementations by Loongson (that we care about) will likely also feature 64-byte cache lines, and experiments show no throughput improvement with further unrolling.
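For reference, a scalar sketch of what each vector iteration computes (our illustration, not code from this patch; the real routines are generated from xor_template.c below and keep the whole cache line in LSX/LASX registers):

	/* xor_*_2() shape: v1 ^= v2, one 64-byte cache line per iteration */
	static void xor_2_sketch(unsigned long bytes,
				 unsigned long *v1, unsigned long *v2)
	{
		unsigned long lines = bytes / 64;
		unsigned int i;

		do {
			for (i = 0; i < 64 / sizeof(unsigned long); i++)
				v1[i] ^= v2[i];
			v1 += 64 / sizeof(unsigned long);
			v2 += 64 / sizeof(unsigned long);
		} while (--lines > 0);
	}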
Performance numbers measured during system boot on a 3A5000 @ 2.5GHz: > 8regs : 12702 MB/sec > 8regs_prefetch : 10920 MB/sec > 32regs : 12686 MB/sec > 32regs_prefetch : 10918 MB/sec > lsx : 17589 MB/sec > lasx : 26116 MB/sec Change-Id: I87617157dac035dcb79acfbac6e6b626133a00c7 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/xor.h | 68 +++++++++++++++++ arch/loongarch/include/asm/xor_simd.h | 34 +++++++++ arch/loongarch/lib/Makefile | 2 + arch/loongarch/lib/xor_simd.c | 93 +++++++++++++++++++++++ arch/loongarch/lib/xor_simd.h | 38 ++++++++++ arch/loongarch/lib/xor_simd_glue.c | 68 +++++++++++++++++ arch/loongarch/lib/xor_template.c | 104 ++++++++++++++++++++++++++ 7 files changed, 407 insertions(+) create mode 100644 arch/loongarch/include/asm/xor.h create mode 100644 arch/loongarch/include/asm/xor_simd.h create mode 100644 arch/loongarch/lib/xor_simd.c create mode 100644 arch/loongarch/lib/xor_simd.h create mode 100644 arch/loongarch/lib/xor_simd_glue.c create mode 100644 arch/loongarch/lib/xor_template.c diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h new file mode 100644 index 000000000000..12467fffee46 --- /dev/null +++ b/arch/loongarch/include/asm/xor.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui + */ +#ifndef _ASM_LOONGARCH_XOR_H +#define _ASM_LOONGARCH_XOR_H + +#include +#include + +#ifdef CONFIG_CPU_HAS_LSX +static struct xor_block_template xor_block_lsx = { + .name = "lsx", + .do_2 = xor_lsx_2, + .do_3 = xor_lsx_3, + .do_4 = xor_lsx_4, + .do_5 = xor_lsx_5, +}; + +#define XOR_SPEED_LSX() \ + do { \ + if (cpu_has_lsx) \ + xor_speed(&xor_block_lsx); \ + } while (0) +#else /* CONFIG_CPU_HAS_LSX */ +#define XOR_SPEED_LSX() +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static struct xor_block_template xor_block_lasx = { + .name = "lasx", + .do_2 = xor_lasx_2, + .do_3 = xor_lasx_3, + .do_4 = xor_lasx_4, + .do_5 = xor_lasx_5, +}; + +#define XOR_SPEED_LASX() \ + do { \ + if (cpu_has_lasx) \ + xor_speed(&xor_block_lasx); \ + } while (0) +#else /* CONFIG_CPU_HAS_LASX */ +#define XOR_SPEED_LASX() +#endif /* CONFIG_CPU_HAS_LASX */ + +/* + * For grins, also test the generic routines. + * + * More importantly: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. 
+ */ +#include + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ + XOR_SPEED_LSX(); \ + XOR_SPEED_LASX(); \ +} while (0) + +#endif /* _ASM_LOONGARCH_XOR_H */ diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h new file mode 100644 index 000000000000..809159293da2 --- /dev/null +++ b/arch/loongarch/include/asm/xor_simd.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui + */ +#ifndef _ASM_LOONGARCH_XOR_SIMD_H +#define _ASM_LOONGARCH_XOR_SIMD_H + +#ifdef CONFIG_CPU_HAS_LSX +void xor_lsx_2(unsigned long bytes, unsigned long *p1, + unsigned long *p2); +void xor_lsx_3(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3); +void xor_lsx_4(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4); +void xor_lsx_5(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4, unsigned long *p5); +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +void xor_lasx_2(unsigned long bytes, unsigned long *p1, + unsigned long *p2); +void xor_lasx_3(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3); +void xor_lasx_4(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4); +void xor_lasx_5(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4, unsigned long *p5); +#endif /* CONFIG_CPU_HAS_LASX */ + +#endif /* _ASM_LOONGARCH_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 269f16f6d169..29eabd49792b 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -5,3 +5,5 @@ lib-y += delay.o memset.o memcpy.o memmove.o \ clear_user.o copy_user.o unaligned.o dump_tlb.o + +obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c new file mode 100644 index 000000000000..84cd24b728c4 --- /dev/null +++ b/arch/loongarch/lib/xor_simd.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch SIMD XOR operations + * + * Copyright (C) 2023 WANG Xuerui + */ + +#include "xor_simd.h" + +/* + * Process one cache line (64 bytes) per loop. This is assuming all future + * popular LoongArch cores are similar performance-characteristics-wise to the + * current models. 
+ */ +#define LINE_WIDTH 64 + +#ifdef CONFIG_CPU_HAS_LSX + +#define LD(reg, base, offset) \ + "vld $vr" #reg ", %[" #base "], " #offset "\n\t" +#define ST(reg, base, offset) \ + "vst $vr" #reg ", %[" #base "], " #offset "\n\t" +#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t" + +#define LD_INOUT_LINE(base) \ + LD(0, base, 0) \ + LD(1, base, 16) \ + LD(2, base, 32) \ + LD(3, base, 48) + +#define LD_AND_XOR_LINE(base) \ + LD(4, base, 0) \ + LD(5, base, 16) \ + LD(6, base, 32) \ + LD(7, base, 48) \ + XOR(0, 4) \ + XOR(1, 5) \ + XOR(2, 6) \ + XOR(3, 7) + +#define ST_LINE(base) \ + ST(0, base, 0) \ + ST(1, base, 16) \ + ST(2, base, 32) \ + ST(3, base, 48) + +#define XOR_FUNC_NAME(nr) __xor_lsx_##nr +#include "xor_template.c" + +#undef LD +#undef ST +#undef XOR +#undef LD_INOUT_LINE +#undef LD_AND_XOR_LINE +#undef ST_LINE +#undef XOR_FUNC_NAME + +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX + +#define LD(reg, base, offset) \ + "xvld $xr" #reg ", %[" #base "], " #offset "\n\t" +#define ST(reg, base, offset) \ + "xvst $xr" #reg ", %[" #base "], " #offset "\n\t" +#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t" + +#define LD_INOUT_LINE(base) \ + LD(0, base, 0) \ + LD(1, base, 32) + +#define LD_AND_XOR_LINE(base) \ + LD(2, base, 0) \ + LD(3, base, 32) \ + XOR(0, 2) \ + XOR(1, 3) + +#define ST_LINE(base) \ + ST(0, base, 0) \ + ST(1, base, 32) + +#define XOR_FUNC_NAME(nr) __xor_lasx_##nr +#include "xor_template.c" + +#undef LD +#undef ST +#undef XOR +#undef LD_INOUT_LINE +#undef LD_AND_XOR_LINE +#undef ST_LINE +#undef XOR_FUNC_NAME + +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h new file mode 100644 index 000000000000..6555f84a7c97 --- /dev/null +++ b/arch/loongarch/lib/xor_simd.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Simple interface to link xor_simd.c and xor_simd_glue.c + * + * Separating these files ensures that no SIMD instructions are run outside of + * the kfpu critical section. 
+ */ + +#ifndef __LOONGARCH_LIB_XOR_SIMD_H +#define __LOONGARCH_LIB_XOR_SIMD_H + +#ifdef CONFIG_CPU_HAS_LSX +void __xor_lsx_2(unsigned long bytes, unsigned long *p1, + unsigned long *p2); +void __xor_lsx_3(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3); +void __xor_lsx_4(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4); +void __xor_lsx_5(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4, unsigned long *p5); +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +void __xor_lasx_2(unsigned long bytes, unsigned long *p1, + unsigned long *p2); +void __xor_lasx_3(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3); +void __xor_lasx_4(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4); +void __xor_lasx_5(unsigned long bytes, unsigned long *p1, + unsigned long *p2, unsigned long *p3, + unsigned long *p4, unsigned long *p5); +#endif /* CONFIG_CPU_HAS_LASX */ + +#endif /* __LOONGARCH_LIB_XOR_SIMD_H */ diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c new file mode 100644 index 000000000000..f5b433ff7230 --- /dev/null +++ b/arch/loongarch/lib/xor_simd_glue.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LoongArch SIMD XOR operations + * + * Copyright (C) 2023 WANG Xuerui + */ + +#include +#include +#include +#include +#include "xor_simd.h" + +#define MAKE_XOR_GLUE_2(flavor) \ +void xor_##flavor##_2(unsigned long bytes, unsigned long *p1, \ + unsigned long *p2) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_2(bytes, p1, p2); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_2) + +#define MAKE_XOR_GLUE_3(flavor) \ +void xor_##flavor##_3(unsigned long bytes, unsigned long *p1, \ + unsigned long *p2, unsigned long *p3) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_3(bytes, p1, p2, p3); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_3) + +#define MAKE_XOR_GLUE_4(flavor) \ +void xor_##flavor##_4(unsigned long bytes, unsigned long *p1, \ + unsigned long *p2, unsigned long *p3, \ + unsigned long *p4) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_4(bytes, p1, p2, p3, p4); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_4) + +#define MAKE_XOR_GLUE_5(flavor) \ +void xor_##flavor##_5(unsigned long bytes, unsigned long *p1, \ + unsigned long *p2, unsigned long *p3, \ + unsigned long *p4, unsigned long *p5) \ +{ \ + kernel_fpu_begin(); \ + __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \ + kernel_fpu_end(); \ +} \ +EXPORT_SYMBOL_GPL(xor_##flavor##_5) + +#define MAKE_XOR_GLUES(flavor) \ + MAKE_XOR_GLUE_2(flavor); \ + MAKE_XOR_GLUE_3(flavor); \ + MAKE_XOR_GLUE_4(flavor); \ + MAKE_XOR_GLUE_5(flavor) + +#ifdef CONFIG_CPU_HAS_LSX +MAKE_XOR_GLUES(lsx); +#endif + +#ifdef CONFIG_CPU_HAS_LASX +MAKE_XOR_GLUES(lasx); +#endif diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c new file mode 100644 index 000000000000..1f828860a25c --- /dev/null +++ b/arch/loongarch/lib/xor_template.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 WANG Xuerui + * + * Template for XOR operations, instantiated in xor_simd.c. 
+ * + * Expected preprocessor definitions: + * + * - LINE_WIDTH + * - XOR_FUNC_NAME(nr) + * - LD_INOUT_LINE(buf) + * - LD_AND_XOR_LINE(buf) + * - ST_LINE(buf) + */ + +void XOR_FUNC_NAME(2)(unsigned long bytes, + unsigned long *v1, unsigned long *v2) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(3)(unsigned long bytes, + unsigned long *v1, unsigned long *v2, + unsigned long *v3) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(4)(unsigned long bytes, + unsigned long *v1, unsigned long *v2, + unsigned long *v3, unsigned long *v4) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + LD_AND_XOR_LINE(v4) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) + : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + v4 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} + +void XOR_FUNC_NAME(5)(unsigned long bytes, + unsigned long *v1, unsigned long *v2, + unsigned long *v3, unsigned long *v4, + unsigned long *v5) +{ + unsigned long lines = bytes / LINE_WIDTH; + + do { + __asm__ __volatile__ ( + LD_INOUT_LINE(v1) + LD_AND_XOR_LINE(v2) + LD_AND_XOR_LINE(v3) + LD_AND_XOR_LINE(v4) + LD_AND_XOR_LINE(v5) + ST_LINE(v1) + : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), + [v5] "r"(v5) : "memory" + ); + + v1 += LINE_WIDTH / sizeof(unsigned long); + v2 += LINE_WIDTH / sizeof(unsigned long); + v3 += LINE_WIDTH / sizeof(unsigned long); + v4 += LINE_WIDTH / sizeof(unsigned long); + v5 += LINE_WIDTH / sizeof(unsigned long); + } while (--lines > 0); +} -- Gitee From e3e7235035054921f57662f956023dbab6d5c209 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Fri, 4 Aug 2023 20:46:10 +0800 Subject: [PATCH 170/241] raid6: Add LoongArch SIMD syndrome calculation The algorithms work on 64 bytes at a time, which is the L1 cache line size of all current and future LoongArch cores (that we care about), as confirmed by Huacai. The code is based on the generic int.uc algorithm, unrolled 4 times for LSX and 2 times for LASX. Further unrolling does not meaningfully improve the performance according to experiments. 
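For orientation, the per-disk recurrence each SIMD iteration implements, written for a single byte (a scalar sketch of the generic int.uc step, not code from this patch; MASK/SHLBYTE match the comments in loongarch_simd.c below):

	/* GF(2^8) multiply-by-2 with the RAID-6 polynomial 0x1d */
	static inline u8 mul2(u8 wq)
	{
		u8 w2 = (wq & 0x80) ? 0x1d : 0;	/* w2 = MASK(wq) & 0x1d */
		u8 w1 = wq << 1;		/* w1 = SHLBYTE(wq) */

		return w1 ^ w2;
	}

	/* folding one data disk's byte wd into the parity bytes */
	wp ^= wd;		/* P: plain XOR parity */
	wq = mul2(wq) ^ wd;	/* Q: Reed-Solomon syndrome */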
Performance numbers measured during system boot on a 3A5000 @ 2.5GHz: > raid6: lasx gen() 12726 MB/s > raid6: lsx gen() 10001 MB/s > raid6: int64x8 gen() 2876 MB/s > raid6: int64x4 gen() 3867 MB/s > raid6: int64x2 gen() 2531 MB/s > raid6: int64x1 gen() 1945 MB/s Comparison of xor() speeds (from different boots but meaningful anyway): > lasx: 11226 MB/s > lsx: 6395 MB/s > int64x4: 2147 MB/s Performance as measured by raid6test: > raid6: lasx gen() 25109 MB/s > raid6: lsx gen() 13233 MB/s > raid6: int64x8 gen() 4164 MB/s > raid6: int64x4 gen() 6005 MB/s > raid6: int64x2 gen() 5781 MB/s > raid6: int64x1 gen() 4119 MB/s > raid6: using algorithm lasx gen() 25109 MB/s > raid6: .... xor() 14439 MB/s, rmw enabled Change-Id: Ie22bdfd436640505aff8102ec9befd893eff6124 Signed-off-by: WANG Xuerui Signed-off-by: Huacai Chen --- include/linux/raid/pq.h | 2 + lib/raid6/Makefile | 1 + lib/raid6/algos.c | 8 + lib/raid6/loongarch.h | 38 ++++ lib/raid6/loongarch_simd.c | 420 +++++++++++++++++++++++++++++++++++++ lib/raid6/test/Makefile | 31 ++- 6 files changed, 491 insertions(+), 9 deletions(-) create mode 100644 lib/raid6/loongarch.h create mode 100644 lib/raid6/loongarch_simd.c diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 154e954b711d..7ad5ccdf0c94 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -116,6 +116,8 @@ extern const struct raid6_calls raid6_vpermxor1; extern const struct raid6_calls raid6_vpermxor2; extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; +extern const struct raid6_calls raid6_lsx; +extern const struct raid6_calls raid6_lasx; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index c770570bfe4f..bd6a7179e260 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o +raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o hostprogs += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d5e5000fdd7..a9cd0bd67cf6 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -75,6 +75,14 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_neonx2, &raid6_neonx1, #endif +#ifdef CONFIG_LOONGARCH +#ifdef CONFIG_CPU_HAS_LASX + &raid6_lasx, +#endif +#ifdef CONFIG_CPU_HAS_LSX + &raid6_lsx, +#endif +#endif #if defined(__ia64__) &raid6_intx32, &raid6_intx16, diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h new file mode 100644 index 000000000000..acfc33ce7056 --- /dev/null +++ b/lib/raid6/loongarch.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023 WANG Xuerui + * + * raid6/loongarch.h + * + * Definitions common to LoongArch RAID-6 code only + */ + +#ifndef _LIB_RAID6_LOONGARCH_H +#define _LIB_RAID6_LOONGARCH_H + +#ifdef __KERNEL__ + +#include +#include + +#else /* for user-space testing */ + +#include + +/* have to supply these defines for glibc 2.37- and musl */ +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +#define kernel_fpu_begin() +#define kernel_fpu_end() + +#define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX) +#define cpu_has_lasx 
(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX) + +#endif /* __KERNEL__ */ + +#endif /* _LIB_RAID6_LOONGARCH_H */ diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c new file mode 100644 index 000000000000..6032b83a427a --- /dev/null +++ b/lib/raid6/loongarch_simd.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX) + * + * Copyright 2023 WANG Xuerui + * + * Based on the generic RAID-6 code (int.uc): + * + * Copyright 2002-2004 H. Peter Anvin + */ + +#include +#include "loongarch.h" + +/* + * The vector algorithms are currently priority 0, which means the generic + * scalar algorithms are not being disabled if vector support is present. + * This is like the similar LoongArch RAID5 XOR code, with the main reason + * repeated here: it cannot be ruled out at this point of time, that some + * future (maybe reduced) models could run the vector algorithms slower than + * the scalar ones, maybe for errata or micro-op reasons. It may be + * appropriate to revisit this after one or two more uarch generations. + */ + +#ifdef CONFIG_CPU_HAS_LSX +#define NSIZE 16 + +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm 
volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1])); + asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2])); + asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1])); + asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2])); + asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3])); + } + + kernel_fpu_end(); +} + +static void raid6_lsx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $vr0, $vr1, $vr2, $vr3: wp + * $vr4, $vr5, $vr6, $vr7: wq + * $vr8, $vr9, $vr10, $vr11: wd + * $vr12, $vr13, $vr14, $vr15: w2 + * $vr16, $vr17, $vr18, $vr19: w1 + */ + for (d = 0; d < bytes; d += NSIZE*4) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE])); + asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE])); + asm volatile("vori.b $vr4, $vr0, 0"); + asm volatile("vori.b $vr5, $vr1, 0"); + asm volatile("vori.b $vr6, $vr2, 0"); + asm volatile("vori.b $vr7, $vr3, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE])); + asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE])); + asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("vxor.v $vr16, $vr16, $vr12"); + asm volatile("vxor.v $vr17, $vr17, $vr13"); + asm volatile("vxor.v $vr18, $vr18, $vr14"); + asm volatile("vxor.v $vr19, $vr19, $vr15"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr8"); + asm volatile("vxor.v $vr5, $vr17, $vr9"); + asm volatile("vxor.v $vr6, $vr18, $vr10"); + asm volatile("vxor.v $vr7, $vr19, $vr11"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("vslti.b $vr12, $vr4, 0"); + asm volatile("vslti.b $vr13, $vr5, 0"); + asm volatile("vslti.b $vr14, $vr6, 0"); + asm volatile("vslti.b $vr15, $vr7, 0"); + /* 
w1$$ = SHLBYTE(wq$$); */ + asm volatile("vslli.b $vr16, $vr4, 1"); + asm volatile("vslli.b $vr17, $vr5, 1"); + asm volatile("vslli.b $vr18, $vr6, 1"); + asm volatile("vslli.b $vr19, $vr7, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("vandi.b $vr12, $vr12, 0x1d"); + asm volatile("vandi.b $vr13, $vr13, 0x1d"); + asm volatile("vandi.b $vr14, $vr14, 0x1d"); + asm volatile("vandi.b $vr15, $vr15, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("vxor.v $vr4, $vr16, $vr12"); + asm volatile("vxor.v $vr5, $vr17, $vr13"); + asm volatile("vxor.v $vr6, $vr18, $vr14"); + asm volatile("vxor.v $vr7, $vr19, $vr15"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "vld $vr20, %0\n\t" + "vld $vr21, %1\n\t" + "vld $vr22, %2\n\t" + "vld $vr23, %3\n\t" + "vld $vr24, %4\n\t" + "vld $vr25, %5\n\t" + "vld $vr26, %6\n\t" + "vld $vr27, %7\n\t" + "vxor.v $vr20, $vr20, $vr0\n\t" + "vxor.v $vr21, $vr21, $vr1\n\t" + "vxor.v $vr22, $vr22, $vr2\n\t" + "vxor.v $vr23, $vr23, $vr3\n\t" + "vxor.v $vr24, $vr24, $vr4\n\t" + "vxor.v $vr25, $vr25, $vr5\n\t" + "vxor.v $vr26, $vr26, $vr6\n\t" + "vxor.v $vr27, $vr27, $vr7\n\t" + "vst $vr20, %0\n\t" + "vst $vr21, %1\n\t" + "vst $vr22, %2\n\t" + "vst $vr23, %3\n\t" + "vst $vr24, %4\n\t" + "vst $vr25, %5\n\t" + "vst $vr26, %6\n\t" + "vst $vr27, %7\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]), + "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lsx = { + raid6_lsx_gen_syndrome, + raid6_lsx_xor_syndrome, + raid6_has_lsx, + "lsx", +}; + +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +#define NSIZE 32 + +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= 0; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */ + asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0])); + asm volatile("xvst $xr1, %0" : 
"=m"(p[d+NSIZE*1])); + /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */ + asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0])); + asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1])); + } + + kernel_fpu_end(); +} + +static void raid6_lasx_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + /* + * $xr0, $xr1: wp + * $xr2, $xr3: wq + * $xr4, $xr5: wd + * $xr6, $xr7: w2 + * $xr8, $xr9: w1 + */ + for (d = 0; d < bytes; d += NSIZE*2) { + /* P/Q data pages */ + /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ + asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE])); + asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE])); + asm volatile("xvori.b $xr2, $xr0, 0"); + asm volatile("xvori.b $xr3, $xr1, 0"); + for (z = z0-1; z >= start; z--) { + /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */ + asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE])); + asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE])); + /* wp$$ ^= wd$$; */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* w1$$ ^= w2$$; */ + asm volatile("xvxor.v $xr8, $xr8, $xr6"); + asm volatile("xvxor.v $xr9, $xr9, $xr7"); + /* wq$$ = w1$$ ^ wd$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr4"); + asm volatile("xvxor.v $xr3, $xr9, $xr5"); + } + + /* P/Q left side optimization */ + for (z = start-1; z >= 0; z--) { + /* w2$$ = MASK(wq$$); */ + asm volatile("xvslti.b $xr6, $xr2, 0"); + asm volatile("xvslti.b $xr7, $xr3, 0"); + /* w1$$ = SHLBYTE(wq$$); */ + asm volatile("xvslli.b $xr8, $xr2, 1"); + asm volatile("xvslli.b $xr9, $xr3, 1"); + /* w2$$ &= NBYTES(0x1d); */ + asm volatile("xvandi.b $xr6, $xr6, 0x1d"); + asm volatile("xvandi.b $xr7, $xr7, 0x1d"); + /* wq$$ = w1$$ ^ w2$$; */ + asm volatile("xvxor.v $xr2, $xr8, $xr6"); + asm volatile("xvxor.v $xr3, $xr9, $xr7"); + } + /* + * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + */ + asm volatile( + "xvld $xr10, %0\n\t" + "xvld $xr11, %1\n\t" + "xvld $xr12, %2\n\t" + "xvld $xr13, %3\n\t" + "xvxor.v $xr10, $xr10, $xr0\n\t" + "xvxor.v $xr11, $xr11, $xr1\n\t" + "xvxor.v $xr12, $xr12, $xr2\n\t" + "xvxor.v $xr13, $xr13, $xr3\n\t" + "xvst $xr10, %0\n\t" + "xvst $xr11, %1\n\t" + "xvst $xr12, %2\n\t" + "xvst $xr13, %3\n\t" + : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]), + "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]) + ); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_lasx = { + raid6_lasx_gen_syndrome, + raid6_lasx_xor_syndrome, + raid6_has_lasx, + "lasx", +}; +#undef NSIZE +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 4fb7700a741b..8a29f2acb8f6 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -34,6 +34,22 @@ ifeq ($(ARCH),aarch64) HAS_NEON = yes endif +ifeq ($(findstring ppc,$(ARCH)),ppc) + CFLAGS += -I../../../arch/powerpc/include + HAS_ALTIVEC := $(shell printf '$(pound)include \nvector int a;\n' |\ + gcc -c -x c - >/dev/null && rm ./-.o && echo yes) +endif + +ifeq 
($(ARCH),loongarch64) + CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1 + CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1) + CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \ + gcc -c -x assembler - >/dev/null 2>&1 && \ + rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1) +endif + ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o CFLAGS += -DCONFIG_X86 @@ -43,15 +59,12 @@ ifeq ($(IS_X86),yes) else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 -else - HAS_ALTIVEC := $(shell printf '$(pound)include \nvector int a;\n' |\ - gcc -c -x c - >/dev/null && rm ./-.o && echo yes) - ifeq ($(HAS_ALTIVEC),yes) - CFLAGS += -I../../../arch/powerpc/include - CFLAGS += -DCONFIG_ALTIVEC - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ - vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o - endif +else ifeq ($(HAS_ALTIVEC),yes) + CFLAGS += -DCONFIG_ALTIVEC + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o +else ifeq ($(ARCH),loongarch64) + OBJS += loongarch_simd.o endif .c.o: -- Gitee From 0828e400ab7b8c15b29793ce09e6aa7e0cafdc66 Mon Sep 17 00:00:00 2001 From: WANG Xuerui Date: Fri, 4 Aug 2023 20:46:11 +0800 Subject: [PATCH 171/241] raid6: Add LoongArch SIMD recovery implementation Similar to the syndrome calculation, the recovery algorithms also work on 64 bytes at a time to align with the L1 cache line size of current and future LoongArch cores (that we care about). Which means unrolled-by-4 LSX and unrolled-by-2 LASX code. The assembly is originally based on the x86 SSSE3/AVX2 ports, but register allocation has been redone to take advantage of LSX/LASX's 32 vector registers, and instruction sequence has been optimized to suit (e.g. LoongArch can perform per-byte srl and andi on vectors, but x86 cannot). Performance numbers measured by instrumenting the raid6test code, on a 3A5000 system clocked at 2.5GHz: > lasx 2data: 354.987 MiB/s > lasx datap: 350.430 MiB/s > lsx 2data: 340.026 MiB/s > lsx datap: 337.318 MiB/s > intx1 2data: 164.280 MiB/s > intx1 datap: 187.966 MiB/s Because recovery algorithms are chosen solely based on priority and availability, lasx is marked as priority 2 and lsx priority 1. At least for the current generation of LoongArch micro-architectures, LASX should always be faster than LSX whenever supported, and have similar power consumption characteristics (because the only known LASX-capable uarch, the LA464, always compute the full 256-bit result for vector ops). 
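To make the multiplier-table layout concrete, here is a minimal scalar
sketch (illustrative only, not part of the patch; the helper name is
hypothetical) of the nibble-split GF(2^8) multiply that the vector code
implements with vsrli.b/vandi.b plus vshuf.b lookups:

  #include <linux/raid/pq.h>

  /*
   * Each 32-byte raid6_vgfmul[c] table stores the products of c with
   * all low-nibble values (bytes 0-15) and all high-nibble values
   * (bytes 16-31), so multiplying one byte is two 16-entry lookups
   * XORed together -- the per-16-byte (LSX) or per-32-byte (LASX)
   * equivalent is exactly one vshuf.b pair.
   */
  static inline u8 gf_mul_by_nibbles(const u8 *tbl, u8 x)
  {
  	return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
  }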
Change-Id: I2e8a2af1feff43a9da28cafffa697630c42b604a
Signed-off-by: WANG Xuerui
Signed-off-by: Huacai Chen
---
 include/linux/raid/pq.h          |   2 +
 lib/raid6/Makefile               |   2 +-
 lib/raid6/algos.c                |   8 +
 lib/raid6/recov_loongarch_simd.c | 513 +++++++++++++++++++++++++++++++
 lib/raid6/test/Makefile          |   2 +-
 5 files changed, 525 insertions(+), 2 deletions(-)
 create mode 100644 lib/raid6/recov_loongarch_simd.c

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 7ad5ccdf0c94..393f30470c13 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -133,6 +133,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
 extern const struct raid6_recov_calls raid6_recov_avx512;
 extern const struct raid6_recov_calls raid6_recov_s390xc;
 extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index bd6a7179e260..e2b1ef565f0d 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -9,7 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
 			      vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
-raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
 
 hostprogs	+= mktables
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index a9cd0bd67cf6..1187431d5cd6 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -113,6 +113,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #endif
 #if defined(CONFIG_KERNEL_MODE_NEON)
 	&raid6_recov_neon,
+#endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+	&raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+	&raid6_recov_lsx,
+#endif
 #endif
 	&raid6_recov_intx1,
 	NULL
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644
index 000000000000..94aeac85e6f7
--- /dev/null
+++ b/lib/raid6/recov_loongarch_simd.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike with the syndrome calculation algorithms, there's no boot-time
+ * selection of recovery algorithms by benchmarking, so we have to specify
+ * the priorities and hope the future cores will all have decent vector
+ * support (i.e. no LASX slower than LSX, or even scalar code).
+ */ + +#ifdef CONFIG_CPU_HAS_LSX +static int raid6_has_lsx(void) +{ + return cpu_has_lsx; +} + +static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * vr20, vr21: qmul + * vr22, vr23: pbmul + */ + asm volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + + while (bytes) { + /* vr4 - vr7: Q */ + asm volatile("vld $vr4, %0" : : "m" (q[0])); + asm volatile("vld $vr5, %0" : : "m" (q[16])); + asm volatile("vld $vr6, %0" : : "m" (q[32])); + asm volatile("vld $vr7, %0" : : "m" (q[48])); + /* vr4 - vr7: Q + Qxy */ + asm volatile("vld $vr8, %0" : : "m" (dq[0])); + asm volatile("vld $vr9, %0" : : "m" (dq[16])); + asm volatile("vld $vr10, %0" : : "m" (dq[32])); + asm volatile("vld $vr11, %0" : : "m" (dq[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + /* vr0 - vr3: P */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr0 - vr3: P + Pxy */ + asm volatile("vld $vr8, %0" : : "m" (dp[0])); + asm volatile("vld $vr9, %0" : : "m" (dp[16])); + asm volatile("vld $vr10, %0" : : "m" (dp[32])); + asm volatile("vld $vr11, %0" : : "m" (dp[48])); + asm volatile("vxor.v $vr0, $vr0, $vr8"); + asm volatile("vxor.v $vr1, $vr1, $vr9"); + asm volatile("vxor.v $vr2, $vr2, $vr10"); + asm volatile("vxor.v $vr3, $vr3, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); + asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); + asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); + asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); + asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); + asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); + asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); + /* vr16 - vr19: 
B(Q + Qxy) */ + asm volatile("vxor.v $vr16, $vr8, $vr4"); + asm volatile("vxor.v $vr17, $vr9, $vr5"); + asm volatile("vxor.v $vr18, $vr10, $vr6"); + asm volatile("vxor.v $vr19, $vr11, $vr7"); + + /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("vsrli.b $vr4, $vr0, 4"); + asm volatile("vsrli.b $vr5, $vr1, 4"); + asm volatile("vsrli.b $vr6, $vr2, 4"); + asm volatile("vsrli.b $vr7, $vr3, 4"); + /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("vandi.b $vr12, $vr0, 0x0f"); + asm volatile("vandi.b $vr13, $vr1, 0x0f"); + asm volatile("vandi.b $vr14, $vr2, 0x0f"); + asm volatile("vandi.b $vr15, $vr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); + asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); + asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); + asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); + /* lookup from pbmul[16] */ + asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); + asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); + asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); + asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); + /* vr4 - vr7: A(P + Pxy) */ + asm volatile("vxor.v $vr4, $vr4, $vr12"); + asm volatile("vxor.v $vr5, $vr5, $vr13"); + asm volatile("vxor.v $vr6, $vr6, $vr14"); + asm volatile("vxor.v $vr7, $vr7, $vr15"); + + /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr16"); + asm volatile("vxor.v $vr5, $vr5, $vr17"); + asm volatile("vxor.v $vr6, $vr6, $vr18"); + asm volatile("vxor.v $vr7, $vr7, $vr19"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Pxy + Dx = Dy */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (dp[0])); + asm volatile("vst $vr1, %0" : "=m" (dp[16])); + asm volatile("vst $vr2, %0" : "=m" (dp[32])); + asm volatile("vst $vr3, %0" : "=m" (dp[48])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* vr22, vr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + + while (bytes) { + /* vr0 - vr3: P + Dx */ + asm volatile("vld $vr0, %0" : : "m" (p[0])); + asm volatile("vld $vr1, %0" : : "m" (p[16])); + asm volatile("vld $vr2, %0" : : "m" (p[32])); + asm volatile("vld $vr3, %0" : : "m" (p[48])); + /* vr4 - vr7: Qx */ + asm volatile("vld $vr4, %0" : : "m" (dq[0])); + asm volatile("vld $vr5, %0" : : "m" (dq[16])); + asm volatile("vld $vr6, %0" : : "m" (dq[32])); + asm volatile("vld $vr7, %0" : : "m" (dq[48])); + /* vr4 - vr7: Q + Qx */ + 
asm volatile("vld $vr8, %0" : : "m" (q[0])); + asm volatile("vld $vr9, %0" : : "m" (q[16])); + asm volatile("vld $vr10, %0" : : "m" (q[32])); + asm volatile("vld $vr11, %0" : : "m" (q[48])); + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + + /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("vsrli.b $vr8, $vr4, 4"); + asm volatile("vsrli.b $vr9, $vr5, 4"); + asm volatile("vsrli.b $vr10, $vr6, 4"); + asm volatile("vsrli.b $vr11, $vr7, 4"); + /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("vandi.b $vr4, $vr4, 0x0f"); + asm volatile("vandi.b $vr5, $vr5, 0x0f"); + asm volatile("vandi.b $vr6, $vr6, 0x0f"); + asm volatile("vandi.b $vr7, $vr7, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); + asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); + asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); + asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); + /* lookup from qmul[16] */ + asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); + asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); + asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); + asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); + /* vr4 - vr7: qmul(Q + Qx) = Dx */ + asm volatile("vxor.v $vr4, $vr4, $vr8"); + asm volatile("vxor.v $vr5, $vr5, $vr9"); + asm volatile("vxor.v $vr6, $vr6, $vr10"); + asm volatile("vxor.v $vr7, $vr7, $vr11"); + asm volatile("vst $vr4, %0" : "=m" (dq[0])); + asm volatile("vst $vr5, %0" : "=m" (dq[16])); + asm volatile("vst $vr6, %0" : "=m" (dq[32])); + asm volatile("vst $vr7, %0" : "=m" (dq[48])); + + /* vr0 - vr3: P + Dx + Dx = P */ + asm volatile("vxor.v $vr0, $vr0, $vr4"); + asm volatile("vxor.v $vr1, $vr1, $vr5"); + asm volatile("vxor.v $vr2, $vr2, $vr6"); + asm volatile("vxor.v $vr3, $vr3, $vr7"); + asm volatile("vst $vr0, %0" : "=m" (p[0])); + asm volatile("vst $vr1, %0" : "=m" (p[16])); + asm volatile("vst $vr2, %0" : "=m" (p[32])); + asm volatile("vst $vr3, %0" : "=m" (p[48])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lsx = { + .data2 = raid6_2data_recov_lsx, + .datap = raid6_datap_recov_lsx, + .valid = raid6_has_lsx, + .name = "lsx", + .priority = 1, +}; +#endif /* CONFIG_CPU_HAS_LSX */ + +#ifdef CONFIG_CPU_HAS_LASX +static int raid6_has_lasx(void) +{ + return cpu_has_lasx; +} + +static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + /* + * xr20, xr21: qmul + * xr22, xr23: pbmul + */ + asm 
volatile("vld $vr20, %0" : : "m" (qmul[0])); + asm volatile("vld $vr21, %0" : : "m" (qmul[16])); + asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); + asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); + asm volatile("xvreplve0.q $xr20, $xr20"); + asm volatile("xvreplve0.q $xr21, $xr21"); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: Q */ + asm volatile("xvld $xr0, %0" : : "m" (q[0])); + asm volatile("xvld $xr1, %0" : : "m" (q[32])); + /* xr0, xr1: Q + Qxy */ + asm volatile("xvld $xr4, %0" : : "m" (dq[0])); + asm volatile("xvld $xr5, %0" : : "m" (dq[32])); + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + /* xr2, xr3: P */ + asm volatile("xvld $xr2, %0" : : "m" (p[0])); + asm volatile("xvld $xr3, %0" : : "m" (p[32])); + /* xr2, xr3: P + Pxy */ + asm volatile("xvld $xr4, %0" : : "m" (dp[0])); + asm volatile("xvld $xr5, %0" : : "m" (dp[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvsrli.b $xr4, $xr0, 4"); + asm volatile("xvsrli.b $xr5, $xr1, 4"); + /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ + asm volatile("xvandi.b $xr0, $xr0, 0x0f"); + asm volatile("xvandi.b $xr1, $xr1, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); + asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); + asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); + /* xr6, xr7: B(Q + Qxy) */ + asm volatile("xvxor.v $xr6, $xr4, $xr0"); + asm volatile("xvxor.v $xr7, $xr5, $xr1"); + + /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ + asm volatile("xvandi.b $xr0, $xr2, 0x0f"); + asm volatile("xvandi.b $xr1, $xr3, 0x0f"); + /* lookup from pbmul[0] */ + asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); + asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); + /* lookup from pbmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr0, xr1: A(P + Pxy) */ + asm volatile("xvxor.v $xr0, $xr0, $xr4"); + asm volatile("xvxor.v $xr1, $xr1, $xr5"); + + /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ + asm volatile("xvxor.v $xr0, $xr0, $xr6"); + asm volatile("xvxor.v $xr1, $xr1, $xr7"); + + /* xr2, xr3: P + Pxy + Dx = Dy */ + asm volatile("xvxor.v $xr2, $xr2, $xr0"); + asm volatile("xvxor.v $xr3, $xr3, $xr1"); + + asm volatile("xvst $xr0, %0" : "=m" (dq[0])); + asm volatile("xvst $xr1, %0" : "=m" (dq[32])); + asm volatile("xvst $xr2, %0" : "=m" (dp[0])); + asm volatile("xvst $xr3, %0" : "=m" (dp[32])); + + bytes -= 64; + p += 64; + q += 64; + dp += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the 
proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + /* xr22, xr23: qmul */ + asm volatile("vld $vr22, %0" : : "m" (qmul[0])); + asm volatile("xvreplve0.q $xr22, $xr22"); + asm volatile("vld $vr23, %0" : : "m" (qmul[16])); + asm volatile("xvreplve0.q $xr23, $xr23"); + + while (bytes) { + /* xr0, xr1: P + Dx */ + asm volatile("xvld $xr0, %0" : : "m" (p[0])); + asm volatile("xvld $xr1, %0" : : "m" (p[32])); + /* xr2, xr3: Qx */ + asm volatile("xvld $xr2, %0" : : "m" (dq[0])); + asm volatile("xvld $xr3, %0" : : "m" (dq[32])); + /* xr2, xr3: Q + Qx */ + asm volatile("xvld $xr4, %0" : : "m" (q[0])); + asm volatile("xvld $xr5, %0" : : "m" (q[32])); + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ + asm volatile("xvsrli.b $xr4, $xr2, 4"); + asm volatile("xvsrli.b $xr5, $xr3, 4"); + /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ + asm volatile("xvandi.b $xr2, $xr2, 0x0f"); + asm volatile("xvandi.b $xr3, $xr3, 0x0f"); + /* lookup from qmul[0] */ + asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); + asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3"); + /* lookup from qmul[16] */ + asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); + asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); + /* xr2, xr3: qmul(Q + Qx) = Dx */ + asm volatile("xvxor.v $xr2, $xr2, $xr4"); + asm volatile("xvxor.v $xr3, $xr3, $xr5"); + + /* xr0, xr1: P + Dx + Dx = P */ + asm volatile("xvxor.v $xr0, $xr0, $xr2"); + asm volatile("xvxor.v $xr1, $xr1, $xr3"); + + asm volatile("xvst $xr2, %0" : "=m" (dq[0])); + asm volatile("xvst $xr3, %0" : "=m" (dq[32])); + asm volatile("xvst $xr0, %0" : "=m" (p[0])); + asm volatile("xvst $xr1, %0" : "=m" (p[32])); + + bytes -= 64; + p += 64; + q += 64; + dq += 64; + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_lasx = { + .data2 = raid6_2data_recov_lasx, + .datap = raid6_datap_recov_lasx, + .valid = raid6_has_lasx, + .name = "lasx", + .priority = 2, +}; +#endif /* CONFIG_CPU_HAS_LASX */ diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 8a29f2acb8f6..0a4e6e867f60 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -64,7 +64,7 @@ else ifeq ($(HAS_ALTIVEC),yes) OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o else ifeq ($(ARCH),loongarch64) - OBJS += loongarch_simd.o + OBJS += loongarch_simd.o recov_loongarch_simd.o endif .c.o: -- Gitee From b79126757ce80cfc6718f6957aee834bb71393a6 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 16 Oct 2023 17:37:56 +0800 Subject: [PATCH 172/241] LoongArch: Use SYM_CODE_* to annotate exception handlers As described in include/linux/linkage.h, FUNC -- C-like functions (proper stack frame etc.) CODE -- non-C code (e.g. irq handlers with different, special stack etc.) SYM_FUNC_{START, END} -- use for global functions SYM_CODE_{START, END} -- use for non-C (special) functions So use SYM_CODE_* to annotate exception handlers. 
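As a hypothetical illustration of the convention (handle_example is not
a real symbol in this series), an exception entry annotated with the
new macros would look like this:

	SYM_CODE_START(handle_example)	/* non-C entry: special stack, no C frame */
		BACKUP_T0T1
		SAVE_ALL
		/* ... exception handling body ... */
		RESTORE_ALL_AND_RET
	SYM_CODE_END(handle_example)

The practical effect is in the end marker: SYM_FUNC_END tags the symbol
as SYM_T_FUNC while SYM_CODE_END uses SYM_T_NONE, so tooling no longer
treats these handlers as ordinary C-ABI functions.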
Change-Id: Ic4de3107506c6ef1adee078e2a75d6127f4d6165 Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/linkage.h | 8 +++++++ arch/loongarch/kernel/entry.S | 4 ++-- arch/loongarch/kernel/genex.S | 16 ++++++------- arch/loongarch/mm/tlbex.S | 36 ++++++++++++++-------------- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h index 81b0c4cfbf4f..e2eca1a25b4e 100644 --- a/arch/loongarch/include/asm/linkage.h +++ b/arch/loongarch/include/asm/linkage.h @@ -33,4 +33,12 @@ .cfi_endproc; \ SYM_END(name, SYM_T_FUNC) +#define SYM_CODE_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + .cfi_startproc; + +#define SYM_CODE_END(name) \ + .cfi_endproc; \ + SYM_END(name, SYM_T_NONE) + #endif diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 69d927a138d1..6cda0d20a0f1 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -17,7 +17,7 @@ .text .cfi_sections .debug_frame .align 5 -SYM_FUNC_START(handle_syscall) +SYM_CODE_START(handle_syscall) csrrd t0, PERCPU_BASE_KS la.abs t1, kernelsp add.d t1, t1, t0 @@ -65,7 +65,7 @@ SYM_FUNC_START(handle_syscall) bl do_syscall RESTORE_ALL_AND_RET -SYM_FUNC_END(handle_syscall) +SYM_CODE_END(handle_syscall) SYM_CODE_START(ret_from_fork) bl schedule_tail # a0 = struct task_struct *prev diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 37d1cedb6707..d732b5c8f793 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -25,7 +25,7 @@ SYM_FUNC_START(__arch_cpu_idle) 1: jirl zero, ra, 0 SYM_FUNC_END(__arch_cpu_idle) -SYM_FUNC_START(handle_vint) +SYM_CODE_START(handle_vint) BACKUP_T0T1 SAVE_ALL la.abs t1, __arch_cpu_idle @@ -40,11 +40,11 @@ SYM_FUNC_START(handle_vint) la.abs t0, do_vint jirl ra, t0, 0 RESTORE_ALL_AND_RET -SYM_FUNC_END(handle_vint) +SYM_CODE_END(handle_vint) -SYM_FUNC_START(except_vec_cex) +SYM_CODE_START(except_vec_cex) b cache_parity_error -SYM_FUNC_END(except_vec_cex) +SYM_CODE_END(except_vec_cex) .macro build_prep_badv csrrd t0, LOONGARCH_CSR_BADV @@ -60,7 +60,7 @@ SYM_FUNC_END(except_vec_cex) .macro BUILD_HANDLER exception handler prep .align 5 - SYM_FUNC_START(handle_\exception) + SYM_CODE_START(handle_\exception) BACKUP_T0T1 SAVE_ALL build_prep_\prep @@ -68,7 +68,7 @@ SYM_FUNC_END(except_vec_cex) la.abs t0, do_\handler jirl ra, t0, 0 RESTORE_ALL_AND_RET - SYM_FUNC_END(handle_\exception) + SYM_CODE_END(handle_\exception) .endm BUILD_HANDLER ade ade badv @@ -84,7 +84,7 @@ SYM_FUNC_END(except_vec_cex) BUILD_HANDLER watch watch none BUILD_HANDLER reserved reserved none /* others */ -SYM_FUNC_START(handle_sys) +SYM_CODE_START(handle_sys) la.abs t0, handle_syscall jirl zero, t0, 0 -SYM_FUNC_END(handle_sys) +SYM_CODE_END(handle_sys) diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 374baa6d5505..421aacd7929c 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -22,7 +22,7 @@ #define PTRS_PER_PTE_BITS (PAGE_SHIFT - 3) .macro tlb_do_page_fault, write - SYM_FUNC_START(tlb_do_page_fault_\write) + SYM_CODE_START(tlb_do_page_fault_\write) SAVE_ALL csrrd a2, LOONGARCH_CSR_BADV move a0, sp @@ -31,13 +31,13 @@ la.abs t0, do_page_fault jirl ra, t0, 0 RESTORE_ALL_AND_RET - SYM_FUNC_END(tlb_do_page_fault_\write) + SYM_CODE_END(tlb_do_page_fault_\write) .endm tlb_do_page_fault 0 tlb_do_page_fault 1 -SYM_FUNC_START(handle_tlb_protect) +SYM_CODE_START(handle_tlb_protect) BACKUP_T0T1 
SAVE_ALL move a0, sp @@ -47,9 +47,9 @@ SYM_FUNC_START(handle_tlb_protect) la.abs t0, do_page_fault jirl ra, t0, 0 RESTORE_ALL_AND_RET -SYM_FUNC_END(handle_tlb_protect) +SYM_CODE_END(handle_tlb_protect) -SYM_FUNC_START(handle_tlb_load) +SYM_CODE_START(handle_tlb_load) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -193,16 +193,16 @@ nopage_tlb_load: csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_0 jr t0 -SYM_FUNC_END(handle_tlb_load) +SYM_CODE_END(handle_tlb_load) -SYM_FUNC_START(handle_tlb_load_ptw) +SYM_CODE_START(handle_tlb_load_ptw) csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la.abs t0, tlb_do_page_fault_0 jirl zero, t0, 0 -SYM_FUNC_END(handle_tlb_load_ptw) +SYM_CODE_END(handle_tlb_load_ptw) -SYM_FUNC_START(handle_tlb_store) +SYM_CODE_START(handle_tlb_store) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -349,16 +349,16 @@ nopage_tlb_store: csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jr t0 -SYM_FUNC_END(handle_tlb_store) +SYM_CODE_END(handle_tlb_store) -SYM_FUNC_START(handle_tlb_store_ptw) +SYM_CODE_START(handle_tlb_store_ptw) csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la.abs t0, tlb_do_page_fault_1 jirl zero, t0, 0 -SYM_FUNC_END(handle_tlb_store_ptw) +SYM_CODE_END(handle_tlb_store_ptw) -SYM_FUNC_START(handle_tlb_modify) +SYM_CODE_START(handle_tlb_modify) csrwr t0, EXCEPTION_KS0 csrwr t1, EXCEPTION_KS1 csrwr ra, EXCEPTION_KS2 @@ -503,16 +503,16 @@ nopage_tlb_modify: csrrd ra, EXCEPTION_KS2 la.abs t0, tlb_do_page_fault_1 jr t0 -SYM_FUNC_END(handle_tlb_modify) +SYM_CODE_END(handle_tlb_modify) -SYM_FUNC_START(handle_tlb_modify_ptw) +SYM_CODE_START(handle_tlb_modify_ptw) csrwr t0, LOONGARCH_CSR_KS0 csrwr t1, LOONGARCH_CSR_KS1 la.abs t0, tlb_do_page_fault_1 jirl zero, t0, 0 -SYM_FUNC_END(handle_tlb_modify_ptw) +SYM_CODE_END(handle_tlb_modify_ptw) -SYM_FUNC_START(handle_tlb_refill) +SYM_CODE_START(handle_tlb_refill) csrwr t0, LOONGARCH_CSR_TLBRSAVE csrrd t0, LOONGARCH_CSR_PGD lddir t0, t0, 3 @@ -527,4 +527,4 @@ SYM_FUNC_START(handle_tlb_refill) tlbfill csrrd t0, LOONGARCH_CSR_TLBRSAVE ertn -SYM_FUNC_END(handle_tlb_refill) +SYM_CODE_END(handle_tlb_refill) -- Gitee From 054329e6c31f17cf9d04081476a0f83a11817cd2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 7 Oct 2023 14:13:43 +0800 Subject: [PATCH 173/241] LoongArch: Export symbol invalid_pud_table for modules building Export symbol invalid_pud_table for modules building (such as the KVM module) if 4-level page tables enabled. Otherwise we get: ERROR: modpost: "invalid_pud_table" [arch/loongarch/kvm/kvm.ko] undefined! 
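To see why the export is needed, consider a minimal hypothetical module
that references the symbol (sketch only); its object code compiles
fine, but modpost rejects the module until the kernel image carries
EXPORT_SYMBOL(invalid_pud_table):

  #include <linux/module.h>
  #include <linux/printk.h>
  #include <asm/pgtable.h>

  /* Hypothetical demo module, for illustration only. */
  static int __init pud_demo_init(void)
  {
  	/* This reference is what makes modpost demand the export. */
  	pr_info("invalid_pud_table at %px\n", invalid_pud_table);
  	return 0;
  }
  module_init(pud_demo_init);
  MODULE_LICENSE("GPL");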
Change-Id: I5c3167c1dd285ca103025fdfe7ef8193b8a3b198
Reported-by: Randy Dunlap
Signed-off-by: Tianrui Zhao
Signed-off-by: Huacai Chen
---
 arch/loongarch/mm/init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 151144642dd8..48b0cac9b3cb 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -387,6 +387,7 @@ pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
 pgd_t invalid_pg_dir[_PTRS_PER_PGD] __page_aligned_bss;
 #ifndef __PAGETABLE_PUD_FOLDED
 pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
+EXPORT_SYMBOL(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
 pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
-- 
Gitee

From d27dd9f04adda5c739c49d305c38c0d229eb2178 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng
Date: Mon, 9 Oct 2023 12:28:41 +0800
Subject: [PATCH 174/241] LoongArch: Disable WUC for pgprot_writecombine() like
 ioremap_wc()

Currently the code only disables WUC for ioremap_wc(), which is only
used when mapping writecombine pages like ioremap() (mapped to the
kernel space). But for VRAM mapped in TTM/GEM, it is mapped with a
crafted pgprot by the pgprot_writecombine() function, in which case WUC
isn't disabled now.

Disable WUC for pgprot_writecombine() (fallback to SUC) if needed, like
ioremap_wc(). This improves the AMDGPU driver's stability on
Loongson-3A5000/3A6000 machines.

Change-Id: Ie111436150eaba4e52bce9349dc5e19ceb3dfcb0
Signed-off-by: Icenowy Zheng
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/io.h           |  5 ++---
 arch/loongarch/include/asm/pgtable-bits.h |  4 +++-
 arch/loongarch/kernel/setup.c             | 10 +++++-----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 29481b6fcef3..2fcc2db6f87e 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -54,10 +54,9 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
  * @offset:    bus address of the memory
  * @size:      size of the resource to map
  */
-extern pgprot_t pgprot_wc;
-
 #define ioremap_wc(offset, size)	\
-	ioremap_prot((offset), (size), pgprot_val(pgprot_wc))
+	ioremap_prot((offset), (size),	\
+		pgprot_val(wc_enabled ? PAGE_KERNEL_WUC : PAGE_KERNEL_SUC))
 
 #define ioremap_cache(offset, size)	\
 	ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 780bec355066..4c980be4065a 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -119,13 +119,15 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
 	return __pgprot(prot);
 }
 
+extern bool wc_enabled;
+
 #define pgprot_writecombine pgprot_writecombine
 
 static inline pgprot_t pgprot_writecombine(pgprot_t _prot)
 {
 	unsigned long prot = pgprot_val(_prot);
 
-	prot = (prot & ~_CACHE_MASK) | _CACHE_WUC;
+	prot = (prot & ~_CACHE_MASK) | (wc_enabled ? _CACHE_WUC : _CACHE_SUC);
 
 	return __pgprot(prot);
 }
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index fe659a14c898..62c07a824c6d 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -78,19 +78,19 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
 }
 
 #ifdef CONFIG_ARCH_WRITECOMBINE
-pgprot_t pgprot_wc = PAGE_KERNEL_WUC;
+bool wc_enabled = true;
 #else
-pgprot_t pgprot_wc = PAGE_KERNEL_SUC;
+bool wc_enabled = false;
 #endif
 
-EXPORT_SYMBOL(pgprot_wc);
+EXPORT_SYMBOL(wc_enabled);
 
 static int __init setup_writecombine(char *p)
 {
 	if (!strcmp(p, "on"))
-		pgprot_wc = PAGE_KERNEL_WUC;
+		wc_enabled = true;
 	else if (!strcmp(p, "off"))
-		pgprot_wc = PAGE_KERNEL_SUC;
+		wc_enabled = false;
 	else
 		pr_warn("Unknown writecombine setting \"%s\".\n", p);
 
-- 
Gitee

From 34231c498c43e866e3e59586d21515a9bca02fb0 Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Wed, 8 Nov 2023 12:04:47 +0800
Subject: [PATCH 175/241] LoongArch: Disable module from accessing external
 data directly

The distance between vmlinux and a module is too large for external
data to be accessed directly with PC-relative addressing, so it can
only be reached through the GOT. When compiling modules with GCC, the
option `-mdirect-extern-access` is disabled by default. The Clang
option `-fdirect-access-external-data` is enabled by default, so it
needs to be explicitly disabled.

Change-Id: Ia64ab1103eba3bd19e2cdca80ad1b1c405a97382
Signed-off-by: WANG Rui
Signed-off-by: Huacai Chen
---
 arch/loongarch/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 92f0d21d690c..89e986254ce2 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -62,6 +62,8 @@ LDFLAGS_vmlinux		+= -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y		+= $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL	+= $(call cc-option,-mdirect-extern-access)
+KBUILD_AFLAGS_MODULE	+= $(call cc-option,-fno-direct-access-external-data)
+KBUILD_CFLAGS_MODULE	+= $(call cc-option,-fno-direct-access-external-data)
 KBUILD_AFLAGS_MODULE	+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
 KBUILD_CFLAGS_MODULE	+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
 else
-- 
Gitee

From 763ed0a381508cb19142ebec91c253ac478c9bd5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Thu, 2 Nov 2023 08:43:02 -0700
Subject: [PATCH 176/241] LoongArch: Mark __percpu functions as always inline

A recent change to the optimization pipeline in LLVM reveals some
fragility around the inlining of LoongArch's __percpu functions, which
manifests as a BUILD_BUG() failure:

  In file included from kernel/sched/build_policy.c:17:
  In file included from include/linux/sched/cputime.h:5:
  In file included from include/linux/sched/signal.h:5:
  In file included from include/linux/rculist.h:11:
  In file included from include/linux/rcupdate.h:26:
  In file included from include/linux/irqflags.h:18:
  arch/loongarch/include/asm/percpu.h:97:3: error: call to '__compiletime_assert_51' declared with 'error' attribute: BUILD_BUG failed
     97 |                 BUILD_BUG();
        |                 ^
  include/linux/build_bug.h:59:21: note: expanded from macro 'BUILD_BUG'
     59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
        |                     ^
  include/linux/build_bug.h:39:37: note: expanded from macro 'BUILD_BUG_ON_MSG'
     39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
        |                                     ^
  include/linux/compiler_types.h:425:2: note: expanded from macro 'compiletime_assert'
    425 |         _compiletime_assert(condition, msg,
__compiletime_assert_, __COUNTER__) | ^ include/linux/compiler_types.h:413:2: note: expanded from macro '_compiletime_assert' 413 | __compiletime_assert(condition, msg, prefix, suffix) | ^ include/linux/compiler_types.h:406:4: note: expanded from macro '__compiletime_assert' 406 | prefix ## suffix(); \ | ^ :86:1: note: expanded from here 86 | __compiletime_assert_51 | ^ 1 error generated. If these functions are not inlined (which the compiler is free to do even with functions marked with the standard 'inline' keyword), the BUILD_BUG() in the default case cannot be eliminated since the compiler cannot prove it is never used, resulting in a build failure due to the error attribute. Mark these functions as __always_inline to guarantee inlining so that the BUILD_BUG() only triggers when the default case genuinely cannot be eliminated due to an unexpected size. Change-Id: I519c333a404a0328b04e34cf1e71b0953b54fdad Cc: Closes: https://github.com/ClangBuiltLinux/linux/issues/1955 Fixes: 46859ac8af52 ("LoongArch: Add multi-processor (SMP) support") Link: https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/percpu.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index 7d0861554c29..c7256d9525c6 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -31,7 +31,7 @@ static inline void set_my_cpu_offset(unsigned long off) #define __my_cpu_offset __my_cpu_offset #define PERCPU_OP(op, asm_op, c_op) \ -static inline unsigned long __percpu_##op(void *ptr, \ +static __always_inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ { \ unsigned long ret; \ @@ -62,7 +62,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void *ptr, int size) { unsigned long ret; @@ -99,7 +99,7 @@ static inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -131,8 +131,8 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size) } } -static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) { switch (size) { case 1: -- Gitee From 142312d5128b552900b227f0933a83165375aa98 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Mon, 6 Nov 2023 16:47:34 +0800 Subject: [PATCH 177/241] LoongArch: Relax memory ordering for atomic operations This patch relaxes the implementation while satisfying the memory ordering requirements for atomic operations, which will help improve performance on LA664+. 
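For illustration, hand-expanding the reworked macros for
atomic_add_return() yields roughly the following pair (a sketch, not
actual preprocessor output): the fully-ordered variant keeps the
barrier (_db) form of the AM instruction, while the new _relaxed
variant uses the plain form.

  static __inline__ int atomic_add_return(int i, atomic_t *v)
  {
  	int result;

  	__asm__ __volatile__(
  	"amadd_db.w %1, %2, %0"		/* full barrier semantics */
  	: "+ZB" (v->counter), "=&r" (result)
  	: "r" (i)
  	: "memory");

  	return result + i;
  }

  static __inline__ int atomic_add_return_relaxed(int i, atomic_t *v)
  {
  	int result;

  	__asm__ __volatile__(
  	"amadd.w %1, %2, %0"		/* no ordering guarantees */
  	: "+ZB" (v->counter), "=&r" (result)
  	: "r" (i)
  	: "memory");

  	return result + i;
  }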
Unixbench with full threads (8):

                                            before        after
  Dhrystone 2 using register variables   203910714.2  203909539.8   0.00%
  Double-Precision Whetstone                 37930.9      37931.0   0.00%
  Execl Throughput                           29431.5      29545.8   0.39%
  File Copy 1024 bufsize 2000 maxblocks    6645759.5    6676320.0   0.46%
  File Copy 256 bufsize 500 maxblocks      2138772.4    2144182.4   0.25%
  File Copy 4096 bufsize 8000 maxblocks   11640698.4   11602703.0  -0.33%
  Pipe Throughput                          8849077.7    8917009.4   0.77%
  Pipe-based Context Switching             1255108.5    1287277.3   2.56%
  Process Creation                           50825.9      50442.1  -0.76%
  Shell Scripts (1 concurrent)               25795.8      25942.3   0.57%
  Shell Scripts (8 concurrent)                3812.6       3835.2   0.59%
  System Call Overhead                     9248212.6    9353348.6   1.14%
  =======
  System Benchmarks Index Score               8076.6       8114.4   0.47%

Change-Id: I4f29f095001c7d02890d7b441f689c7e824e92b7
Signed-off-by: WANG Rui
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/atomic.h | 88 ++++++++++++++++++++++-------
 1 file changed, 68 insertions(+), 20 deletions(-)

diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index 8927561c9a8d..f054952b122b 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -59,19 +59,19 @@
 static __inline__ void atomic_##op(int i, atomic_t * v)			\
 {									\
 	__asm__ __volatile__(						\
-	"am"#asm_op"_db.w" " $zero, %1, %0	\n"			\
+	"am"#asm_op".w" " $zero, %1, %0	\n"				\
 	: "+ZB" (v->counter)						\
 	: "r" (I)							\
 	: "memory");							\
 }
 
-#define ATOMIC_OP_RETURN(op, I, asm_op, c_op)				\
-static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v)	\
+#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix)		\
+static __inline__ int atomic_##op##_return##suffix(int i, atomic_t * v)	\
 {									\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-	"am"#asm_op"_db.w" " %1, %2, %0	\n"				\
+	"am"#asm_op#mb".w" " %1, %2, %0	\n"				\
 	: "+ZB" (v->counter), "=&r" (result)				\
 	: "r" (I)							\
 	: "memory");							\
 									\
 	return result c_op I;						\
 }
 
-#define ATOMIC_FETCH_OP(op, I, asm_op)					\
-static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v)	\
+#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix)			\
+static __inline__ int atomic_fetch_##op##suffix(int i, atomic_t * v)	\
 {									\
 	int result;							\
 									\
 	__asm__ __volatile__(						\
-	"am"#asm_op"_db.w" " %1, %2, %0	\n"				\
+	"am"#asm_op#mb".w" " %1, %2, %0	\n"				\
 	: "+ZB" (v->counter), "=&r" (result)				\
 	: "r" (I)							\
 	: "memory");							\
@@ -95,29 +95,53 @@
 
 #define ATOMIC_OPS(op, I, asm_op, c_op)					\
 	ATOMIC_OP(op, I, asm_op)					\
-	ATOMIC_OP_RETURN(op, I, asm_op, c_op)				\
-	ATOMIC_FETCH_OP(op, I, asm_op)
+	ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db,         )		\
+	ATOMIC_OP_RETURN(op, I, asm_op, c_op,    , _relaxed)		\
+	ATOMIC_FETCH_OP(op, I, asm_op, _db,         )			\
+	ATOMIC_FETCH_OP(op, I, asm_op,    , _relaxed)
 
 ATOMIC_OPS(add, i, add, +)
 ATOMIC_OPS(sub, -i, add, +)
 
+#define atomic_add_return		atomic_add_return
+#define atomic_add_return_acquire	atomic_add_return
+#define atomic_add_return_release	atomic_add_return
 #define atomic_add_return_relaxed	atomic_add_return_relaxed
+#define atomic_sub_return		atomic_sub_return
+#define atomic_sub_return_acquire	atomic_sub_return
+#define atomic_sub_return_release	atomic_sub_return
 #define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+#define atomic_fetch_add		atomic_fetch_add
+#define atomic_fetch_add_acquire	atomic_fetch_add
+#define atomic_fetch_add_release	atomic_fetch_add
 #define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
+#define atomic_fetch_sub
atomic_fetch_sub +#define atomic_fetch_sub_acquire atomic_fetch_sub +#define atomic_fetch_sub_release atomic_fetch_sub #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed #undef ATOMIC_OPS #define ATOMIC_OPS(op, I, asm_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(and, i, and) ATOMIC_OPS(or, i, or) ATOMIC_OPS(xor, i, xor) +#define atomic_fetch_and atomic_fetch_and +#define atomic_fetch_and_acquire atomic_fetch_and +#define atomic_fetch_and_release atomic_fetch_and #define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_or atomic_fetch_or +#define atomic_fetch_or_acquire atomic_fetch_or +#define atomic_fetch_or_release atomic_fetch_or #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_xor atomic_fetch_xor +#define atomic_fetch_xor_acquire atomic_fetch_xor +#define atomic_fetch_xor_release atomic_fetch_xor #define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed #undef ATOMIC_OPS @@ -221,18 +245,18 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) static __inline__ void atomic64_##op(long i, atomic64_t * v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + "am"#asm_op".d " " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ -static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static __inline__ long atomic64_##op##_return##suffix(long i, atomic64_t * v) \ { \ long result; \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -240,13 +264,13 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ return result c_op I; \ } -#define ATOMIC64_FETCH_OP(op, I, asm_op) \ -static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \ +static __inline__ long atomic64_fetch_##op##suffix(long i, atomic64_t * v) \ { \ long result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -256,29 +280,53 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ #define ATOMIC64_OPS(op, I, asm_op, c_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(add, i, add, +) ATOMIC64_OPS(sub, -i, add, +) +#define atomic64_add_return atomic64_add_return +#define atomic64_add_return_acquire atomic64_add_return +#define atomic64_add_return_release atomic64_add_return #define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_sub_return atomic64_sub_return +#define atomic64_sub_return_acquire atomic64_sub_return +#define atomic64_sub_return_release atomic64_sub_return #define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_fetch_add atomic64_fetch_add +#define atomic64_fetch_add_acquire atomic64_fetch_add +#define atomic64_fetch_add_release atomic64_fetch_add 
#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_sub atomic64_fetch_sub +#define atomic64_fetch_sub_acquire atomic64_fetch_sub +#define atomic64_fetch_sub_release atomic64_fetch_sub #define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, I, asm_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(and, i, and) ATOMIC64_OPS(or, i, or) ATOMIC64_OPS(xor, i, xor) +#define atomic64_fetch_and atomic64_fetch_and +#define atomic64_fetch_and_acquire atomic64_fetch_and +#define atomic64_fetch_and_release atomic64_fetch_and #define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_or atomic64_fetch_or +#define atomic64_fetch_or_acquire atomic64_fetch_or +#define atomic64_fetch_or_release atomic64_fetch_or #define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_xor atomic64_fetch_xor +#define atomic64_fetch_xor_acquire atomic64_fetch_xor +#define atomic64_fetch_xor_release atomic64_fetch_xor #define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS -- Gitee From 3152ab6f72623eef221acff6dac08363ca2fdd8d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 7 Nov 2023 19:14:18 +0800 Subject: [PATCH 178/241] LoongArch/smp: Call rcutree_report_cpu_starting() earlier rcutree_report_cpu_starting() must be called before cpu_probe() to avoid the following lockdep splat that triggered by calling __alloc_pages() when CONFIG_PROVE_RCU_LIST=y: ============================= WARNING: suspicious RCU usage 6.6.0+ #980 Not tainted ----------------------------- kernel/locking/lockdep.c:3761 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 1 lock held by swapper/1/0: #0: 900000000c82ef98 (&pcp->lock){+.+.}-{2:2}, at: get_page_from_freelist+0x894/0x1790 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.0+ #980 Stack : 0000000000000001 9000000004f79508 9000000004893670 9000000100310000 90000001003137d0 0000000000000000 90000001003137d8 9000000004f79508 0000000000000000 0000000000000001 0000000000000000 90000000048a3384 203a656d616e2065 ca43677b3687e616 90000001002c3480 0000000000000008 000000000000009d 0000000000000000 0000000000000001 80000000ffffe0b8 000000000000000d 0000000000000033 0000000007ec0000 13bbf50562dad831 9000000005140748 0000000000000000 9000000004f79508 0000000000000004 0000000000000000 9000000005140748 90000001002bad40 0000000000000000 90000001002ba400 0000000000000000 9000000003573ec8 0000000000000000 00000000000000b0 0000000000000004 0000000000000000 0000000000070000 ... 
Call Trace:
[<9000000003573ec8>] show_stack+0x38/0x150
[<9000000004893670>] dump_stack_lvl+0x74/0xa8
[<900000000360d2bc>] lockdep_rcu_suspicious+0x14c/0x190
[<900000000361235c>] __lock_acquire+0xd0c/0x2740
[<90000000036146f4>] lock_acquire+0x104/0x2c0
[<90000000048a955c>] _raw_spin_lock_irqsave+0x5c/0x90
[<900000000381cd5c>] rmqueue_bulk+0x6c/0x950
[<900000000381fc0c>] get_page_from_freelist+0xd4c/0x1790
[<9000000003821c6c>] __alloc_pages+0x1bc/0x3e0
[<9000000003583b40>] tlb_init+0x150/0x2a0
[<90000000035742a0>] per_cpu_trap_init+0xf0/0x110
[<90000000035712fc>] cpu_probe+0x3dc/0x7a0
[<900000000357ed20>] start_secondary+0x40/0xb0
[<9000000004897138>] smpboot_entry+0x54/0x58

raw_smp_processor_id() is required in order to avoid calling into lockdep
before RCU has declared the CPU to be watched for readers.

See also commit 29368e093921 ("x86/smpboot: Move rcu_cpu_starting()
earlier"), commit de5d9dae150c ("s390/smp: move rcu_cpu_starting()
earlier") and commit 99f070b62322 ("powerpc/smp: Call rcu_cpu_starting()
earlier").

Change-Id: Iae331dacee7ecedafe172e909a40e850d900d516
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/smp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 7dc70cee41d3..0e2aa8d337d4 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -132,8 +132,9 @@ asmlinkage void start_secondary(void)
 	unsigned int cpu;
 
 	sync_counter();
-	cpu = smp_processor_id();
+	cpu = raw_smp_processor_id();
 	set_my_cpu_offset(per_cpu_offset(cpu));
+	rcu_cpu_starting(cpu);
 	cpu_probe();
 	constant_clockevent_init();
-- 
Gitee

From c716bb5cb62fe83c689f86753fee277d905a955d Mon Sep 17 00:00:00 2001
From: WANG Rui
Date: Wed, 15 Nov 2023 21:21:37 +0800
Subject: [PATCH 179/241] LoongArch: Explicitly set -fdirect-access-external-data for vmlinux

After this LLVM commit [1], -fno-pic no longer implies direct access to
external data. Explicitly set -fdirect-access-external-data for vmlinux,
which avoids GOT entries.
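To make the effect concrete, here is a minimal sketch (illustrative only;
'counter' and read_counter() are made-up names, not kernel code): any
access to an object defined in another translation unit shows the
difference.

	/* sketch.c - illustrative only */
	extern int counter;	/* defined in some other object file */

	int read_counter(void)
	{
		/* With -fdirect-access-external-data this compiles to a
		 * direct pc-relative access; with the -fno-... variant
		 * the address is first loaded from a GOT slot.
		 */
		return counter;
	}

Compiling such a file with clang -O2 -S, once with and once without
-fdirect-access-external-data, makes the extra GOT load visible in the
generated assembly.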
Change-Id: I73f1f263dfebc3d7b6c01f11e838b20bb2cf142f
Link: https://github.com/llvm/llvm-project/commit/47eeee297775347cbdb7624d6a766c2a3eec4a59
Suggested-by: Xi Ruoyao
Signed-off-by: WANG Rui
Signed-off-by: Huacai Chen
---
 arch/loongarch/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 89e986254ce2..1b82b20e83f4 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -62,6 +62,7 @@ LDFLAGS_vmlinux			+= -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y			+= $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-mdirect-extern-access)
+KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-fdirect-access-external-data)
 KBUILD_AFLAGS_MODULE		+= $(call cc-option,-fno-direct-access-external-data)
 KBUILD_CFLAGS_MODULE		+= $(call cc-option,-fno-direct-access-external-data)
 KBUILD_AFLAGS_MODULE		+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
-- 
Gitee

From 0725daff040ff89350ef7f0bedfa54dc5d9aee86 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Mon, 13 Nov 2023 11:06:16 +0800
Subject: [PATCH 180/241] LoongArch: Add __percpu annotation for __percpu_read()/__percpu_write()

When building the kernel with C=1, we get:

arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces)
arch/loongarch/kernel/process.c:234:46:    expected void *ptr
arch/loongarch/kernel/process.c:234:46:    got unsigned long [noderef] __percpu *
arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces)
arch/loongarch/kernel/process.c:234:46:    expected void *ptr
arch/loongarch/kernel/process.c:234:46:    got unsigned long [noderef] __percpu *
arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces)
arch/loongarch/kernel/process.c:234:46:    expected void *ptr
arch/loongarch/kernel/process.c:234:46:    got unsigned long [noderef] __percpu *
arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces)
arch/loongarch/kernel/process.c:234:46:    expected void *ptr
arch/loongarch/kernel/process.c:234:46:    got unsigned long [noderef] __percpu *

Adding the __percpu annotation to __percpu_read()/__percpu_write() avoids
such warnings. __percpu_xchg() and other functions don't need the
annotation because their wrapper, i.e. _pcp_protect(), already suppresses
warnings. Also adjust the indentations in this file.
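For readers unfamiliar with sparse address spaces, a simplified sketch of
the mechanism (condensed from include/linux/compiler_types.h; the
read_slot() helper is a made-up example, not kernel code):

	#ifdef __CHECKER__
	# define __percpu	__attribute__((noderef, address_space(3)))
	# define __force	__attribute__((force))
	#else
	# define __percpu
	# define __force
	#endif

	/* With the parameter annotated, callers can pass per-cpu pointers
	 * without warnings; the one deliberate strip of the address space
	 * is the explicit (__force) cast at the raw access.
	 */
	static unsigned long read_slot(unsigned long __percpu *ptr)
	{
		return *(unsigned long __force *)ptr;
	}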
Change-Id: I14334881b382bfcca6fa10daf2c30b8d322ce4d6
Reported-by: kernel test robot
Closes: https://lore.kernel.org/oe-kbuild-all/202311080409.LlOfTR3m-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202311080840.Vc2kXhfp-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202311081340.3k72KKdg-lkp@intel.com/
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/percpu.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index c7256d9525c6..3599b826dca4 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -39,13 +39,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr,		\
 	switch (size) {							\
 	case 4:								\
 		__asm__ __volatile__(					\
-		"am"#asm_op".w"	" %[ret], %[val], %[ptr]	\n"	\
+		"am"#asm_op".w" " %[ret], %[val], %[ptr]	\n"	\
 		: [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr)		\
 		: [val] "r" (val));					\
 		break;							\
 	case 8:								\
 		__asm__ __volatile__(					\
-		"am"#asm_op".d"	" %[ret], %[val], %[ptr]	\n"	\
+		"am"#asm_op".d" " %[ret], %[val], %[ptr]	\n"	\
 		: [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr)		\
 		: [val] "r" (val));					\
 		break;							\
@@ -62,7 +62,7 @@ PERCPU_OP(and, and, &)
 PERCPU_OP(or, or, |)
 #undef PERCPU_OP
 
-static __always_inline unsigned long __percpu_read(void *ptr, int size)
+static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size)
 {
 	unsigned long ret;
 
@@ -99,7 +99,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size)
 	return ret;
 }
 
-static __always_inline void __percpu_write(void *ptr, unsigned long val, int size)
+static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size)
 {
 	switch (size) {
 	case 1:
@@ -131,8 +131,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz
 	}
 }
 
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
-						   int size)
+static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
 {
 	switch (size) {
 	case 1:
-- 
Gitee

From 50bf18aee07a3a4d454e37b90e41cbcf36791bb0 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 15 Nov 2023 17:58:34 +0800
Subject: [PATCH 181/241] LoongArch: Silence the boot warning about 'nokaslr'

The kernel parameter 'nokaslr' is handled before start_kernel(), so
technically we don't need early_param() to mark it. But it can cause a
boot warning as follows:

Unknown kernel command line parameters "nokaslr", will be passed to user
space.

When we use 'init=/bin/bash', the 'nokaslr' passed to user space will
even cause a kernel panic. So we use early_param() to mark 'nokaslr',
simply print a notice and silence the boot warning (this also fixes the
potential panic). This logic is similar to RISC-V.
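The same idiom in isolation (a generic sketch mirroring the handler added
in the hunk below; 'myopt' is a made-up parameter name): merely
registering an early_param() handler makes the option known, so
init/main.c neither warns about it nor forwards it to user space, and
returning 0 reports successful parsing.

	static int __init parse_myopt(char *str)
	{
		pr_info("myopt=%s\n", str ? str : "");
		return 0;	/* consumed, no boot warning */
	}
	early_param("myopt", parse_myopt);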
Change-Id: Id485899817a7e2bd8aab7eb9c24c9b8db97fbf9a
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/relocate.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
index d2933c4f0856..baa3d0d1f28c 100644
--- a/arch/loongarch/kernel/relocate.c
+++ b/arch/loongarch/kernel/relocate.c
@@ -134,6 +134,14 @@ static inline __init unsigned long get_random_boot(void)
 	return hash;
 }
 
+static int __init nokaslr(char *p)
+{
+	pr_info("KASLR is disabled.\n");
+
+	return 0; /* Print a notice and silence the boot warning */
+}
+early_param("nokaslr", nokaslr);
+
 static inline __init bool kaslr_disabled(void)
 {
 	char *str;
-- 
Gitee

From 208d6cc8690202ee60759497e7a0348479224f6e Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Mon, 20 Nov 2023 10:46:52 +0800
Subject: [PATCH 182/241] LoongArch: Implement constant timer shutdown interface

When a cpu is hot-unplugged, it is put in idle state and the function
arch_cpu_idle_dead() is called. The timer interrupt for this processor
should be disabled, otherwise there will be a pending timer interrupt
for the unplugged cpu, which prevents the vcpu from giving up scheduling
when the system is running in vm mode.

This patch implements the timer shutdown interface so that the constant
timer will be properly disabled when a CPU is hot-unplugged.

Change-Id: I390b06151ac7785797980aaa4b4e32cc4f2513ce
Reviewed-by: WANG Xuerui
Signed-off-by: Bibo Mao
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/time.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 60f4d4ed52da..60323be17101 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -63,14 +63,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt)
 	return 0;
 }
 
-static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
+static int constant_set_state_periodic(struct clock_event_device *evt)
 {
+	unsigned long period;
 	unsigned long timer_config;
 
 	raw_spin_lock(&state_lock);
 
-	timer_config = csr_read64(LOONGARCH_CSR_TCFG);
-	timer_config &= ~CSR_TCFG_EN;
+	period = const_clock_freq / HZ;
+	timer_config = period & CSR_TCFG_VAL;
+	timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
 	csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
 	raw_spin_unlock(&state_lock);
@@ -78,16 +80,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt)
 	return 0;
 }
 
-static int constant_set_state_periodic(struct clock_event_device *evt)
+static int constant_set_state_shutdown(struct clock_event_device *evt)
 {
-	unsigned long period;
 	unsigned long timer_config;
 
 	raw_spin_lock(&state_lock);
 
-	period = const_clock_freq / HZ;
-	timer_config = period & CSR_TCFG_VAL;
-	timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN);
+	timer_config = csr_read64(LOONGARCH_CSR_TCFG);
+	timer_config &= ~CSR_TCFG_EN;
 	csr_write64(timer_config, LOONGARCH_CSR_TCFG);
 
 	raw_spin_unlock(&state_lock);
@@ -95,11 +95,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt)
 	return 0;
 }
 
-static int constant_set_state_shutdown(struct clock_event_device *evt)
-{
-	return 0;
-}
-
 static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt)
 {
 	unsigned long timer_config;
@@ -153,7 +148,7 @@ int constant_clockevent_init(void)
 	cd->rating = 320;
 	cd->cpumask = cpumask_of(cpu);
 	cd->set_state_oneshot = constant_set_state_oneshot;
-	cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped;
+	cd->set_state_oneshot_stopped = constant_set_state_shutdown;
 	cd->set_state_periodic = constant_set_state_periodic;
 	cd->set_state_shutdown = constant_set_state_shutdown;
 	cd->set_next_event = constant_timer_next_event;
-- 
Gitee

From f0f1c618400c71fe9b1ab85b8ab42e622701aa99 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao
Date: Sun, 26 Nov 2023 20:17:28 +0800
Subject: [PATCH 183/241] LoongArch: Slightly clean up drdtime()

As we are just discarding the stable clock ID, simply write it into
$zero instead of allocating a temporary register.

Change-Id: Icaaeeb86577f995f9c88e7d223986d1fafe9a8b5
Signed-off-by: Xi Ruoyao
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/loongarchregs.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h
index aaef3119ebec..72fbc7477aa8 100644
--- a/arch/loongarch/include/asm/loongarchregs.h
+++ b/arch/loongarch/include/asm/loongarchregs.h
@@ -1132,12 +1132,11 @@ __asm__(".macro parse_r var r\n\t"
 
 static inline u64 drdtime(void)
 {
-	int rID = 0;
 	u64 val = 0;
 
 	__asm__ __volatile__(
-	"rdtime.d %0, %1 \n\t"
-	: "=r"(val), "=r"(rID)
+	"rdtime.d %0, $zero\n\t"
+	: "=r"(val)
 	:
 	);
 	return val;
-- 
Gitee

From cfd8c4ca9eba4bfe469a9a3c4f07bd9cccb7ec62 Mon Sep 17 00:00:00 2001
From: Jinyang He
Date: Sun, 26 Nov 2023 20:10:02 +0800
Subject: [PATCH 184/241] LoongArch: Set unwind stack type to unknown rather than set error flag

During unwinding, unwind_done() is used as an end condition. Normally it
unwinds to the user stack and then sets the stack type to unknown, which
is a normal exit. When something unexpected happens in the unwind
process and we cannot unwind anymore, we should set the error flag, and
also set the stack type to unknown to indicate that the unwind process
cannot continue. The error flag emphasizes that the unwind process
produced an unexpected error. There is nothing unexpected when we unwind
the PT_REGS at the top of the IRQ stack and find out that it is a
user-mode PT_REGS. Thus, we should not set the error flag and should
just set the stack type to unknown.
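A sketch of the consumer-side contract this relies on (helper shapes
assumed to match arch/loongarch/include/asm/unwind.h): unwind_done()
terminates the walk in both the normal and the abnormal case, while
unwind_error() only distinguishes the abnormal one, which is why a normal
user-mode exit must leave the error flag clear.

	static inline bool unwind_done(struct unwind_state *state)
	{
		/* unknown stack type means the walk is finished */
		return state->stack_info.type == STACK_TYPE_UNKNOWN;
	}

	static inline bool unwind_error(struct unwind_state *state)
	{
		/* set only when the walk stopped unexpectedly */
		return state->error;
	}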
Change-Id: I0d1166344de2e82e87a5b569aa2d6c89792b9dbb
Reported-by: Hengqi Chen
Signed-off-by: Jinyang He
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/stacktrace.c      | 2 +-
 arch/loongarch/kernel/unwind_guess.c    | 1 -
 arch/loongarch/kernel/unwind_prologue.c | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index 17fde4891397..55ae37018fff 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -63,7 +63,7 @@ static void save_context_stack(struct task_struct *tsk,
 		regs->regs[22] = 0;
 
 	for (unwind_start(&state, tsk, regs);
-	     !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
+	     !unwind_done(&state); unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
 		if (!addr || !fn(trace, addr))
 			return;
diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c
index 9c5fea318bdf..a3c47a9e2d8c 100644
--- a/arch/loongarch/kernel/unwind_guess.c
+++ b/arch/loongarch/kernel/unwind_guess.c
@@ -44,7 +44,6 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	} while (!get_stack_info(state->sp, state->task, info));
 
-	state->error = true;
 	return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 4fe1f4de4135..8fcaa83c5600 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -172,7 +172,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	} while (!get_stack_info(state->sp, state->task, info));
 
 out:
-	state->error = true;
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
 	return false;
 }
 EXPORT_SYMBOL_GPL(unwind_next_frame);
-- 
Gitee

From f132f0a2b41637bc080dab9c38100f931da69a93 Mon Sep 17 00:00:00 2001
From: Hengqi Chen
Date: Mon, 4 Dec 2023 05:37:45 +0000
Subject: [PATCH 185/241] LoongArch: Preserve syscall nr across execve()

Currently, we store the syscall nr in pt_regs::regs[11] and syscall
execve() accidentally overwrites it during its execution:

  sys_execve()
    -> do_execve()
      -> do_execveat_common()
        -> bprm_execve()
          -> exec_binprm()
            -> search_binary_handler()
              -> load_elf_binary()
                -> ELF_PLAT_INIT()

ELF_PLAT_INIT() resets regs[11] to 0, so in syscall_exit_to_user_mode()
we later get a wrong syscall nr. This breaks tools like execsnoop since
they rely on execve() tracepoints.

Skip the pt_regs::regs[11] reset in ELF_PLAT_INIT() to fix the issue.
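For context, a sketch of the register convention involved (matching the
shape of the kernel's LoongArch syscall helpers): the syscall number
travels in $a7, i.e. regs[11], so zeroing that slot before syscall exit
makes tracepoint consumers see syscall 0 instead of __NR_execve.

	static inline long syscall_get_nr(struct task_struct *task,
					  struct pt_regs *regs)
	{
		return regs->regs[11];	/* $a7 holds the syscall number */
	}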
Change-Id: I93caaa4f5c2632c2f3d547604da29c9bf6a0ab47
Signed-off-by: Hengqi Chen
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/elf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 97427d4fa531..32511237d884 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -296,7 +296,7 @@ extern const char *__elf_platform;
 #define ELF_PLAT_INIT(_r, load_addr)	do { \
 	_r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \
 	_r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \
-	_r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \
+	_r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \
 	_r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \
 	_r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \
 	_r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \
-- 
Gitee

From b29d0b37ad30d5879dd9398a81d10ee930726894 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 9 Jan 2024 20:25:25 +0800
Subject: [PATCH 186/241] LoongArch: Add a missing call to efi_esrt_init()

ESRT (EFI System Resource Table) is needed for UEFI's "Capsule Update"
feature. But ESRT initialization is missing on LoongArch now, so add a
call to efi_esrt_init() at the end of efi_init().

Change-Id: If716193d34e12f2f7b181ca0363d74c669b55a9a
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/efi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index 2ea536621120..2f1d7ac5dddf 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -120,4 +120,6 @@ void __init efi_init(void)
 
 		early_memunmap(tbl, sizeof(*tbl));
 	}
+
+	efi_esrt_init();
 }
-- 
Gitee

From 2020439f3242e19d19b3cdee0f3318134ebafb2b Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Fri, 8 Dec 2023 23:14:33 +0800
Subject: [PATCH 187/241] LoongArch: Change SHMLBA from SZ_64K to PAGE_SIZE

LoongArch has hardware page coloring for the L1 Cache, so we don't have
cache aliases. But the SFB (Store Fill Buffer) still has aliases, so we
previously defined SHMLBA as SZ_64K. However, there are lots of
applications that use PAGE_SIZE rather than SHMLBA to mmap() file pages
and shared pages. Of course we can fix them one by one, but that is not
easy. On the other hand, we can simply disable the SFB for the 4KB page
size to fix the cache alias problem (there will be a performance
decrease, but it is acceptable), and in the future we will fix the SFB
in hardware. So we can safely define SHMLBA as PAGE_SIZE (i.e. use the
generic shmparam.h) to make life easier.

Change-Id: I08402b574661fcc1f92fb58b2465d0dd4be31890
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/shmparam.h | 13 -------------
 arch/loongarch/kernel/head.S          | 10 ++++++++++
 2 files changed, 10 insertions(+), 13 deletions(-)
 delete mode 100644 arch/loongarch/include/asm/shmparam.h

diff --git a/arch/loongarch/include/asm/shmparam.h b/arch/loongarch/include/asm/shmparam.h
deleted file mode 100644
index 0a4af240090c..000000000000
--- a/arch/loongarch/include/asm/shmparam.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#ifndef _ASM_SHMPARAM_H
-#define _ASM_SHMPARAM_H
-
-#define __ARCH_FORCE_SHMLBA	1
-
-#define SHMLBA	SZ_64K		/* attach addr a multiple of this */
-
-#endif /* _ASM_SHMPARAM_H */
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 908a225b190d..46be1c3ec1a2 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -52,6 +52,11 @@ SYM_CODE_START(kernel_entry)	# kernel entry point
 	la.pcrel	t0, fw_arg2
 	st.d		a2, t0, 0
 
+#ifdef CONFIG_PAGE_SIZE_4KB
+	li.d		t0, 0
+	li.d		t1, CSR_STFILL
+	csrxchg		t0, t1, LOONGARCH_CSR_IMPCTL1
+#endif
 	/* KSave3 used for percpu base, initialized as 0 */
 	csrwr		zero, PERCPU_BASE_KS
 	/* GPR21 used for percpu base (runtime), initialized as 0 */
@@ -109,6 +114,11 @@ SYM_CODE_START(smpboot_entry)
 
 	JUMP_VIRT_ADDR	t0, t1
 
+#ifdef CONFIG_PAGE_SIZE_4KB
+	li.d		t0, 0
+	li.d		t1, CSR_STFILL
+	csrxchg		t0, t1, LOONGARCH_CSR_IMPCTL1
+#endif
 	/* Enable PG */
 	li.w		t0, 0xb0	# PLV=0, IE=0, PG=1
 	csrwr		t0, LOONGARCH_CSR_CRMD
-- 
Gitee

From 9ba000ce53ac49158cdb327e6332e2c96738cad0 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao
Date: Tue, 2 Jan 2024 20:37:07 +0800
Subject: [PATCH 188/241] LoongArch: Fix and simplify fcsr initialization on execve()

There has been a lingering bug in LoongArch Linux systems causing some
GCC tests to intermittently fail (see Closes link). I've made a minimal
reproducer:

    zsh% cat measure.s
    .align 4
    .globl _start
    _start:
        movfcsr2gr  $a0, $fcsr0
        bstrpick.w  $a0, $a0, 16, 16
        beqz        $a0, .ok
        break       0
    .ok:
        li.w        $a7, 93
        syscall     0
    zsh% cc measure.s -o measure -nostdlib
    zsh% echo $((1.0/3))
    0.33333333333333331
    zsh% while ./measure; do ; done

This while loop should not stop as POSIX is clear that execve must set
fenv to the default, where FCSR should be zero. But in fact it will just
stop after running for a while (normally less than 30 seconds). Note
that "$((1.0/3))" is needed to reproduce this issue because it raises
FE_INVALID and makes fcsr0 non-zero.

The problem is we are currently relying on SET_PERSONALITY2() to reset
current->thread.fpu.fcsr. But SET_PERSONALITY2() is executed before
start_thread which calls lose_fpu(0). We can see that if kernel
preemption is enabled, we may switch to another thread after
SET_PERSONALITY2() but before lose_fpu(0). Then a bad thing happens:
during the thread switch the value of the fcsr0 register is stored into
current->thread.fpu.fcsr, making it dirty again.

The issue can be fixed by setting current->thread.fpu.fcsr after
lose_fpu(0) because lose_fpu() clears TIF_USEDFPU, so the thread switch
won't touch current->thread.fpu.fcsr.

The only other architecture setting FCSR in SET_PERSONALITY2() is MIPS.
I've run a similar test on MIPS with the mainline kernel and it turns
out MIPS is buggy, too. Anyway, MIPS does this to support different FP
flavors (NaN encodings, etc.) which do not exist on LoongArch. So for
LoongArch, we can simply remove the current->thread.fpu.fcsr setting
from SET_PERSONALITY2() and do it in start_thread(), after lose_fpu(0).

The while loop that failed with the mainline kernel has survived for one
hour after this change on LoongArch.
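The essence of the fix is an ordering guarantee; a simplified sketch of
the relevant start_thread() steps (not the full function):

	lose_fpu(0);		/* clears TIF_USEDFPU, so a preempting
				 * context switch no longer saves live
				 * FPU state into current->thread.fpu */
	current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
				/* safe now: cannot be overwritten by a
				 * later thread switch */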
Change-Id: Ie13532f1e591dab35abd9e81d4e5fdc5387e5e2d
Fixes: 803b0fc5c3f2baa ("LoongArch: Add process management")
Closes: https://github.com/loongson-community/discussions/issues/7
Link: https://lore.kernel.org/linux-mips/7a6aa1bbdbbe2e63ae96ff163fab0349f58f1b9e.camel@xry111.site/
Cc: stable@vger.kernel.org
Signed-off-by: Xi Ruoyao
Signed-off-by: Huacai Chen
---
 arch/loongarch/include/asm/elf.h | 5 -----
 arch/loongarch/kernel/elf.c      | 5 -----
 arch/loongarch/kernel/process.c  | 1 +
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 32511237d884..2715cfc10b30 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -246,8 +246,6 @@ void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs);
 do {									\
 	current->thread.vdso = &vdso_info;				\
 									\
-	loongarch_set_personality_fcsr(state);				\
-									\
 	if (personality(current->personality) != PER_LINUX)		\
 		set_personality(PER_LINUX);				\
 } while (0)
@@ -264,7 +262,6 @@ do {									\
 	clear_thread_flag(TIF_32BIT_ADDR);				\
 									\
 	current->thread.vdso = &vdso_info;				\
-	loongarch_set_personality_fcsr(state);				\
 									\
 	p = personality(current->personality);				\
 	if (p != PER_LINUX32 && p != PER_LINUX)				\
@@ -343,6 +340,4 @@ extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
 extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
 			  struct arch_elf_state *state);
 
-extern void loongarch_set_personality_fcsr(struct arch_elf_state *state);
-
 #endif /* _ASM_ELF_H */
diff --git a/arch/loongarch/kernel/elf.c b/arch/loongarch/kernel/elf.c
index 900e8d377b7a..9f4d0a8b1168 100644
--- a/arch/loongarch/kernel/elf.c
+++ b/arch/loongarch/kernel/elf.c
@@ -28,8 +28,3 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
 {
 	return 0;
 }
-
-void loongarch_set_personality_fcsr(struct arch_elf_state *state)
-{
-	current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
-}
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 3702a80cf2e7..2984d18a28a2 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -71,6 +71,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
 	euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
 	regs->csr_euen = euen;
 	lose_fpu(0);
+	current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
 
 	clear_thread_flag(TIF_LSX_CTX_LIVE);
 	clear_thread_flag(TIF_LASX_CTX_LIVE);
-- 
Gitee

From 808cc30c1b843a96ceac2eae780643906a3e4b38 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 25 Jan 2024 15:30:37 +0800
Subject: [PATCH 189/241] LoongArch/smp: Call rcutree_report_cpu_starting() at tlb_init()

Machines which have more than 8 nodes fail to boot SMP after commit
a2ccf46333d7b2cf96 ("LoongArch/smp: Call rcutree_report_cpu_starting()
earlier"). Because such machines use a tlb-based per-cpu base address
rather than a dmw-based per-cpu base address, per-cpu variables can only
be accessed after tlb_init(). But rcutree_report_cpu_starting() is now
called before tlb_init() and indeed accesses per-cpu variables. Since
the original patch wanted to avoid the lockdep warning caused by page
allocation in tlb_init(), we can move rcutree_report_cpu_starting() into
tlb_init(), after the tlb exception configuration but before the page
allocation.
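To make the ordering constraint concrete, a sketch of the secondary-CPU
boot path on such machines (call chain as quoted in the earlier splat;
the annotations are a summary, not kernel code):

	/*
	 * start_secondary()
	 *   cpu_probe()
	 *     per_cpu_trap_init()
	 *       tlb_init()
	 *         - configure tlb exception handlers (per-cpu area usable)
	 *         - rcu_cpu_starting(cpu)    <- earliest safe point
	 *         - allocate pages for handler copies (needs RCU watching)
	 */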
Change-Id: Iafc59656b0b457721271fd8d832e2d936f8b7281 Signed-off-by: Huacai Chen --- arch/loongarch/kernel/smp.c | 1 - arch/loongarch/mm/tlb.c | 16 ++++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 0e2aa8d337d4..71eabb526f01 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -134,7 +134,6 @@ asmlinkage void start_secondary(void) sync_counter(); cpu = raw_smp_processor_id(); set_my_cpu_offset(per_cpu_offset(cpu)); - rcu_cpu_starting(cpu); cpu_probe(); constant_clockevent_init(); diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 2edb1d7fdd74..108fe47257e5 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -283,12 +283,16 @@ static void setup_tlb_handler(int cpu) set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); - } + } else { + int vec_sz __maybe_unused; + void *addr __maybe_unused; + struct page *page __maybe_unused; + + /* Avoid lockdep warning */ + rcu_cpu_starting(cpu); + #ifdef CONFIG_NUMA - else { - void *addr; - struct page *page; - const int vec_sz = sizeof(exception_handlers); + vec_sz = sizeof(exception_handlers); if (pcpu_handlers[cpu]) return; @@ -304,8 +308,8 @@ static void setup_tlb_handler(int cpu) csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY); csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY); csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY); - } #endif + } } void tlb_init(int cpu) -- Gitee From 92abe13b6c88898c259ce5a73a4a872f03177632 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 4 Feb 2024 22:49:46 +0900 Subject: [PATCH 190/241] LoongArch: Select HAVE_ARCH_SECCOMP to use the common SECCOMP menu LoongArch missed the refactoring made by commit 282a181b1a0d ("seccomp: Move config option SECCOMP to arch/Kconfig") because LoongArch was not mainlined at that time. The 'depends on PROC_FS' statement is stale as described in that commit. Select HAVE_ARCH_SECCOMP, and remove the duplicated config entry. Change-Id: I4d709d4e93c2c516c23d8815abb7ad2523e0a904 Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 362957502d52..d2ca36ad907f 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -76,6 +76,7 @@ config LOONGARCH select HANDLE_DOMAIN_IRQ select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE @@ -715,23 +716,6 @@ config ARCH_STRICT_ALIGN to run kernel only on systems with h/w unaligned access support in order to optimise for performance. -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. 
Once seccomp is enabled via /proc/<pid>/seccomp, it cannot be disabled
-	  and the task is only allowed to execute a few safe syscalls
-	  defined by each seccomp mode.
-
-	  If unsure, say Y. Only embedded should say N here.
-
 endmenu
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
-- 
Gitee

From 960ea82334773bdd91eb338259ec3bef4aa55831 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Tue, 6 Feb 2024 09:42:02 +0800
Subject: [PATCH 191/241] LoongArch: Change acpi_core_pic[NR_CPUS] to acpi_core_pic[MAX_CORE_PIC]

With the default config, the value of NR_CPUS is 64. When a HW platform
has more than 64 cpus, the system will crash on these platforms.
MAX_CORE_PIC is the maximum cpu number in the MADT table (max physical
number), which can exceed the supported maximum cpu number (NR_CPUS, max
logical number), but the kernel should not crash. The kernel should boot
up to NR_CPUS cpus and let the remaining cpus stay in the BIOS.

The potential crash reason is that the array acpi_core_pic[NR_CPUS] can
be overflowed when parsing the MADT table, and it is obvious that
CORE_PIC should correspond to a physical core rather than a logical
core, so it is better to define the array as acpi_core_pic[MAX_CORE_PIC].

With the patch, the system can boot up 64 vcpus with the qemu parameter
-smp 128; otherwise the system will crash with the following message.

[    0.000000] CPU 0 Unable to handle kernel paging request at virtual address 0000420000004259, era == 90000000037a5f0c, ra == 90000000037a46ec
[    0.000000] Oops[#1]:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.8.0-rc2+ #192
[    0.000000] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022
[    0.000000] pc 90000000037a5f0c ra 90000000037a46ec tp 9000000003c90000 sp 9000000003c93d60
[    0.000000] a0 0000000000000019 a1 9000000003d93bc0 a2 0000000000000000 a3 9000000003c93bd8
[    0.000000] a4 9000000003c93a74 a5 9000000083c93a67 a6 9000000003c938f0 a7 0000000000000005
[    0.000000] t0 0000420000004201 t1 0000000000000000 t2 0000000000000001 t3 0000000000000001
[    0.000000] t4 0000000000000003 t5 0000000000000000 t6 0000000000000030 t7 0000000000000063
[    0.000000] t8 0000000000000014 u0 ffffffffffffffff s9 0000000000000000 s0 9000000003caee98
[    0.000000] s1 90000000041b0480 s2 9000000003c93da0 s3 9000000003c93d98 s4 9000000003c93d90
[    0.000000] s5 9000000003caa000 s6 000000000a7fd000 s7 000000000f556b60 s8 000000000e0a4330
[    0.000000]    ra: 90000000037a46ec platform_init+0x214/0x250
[    0.000000]   ERA: 90000000037a5f0c efi_runtime_init+0x30/0x94
[    0.000000]  CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE)
[    0.000000]  PRMD: 00000000 (PPLV0 -PIE -PWE)
[    0.000000]  EUEN: 00000000 (-FPE -SXE -ASXE -BTE)
[    0.000000]  ECFG: 00070800 (LIE=11 VS=7)
[    0.000000] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0)
[    0.000000]  BADV: 0000420000004259
[    0.000000]  PRID: 0014c010 (Loongson-64bit, Loongson-3A5000)
[    0.000000] Modules linked in:
[    0.000000] Process swapper (pid: 0, threadinfo=(____ptrval____), task=(____ptrval____))
[    0.000000] Stack : 9000000003c93a14 9000000003800898 90000000041844f8 90000000037a46ec
[    0.000000]         000000000a7fd000 0000000008290000 0000000000000000 0000000000000000
[    0.000000]         0000000000000000 0000000000000000 00000000019d8000 000000000f556b60
[    0.000000]         000000000a7fd000 000000000f556b08 9000000003ca7700 9000000003800000
[    0.000000]         9000000003c93e50 9000000003800898 9000000003800108 90000000037a484c
[    0.000000]         000000000e0a4330 000000000f556b60 000000000a7fd000 000000000f556b08
[    0.000000]         9000000003ca7700 9000000004184000 0000000000200000 000000000e02b018
[    0.000000]         000000000a7fd000 90000000037a0790
9000000003800108 0000000000000000 [ 0.000000] 0000000000000000 000000000e0a4330 000000000f556b60 000000000a7fd000 [ 0.000000] 000000000f556b08 000000000eaae298 000000000eaa5040 0000000000200000 [ 0.000000] ... [ 0.000000] Call Trace: [ 0.000000] [<90000000037a5f0c>] efi_runtime_init+0x30/0x94 [ 0.000000] [<90000000037a46ec>] platform_init+0x214/0x250 [ 0.000000] [<90000000037a484c>] setup_arch+0x124/0x45c [ 0.000000] [<90000000037a0790>] start_kernel+0x90/0x670 [ 0.000000] [<900000000378b0d8>] kernel_entry+0xd8/0xdc Change-Id: I7a93a24d3b19c288fb8c9008f58568711705581f Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/acpi.h | 4 +++- arch/loongarch/kernel/acpi.c | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h index 93bc0ebf303b..e76f5bd9b7f3 100644 --- a/arch/loongarch/include/asm/acpi.h +++ b/arch/loongarch/include/asm/acpi.h @@ -34,8 +34,10 @@ static inline bool acpi_has_cpu_in_madt(void) return true; } +#define MAX_CORE_PIC 256 + extern struct list_head acpi_wakeup_device_list; -extern struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; +extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; extern int __init parse_acpi_topology(void); diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index ea72ff898d6f..f44435ec1d6e 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -29,11 +29,9 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_LPIC; u64 acpi_saved_sp; -#define MAX_CORE_PIC 256 - #define PREFIX "ACPI: " -struct acpi_madt_core_pic acpi_core_pic[NR_CPUS]; +struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC]; int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) { -- Gitee From 0ce0b4d210f07c7deec7f54d3c27e7caa7852eff Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 22 Feb 2024 15:03:44 +0800 Subject: [PATCH 192/241] LoongArch: Disable IRQ before init_fn() for nonboot CPUs Disable IRQ before init_fn() for nonboot CPUs when hotplug, in order to silence such warnings (and also avoid potential errors due to unexpected interrupts): WARNING: CPU: 1 PID: 0 at kernel/rcu/tree.c:4503 rcu_cpu_starting+0x214/0x280 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 pc 90000000048e3334 ra 90000000047bd56c tp 900000010039c000 sp 900000010039fdd0 a0 0000000000000001 a1 0000000000000006 a2 900000000802c040 a3 0000000000000000 a4 0000000000000001 a5 0000000000000004 a6 0000000000000000 a7 90000000048e3f4c t0 0000000000000001 t1 9000000005c70968 t2 0000000004000000 t3 000000000005e56e t4 00000000000002e4 t5 0000000000001000 t6 ffffffff80000000 t7 0000000000040000 t8 9000000007931638 u0 0000000000000006 s9 0000000000000004 s0 0000000000000001 s1 9000000006356ac0 s2 9000000007244000 s3 0000000000000001 s4 0000000000000001 s5 900000000636f000 s6 7fffffffffffffff s7 9000000002123940 s8 9000000001ca55f8 ra: 90000000047bd56c tlb_init+0x24c/0x528 ERA: 90000000048e3334 rcu_cpu_starting+0x214/0x280 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000000 (PPLV0 -PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071000 (LIE=12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.6.17+ #1198 Stack : 0000000000000000 9000000006375000 9000000005b61878 900000010039c000 900000010039fa30 0000000000000000 900000010039fa38 900000000619a140 9000000006456888 9000000006456880 900000010039f950 0000000000000001 
0000000000000001 cb0cb028ec7e52e1 0000000002b90000 9000000100348700 0000000000000000 0000000000000001 ffffffff916d12f1 0000000000000003 0000000000040000 9000000007930370 0000000002b90000 0000000000000004 9000000006366000 900000000619a140 0000000000000000 0000000000000004 0000000000000000 0000000000000009 ffffffffffc681f2 9000000002123940 9000000001ca55f8 9000000006366000 90000000047a4828 00007ffff057ded8 00000000000000b0 0000000000000000 0000000000000000 0000000000071000 ... Call Trace: [<90000000047a4828>] show_stack+0x48/0x1a0 [<9000000005b61874>] dump_stack_lvl+0x84/0xcc [<90000000047f60ac>] __warn+0x8c/0x1e0 [<9000000005b0ab34>] report_bug+0x1b4/0x280 [<9000000005b63110>] do_bp+0x2d0/0x480 [<90000000047a2e20>] handle_bp+0x120/0x1c0 [<90000000048e3334>] rcu_cpu_starting+0x214/0x280 [<90000000047bd568>] tlb_init+0x248/0x528 [<90000000047a4c44>] per_cpu_trap_init+0x124/0x160 [<90000000047a19f4>] cpu_probe+0x494/0xa00 [<90000000047b551c>] start_secondary+0x3c/0xc0 [<9000000005b66134>] smpboot_entry+0x50/0x58 Change-Id: I93ac4fc2220fa28bdd74825a5803839473b9c6cb Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/loongson64/smp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 8d7050fdcd25..0f3b6ab639e6 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -378,6 +378,8 @@ void __noreturn arch_cpu_idle_dead(void) [count] "=&r" (count), [init_fn] "=&r" (addr) : /* No Input */ : "a1"); + + local_irq_disable(); init_fn = __va(addr); init_fn(); -- Gitee From 2f50811c3bc11864c437debbd3099ac0b3d23a7a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 23 Feb 2024 14:04:21 +0800 Subject: [PATCH 193/241] LoongArch: Update cpu_sibling_map when disabling nonboot CPUs Update cpu_sibling_map when disabling nonboot CPUs by defining & calling clear_cpu_sibling_map(), otherwise we get such errors on SMT systems: jump label: negative count! 
WARNING: CPU: 6 PID: 45 at kernel/jump_label.c:263 __static_key_slow_dec_cpuslocked+0xec/0x100 CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 pc 90000000004c302c ra 90000000004c302c tp 90000001005bc000 sp 90000001005bfd20 a0 000000000000001b a1 900000000224c278 a2 90000001005bfb58 a3 900000000224c280 a4 900000000224c278 a5 90000001005bfb50 a6 0000000000000001 a7 0000000000000001 t0 ce87a4763eb5234a t1 ce87a4763eb5234a t2 0000000000000000 t3 0000000000000000 t4 0000000000000006 t5 0000000000000000 t6 0000000000000064 t7 0000000000001964 t8 000000000009ebf6 u0 9000000001f2a068 s9 0000000000000000 s0 900000000246a2d8 s1 ffffffffffffffff s2 ffffffffffffffff s3 90000000021518c0 s4 0000000000000040 s5 9000000002151058 s6 9000000009828e40 s7 00000000000000b4 s8 0000000000000006 ra: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 ERA: 90000000004c302c __static_key_slow_dec_cpuslocked+0xec/0x100 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1c (LIE=2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014d000 (Loongson-64bit, Loongson-3A6000-HV) CPU: 6 PID: 45 Comm: cpuhp/6 Not tainted 6.8.0-rc5+ #1340 Stack : 0000000000000000 900000000203f258 900000000179afc8 90000001005bc000 90000001005bf980 0000000000000000 90000001005bf988 9000000001fe0be0 900000000224c280 900000000224c278 90000001005bf8c0 0000000000000001 0000000000000001 ce87a4763eb5234a 0000000007f38000 90000001003f8cc0 0000000000000000 0000000000000006 0000000000000000 4c206e6f73676e6f 6f4c203a656d616e 000000000009ec99 0000000007f38000 0000000000000000 900000000214b000 9000000001fe0be0 0000000000000004 0000000000000000 0000000000000107 0000000000000009 ffffffffffafdabe 00000000000000b4 0000000000000006 90000000004c302c 9000000000224528 00005555939a0c7c 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c ... 
Call Trace: [<9000000000224528>] show_stack+0x48/0x1a0 [<900000000179afc8>] dump_stack_lvl+0x78/0xa0 [<9000000000263ed0>] __warn+0x90/0x1a0 [<90000000017419b8>] report_bug+0x1b8/0x280 [<900000000179c564>] do_bp+0x264/0x420 [<90000000004c302c>] __static_key_slow_dec_cpuslocked+0xec/0x100 [<90000000002b4d7c>] sched_cpu_deactivate+0x2fc/0x300 [<9000000000266498>] cpuhp_invoke_callback+0x178/0x8a0 [<9000000000267f70>] cpuhp_thread_fun+0xf0/0x240 [<90000000002a117c>] smpboot_thread_fn+0x1dc/0x2e0 [<900000000029a720>] kthread+0x140/0x160 [<9000000000222288>] ret_from_kernel_thread+0xc/0xa4 Change-Id: I7b0379842d41a50f78a315152e59f255bc8219fa Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/smp.h | 2 ++ arch/loongarch/kernel/smp.c | 30 ++++++++++++++++++++++-------- arch/loongarch/loongson64/smp.c | 1 + 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 9cba390dfdfe..eb0d6e8c2f5c 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -97,6 +97,8 @@ extern struct secondary_data cpuboot_data; extern asmlinkage void smpboot_entry(void); extern asmlinkage void start_secondary(void); +extern void set_cpu_sibling_map(int cpu); +extern void clear_cpu_sibling_map(int cpu); extern void calculate_cpu_foreign_map(void); /* diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 71eabb526f01..96c62520f5f4 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -59,7 +59,21 @@ static cpumask_t cpu_sibling_setup_map; /* representing cpus for which core maps can be computed */ static cpumask_t cpu_core_setup_map; -static inline void set_cpu_sibling_map(int cpu) +static inline void set_cpu_core_map(int cpu) +{ + int i; + + cpumask_set_cpu(cpu, &cpu_core_setup_map); + + for_each_cpu(i, &cpu_core_setup_map) { + if (cpu_data[cpu].package == cpu_data[i].package) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + } + } +} + +void set_cpu_sibling_map(int cpu) { int i; @@ -73,18 +87,18 @@ static inline void set_cpu_sibling_map(int cpu) } } -static inline void set_cpu_core_map(int cpu) +void clear_cpu_sibling_map(int cpu) { int i; - cpumask_set_cpu(cpu, &cpu_core_setup_map); - - for_each_cpu(i, &cpu_core_setup_map) { - if (cpu_data[cpu].package == cpu_data[i].package) { - cpumask_set_cpu(i, &cpu_core_map[cpu]); - cpumask_set_cpu(cpu, &cpu_core_map[i]); + for_each_cpu(i, &cpu_sibling_setup_map) { + if (cpus_are_siblings(cpu, i)) { + cpumask_clear_cpu(i, &cpu_sibling_map[cpu]); + cpumask_clear_cpu(cpu, &cpu_sibling_map[i]); } } + + cpumask_clear_cpu(cpu, &cpu_sibling_setup_map); } /* diff --git a/arch/loongarch/loongson64/smp.c b/arch/loongarch/loongson64/smp.c index 0f3b6ab639e6..9f7df4236af3 100644 --- a/arch/loongarch/loongson64/smp.c +++ b/arch/loongarch/loongson64/smp.c @@ -329,6 +329,7 @@ static int loongson3_cpu_disable(void) numa_remove_cpu(cpu); #endif set_cpu_online(cpu, false); + clear_cpu_sibling_map(cpu); calculate_cpu_foreign_map(); local_irq_save(flags); irq_migrate_all_off_this_cpu(); -- Gitee From ab3041dee8a6452ea71517cc2e1cfcfeba3900a4 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 194/241] LoongArch: Add Loongson-3 default config file Change-Id: I5b8932df7e8cf0361d39cbac6f5362fcfda10514 Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 + arch/loongarch/configs/loongson3_defconfig | 785 +++++++++++++++++++++ 2 
files changed, 787 insertions(+) create mode 100644 arch/loongarch/configs/loongson3_defconfig diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 1b82b20e83f4..13eaa36b0949 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -7,6 +7,8 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/loongarch/tools elf-entry $(Q)$(MAKE) $(build)=arch/loongarch/boot/tools relocs +KBUILD_DEFCONFIG := loongson3_defconfig + # # Select the object file format to substitute into the linker script. # diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig new file mode 100644 index 000000000000..1acf3c3ba153 --- /dev/null +++ b/arch/loongarch/configs/loongson3_defconfig @@ -0,0 +1,785 @@ +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_LZMA=y +CONFIG_SYSVIPC=y +CONFIG_LOONGARCH=y +CONFIG_MACH_LOONGSON64=y +CONFIG_CPU_LOONGSON64=y +CONFIG_64BIT=y +CONFIG_DMI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_NUMA_BALANCING=y +CONFIG_MEMCG=y +CONFIG_BLK_CGROUP=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +CONFIG_USER_NS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BPF_SYSCALL=y +CONFIG_EXPERT=y +CONFIG_USERFAULTFD=y +CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PAGE_SIZE_16KB=y +CONFIG_CPU_HAS_LSX=y +CONFIG_CPU_HAS_LASX=y +CONFIG_NUMA=y +CONFIG_EFI=y +CONFIG_SMP=y +CONFIG_NR_CPUS=64 +CONFIG_HZ_250=y +CONFIG_KEXEC=y +CONFIG_HIBERNATION=y +CONFIG_ACPI=y +CONFIG_ACPI_SPCR_TABLE=y +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_TAD=y +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_IPMI=m +CONFIG_ACPI_PCI_SLOT=y +CONFIG_ACPI_HOTPLUG_MEMORY=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y +CONFIG_EFI_CAPSULE_LOADER=m +CONFIG_EFI_TEST=m +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_IOSCHED_BFQ=y +CONFIG_BFQ_GROUP_IOSCHED=y +CONFIG_BINFMT_MISC=m +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_FRONTSWAP=y +CONFIG_ZSWAP=y +CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y +CONFIG_ZPOOL=y +CONFIG_ZBUD=y +CONFIG_Z3FOLD=y +CONFIG_ZSMALLOC=m +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_XFRM_USER=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NET_IPIP=m +CONFIG_IP_MROUTE=y +CONFIG_INET_ESP=m +CONFIG_INET_UDP_DIAG=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_BBR=m +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_MROUTE=y +CONFIG_NETWORK_PHY_TIMESTAMPING=y +CONFIG_NETFILTER=y +CONFIG_BRIDGE_NETFILTER=m 
+CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_LOG_NETDEV=m +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_CONNLIMIT=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_NAT=m +CONFIG_NFT_TUNNEL=m +CONFIG_NFT_OBJREF=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m +CONFIG_NFT_REJECT=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_HASH=m +CONFIG_NFT_SOCKET=m +CONFIG_NFT_OSF=m +CONFIG_NFT_TPROXY=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LED=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CGROUP=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m +CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_SOCKET=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_NFCT=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_DUP_IPV4=m +CONFIG_NFT_FIB_IPV4=m +CONFIG_NF_TABLES_ARP=y +CONFIG_NF_LOG_ARP=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_SYNPROXY=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_MATCH_AH=m 
+CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_SRH=m +CONFIG_IP6_NF_FILTER=y +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_SYNPROXY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_IP6=m +CONFIG_BPFILTER=y +CONFIG_IP_SCTP=m +CONFIG_RDS=y +CONFIG_L2TP=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y +CONFIG_VLAN_8021Q_MVRP=y +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_CGROUP=m +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_BPF=m +CONFIG_OPENVSWITCH=m +CONFIG_NETLINK_DIAG=y +CONFIG_CGROUP_NET_PRIO=y +CONFIG_BT=m +CONFIG_BT_HCIBTUSB=m +# CONFIG_BT_HCIBTUSB_BCM is not set +CONFIG_CFG80211=m +CONFIG_CFG80211_WEXT=y +CONFIG_MAC80211=m +CONFIG_RFKILL=m +CONFIG_RFKILL_INPUT=y +CONFIG_NET_9P=y +CONFIG_CEPH_LIB=m +CONFIG_PCIEPORTBUS=y +CONFIG_HOTPLUG_PCI_PCIE=y +CONFIG_PCIEAER=y +# CONFIG_PCIEASPM is not set +CONFIG_PCI_IOV=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_SHPC=y +CONFIG_PCCARD=m +CONFIG_YENTA=m +CONFIG_RAPIDIO=y +CONFIG_RAPIDIO_TSI721=y +CONFIG_RAPIDIO_ENABLE_RX_TX_PORTS=y +CONFIG_RAPIDIO_ENUM_BASIC=m +CONFIG_RAPIDIO_CHMAN=m +CONFIG_RAPIDIO_MPORT_CDEV=m +CONFIG_UEVENT_HELPER=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=m +CONFIG_MTD_BLOCK=m +CONFIG_MTD_CFI=m +CONFIG_MTD_JEDECPROBE=m +CONFIG_MTD_CFI_INTELEXT=m +CONFIG_MTD_CFI_AMDSTD=m +CONFIG_MTD_CFI_STAA=m +CONFIG_MTD_RAM=m +CONFIG_MTD_ROM=m +CONFIG_PARPORT=y +CONFIG_PARPORT_PC=y +CONFIG_PARPORT_SERIAL=y +CONFIG_PARPORT_PC_FIFO=y +CONFIG_ZRAM=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=y +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=y +CONFIG_EEPROM_AT24=m +CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_FC_ATTRS=m +CONFIG_SCSI_SAS_ATA=y +CONFIG_ISCSI_TCP=m +CONFIG_SCSI_MVSAS=y +# CONFIG_SCSI_MVSAS_DEBUG is not set +CONFIG_SCSI_MVSAS_TASKLET=y +CONFIG_SCSI_MVUMI=y +CONFIG_MEGARAID_NEWGEN=y +CONFIG_MEGARAID_MM=y +CONFIG_MEGARAID_MAILBOX=y +CONFIG_MEGARAID_LEGACY=y +CONFIG_MEGARAID_SAS=y +CONFIG_SCSI_MPT2SAS=y +CONFIG_LIBFC=m +CONFIG_LIBFCOE=m +CONFIG_FCOE=m +CONFIG_SCSI_QLOGIC_1280=m +CONFIG_SCSI_QLA_FC=m +CONFIG_TCM_QLA2XXX=m +CONFIG_SCSI_QLA_ISCSI=m +CONFIG_SCSI_LPFC=m +CONFIG_ATA=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_PATA_ATIIXP=y +CONFIG_PATA_PCMCIA=m +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_RAID456=m +CONFIG_MD_MULTIPATH=m +CONFIG_BCACHE=m +CONFIG_BLK_DEV_DM=y +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m +CONFIG_DM_CACHE=m +CONFIG_DM_WRITECACHE=m +CONFIG_DM_MIRROR=m 
+CONFIG_DM_RAID=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_MULTIPATH_QL=m +CONFIG_DM_MULTIPATH_ST=m +CONFIG_TARGET_CORE=m +CONFIG_TCM_IBLOCK=m +CONFIG_TCM_FILEIO=m +CONFIG_TCM_PSCSI=m +CONFIG_TCM_USER2=m +CONFIG_LOOPBACK_TARGET=m +CONFIG_ISCSI_TARGET=m +CONFIG_NETDEVICES=y +CONFIG_BONDING=m +CONFIG_DUMMY=y +CONFIG_WIREGUARD=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_IPVLAN=m +CONFIG_VXLAN=y +CONFIG_RIONET=m +CONFIG_TUN=m +CONFIG_VETH=m +# CONFIG_NET_VENDOR_3COM is not set +# CONFIG_NET_VENDOR_ADAPTEC is not set +# CONFIG_NET_VENDOR_AGERE is not set +# CONFIG_NET_VENDOR_ALACRITECH is not set +# CONFIG_NET_VENDOR_ALTEON is not set +# CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_ATHEROS is not set +CONFIG_BNX2=y +# CONFIG_NET_VENDOR_BROCADE is not set +# CONFIG_NET_VENDOR_CAVIUM is not set +CONFIG_CHELSIO_T1=m +CONFIG_CHELSIO_T1_1G=y +CONFIG_CHELSIO_T3=m +CONFIG_CHELSIO_T4=m +# CONFIG_NET_VENDOR_CIRRUS is not set +# CONFIG_NET_VENDOR_CISCO is not set +# CONFIG_NET_VENDOR_DEC is not set +# CONFIG_NET_VENDOR_DLINK is not set +# CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_EZCHIP is not set +# CONFIG_NET_VENDOR_I825XX is not set +CONFIG_E1000=y +CONFIG_E1000E=y +CONFIG_IGB=y +CONFIG_IXGB=y +CONFIG_IXGBE=y +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MELLANOX is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_MYRI is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NVIDIA is not set +# CONFIG_NET_VENDOR_OKI is not set +# CONFIG_NET_VENDOR_QLOGIC is not set +# CONFIG_NET_VENDOR_QUALCOMM is not set +# CONFIG_NET_VENDOR_RDC is not set +CONFIG_8139CP=m +CONFIG_8139TOO=m +CONFIG_R8169=y +# CONFIG_NET_VENDOR_RENESAS is not set +# CONFIG_NET_VENDOR_ROCKER is not set +# CONFIG_NET_VENDOR_SAMSUNG is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_SOLARFLARE is not set +# CONFIG_NET_VENDOR_SILAN is not set +# CONFIG_NET_VENDOR_SIS is not set +# CONFIG_NET_VENDOR_SMSC is not set +CONFIG_STMMAC_ETH=y +# CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_TEHUTI is not set +# CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +# CONFIG_NET_VENDOR_XILINX is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y +CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOE=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_USB_RTL8150=m +CONFIG_USB_RTL8152=m +# CONFIG_USB_NET_AX8817X is not set +# CONFIG_USB_NET_AX88179_178A is not set +CONFIG_USB_NET_CDC_EEM=m +CONFIG_USB_NET_HUAWEI_CDC_NCM=m +CONFIG_USB_NET_CDC_MBIM=m +# CONFIG_USB_NET_NET1080 is not set +# CONFIG_USB_BELKIN is not set +# CONFIG_USB_ARMLINUX is not set +# CONFIG_USB_NET_ZAURUS is not set +CONFIG_ATH9K=m +CONFIG_ATH9K_HTC=m +CONFIG_IWLWIFI=m +CONFIG_IWLDVM=m +CONFIG_IWLMVM=m +CONFIG_IWLWIFI_BCAST_FILTERING=y +CONFIG_HOSTAP=m +CONFIG_MT7601U=m +CONFIG_RT2X00=m +CONFIG_RT2800USB=m +CONFIG_RTL8192CE=m +CONFIG_RTL8192SE=m +CONFIG_RTL8192DE=m +CONFIG_RTL8723AE=m +CONFIG_RTL8723BE=m +CONFIG_RTL8188EE=m +CONFIG_RTL8192EE=m +CONFIG_RTL8821AE=m +CONFIG_RTL8192CU=m +# CONFIG_RTLWIFI_DEBUG is not set +CONFIG_RTL8XXXU=m +CONFIG_ZD1211RW=m +CONFIG_USB_NET_RNDIS_WLAN=m +CONFIG_INPUT_POLLDEV=m +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_EVDEV=y 
+CONFIG_KEYBOARD_XTKBD=m +CONFIG_MOUSE_PS2_ELANTECH=y +CONFIG_MOUSE_PS2_SENTELIC=y +CONFIG_MOUSE_SERIAL=m +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=m +CONFIG_SERIO_SERPORT=m +CONFIG_SERIO_RAW=m +CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=16 +CONFIG_SERIAL_8250_RUNTIME_UARTS=16 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_PRINTER=m +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_SI=m +CONFIG_HW_RANDOM=y +CONFIG_RAW_DRIVER=m +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_PIIX4=y +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_LOONGSON=y +CONFIG_SENSORS_LM75=m +CONFIG_SENSORS_LM93=m +CONFIG_SENSORS_W83795=m +CONFIG_SENSORS_W83627HF=m +CONFIG_RC_CORE=m +CONFIG_LIRC=y +CONFIG_RC_DECODERS=y +CONFIG_IR_NEC_DECODER=m +CONFIG_IR_RC5_DECODER=m +CONFIG_IR_RC6_DECODER=m +CONFIG_IR_JVC_DECODER=m +CONFIG_IR_SONY_DECODER=m +CONFIG_IR_SANYO_DECODER=m +CONFIG_IR_SHARP_DECODER=m +CONFIG_IR_MCE_KBD_DECODER=m +CONFIG_IR_XMP_DECODER=m +CONFIG_IR_IMON_DECODER=m +CONFIG_MEDIA_SUPPORT=m +CONFIG_MEDIA_USB_SUPPORT=y +CONFIG_USB_VIDEO_CLASS=m +CONFIG_MEDIA_PCI_SUPPORT=y +CONFIG_VIDEO_BT848=m +CONFIG_DVB_BT8XX=m +CONFIG_DRM=y +CONFIG_DRM_RADEON=m +CONFIG_DRM_RADEON_USERPTR=y +CONFIG_DRM_AMDGPU=m +CONFIG_DRM_AMDGPU_SI=y +CONFIG_DRM_AMDGPU_CIK=y +CONFIG_DRM_AMDGPU_USERPTR=y +CONFIG_DRM_AST=y +CONFIG_FB_EFI=y +CONFIG_FB_RADEON=y +CONFIG_LCD_PLATFORM=m +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_LOGO=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +# CONFIG_SND_ISA is not set +CONFIG_SND_BT87X=m +CONFIG_SND_BT87X_OVERCLOCK=y +CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_HWDEP=y +CONFIG_SND_HDA_INPUT_BEEP=y +CONFIG_SND_HDA_PATCH_LOADER=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SND_HDA_CODEC_SIGMATEL=y +CONFIG_SND_HDA_CODEC_HDMI=y +CONFIG_SND_HDA_CODEC_CONEXANT=y +CONFIG_SND_USB_AUDIO=m +CONFIG_HIDRAW=y +CONFIG_UHID=m +CONFIG_HID_A4TECH=m +CONFIG_HID_CHERRY=m +CONFIG_HID_LOGITECH=m +CONFIG_HID_LOGITECH_DJ=m +CONFIG_LOGITECH_FF=y +CONFIG_LOGIRUMBLEPAD2_FF=y +CONFIG_LOGIG940_FF=y +CONFIG_HID_MICROSOFT=m +CONFIG_HID_MULTITOUCH=m +CONFIG_HID_SUNPLUS=m +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_OTG=y +CONFIG_USB_MON=y +CONFIG_USB_XHCI_HCD=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_EHCI_HCD_PLATFORM=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PLATFORM=y +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +CONFIG_USB_STORAGE_REALTEK=m +CONFIG_USB_UAS=m +CONFIG_USB_DWC2=y +CONFIG_USB_DWC2_HOST=y +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_CH341=m +CONFIG_USB_SERIAL_CP210X=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_OPTION=m +CONFIG_USB_GADGET=y +CONFIG_INFINIBAND=m +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_DMADEVICES=y +CONFIG_UIO=m +CONFIG_UIO_PDRV_GENIRQ=m +CONFIG_UIO_DMEM_GENIRQ=m +CONFIG_UIO_PCI_GENERIC=m +# CONFIG_VIRTIO_MENU is not set +CONFIG_STAGING=y +CONFIG_COMEDI=m +CONFIG_COMEDI_PCI_DRIVERS=m +CONFIG_COMEDI_8255_PCI=m +CONFIG_COMEDI_ADL_PCI6208=m +CONFIG_COMEDI_ADL_PCI7X3X=m +CONFIG_COMEDI_ADL_PCI8164=m +CONFIG_COMEDI_ADL_PCI9111=m +CONFIG_COMEDI_ADL_PCI9118=m +CONFIG_COMEDI_ADV_PCI1710=m +CONFIG_COMEDI_ADV_PCI1720=m +CONFIG_COMEDI_ADV_PCI1723=m +CONFIG_COMEDI_ADV_PCI1724=m 
+CONFIG_COMEDI_ADV_PCI1760=m +CONFIG_COMEDI_ADV_PCI_DIO=m +CONFIG_COMEDI_NI_LABPC_PCI=m +CONFIG_COMEDI_NI_PCIDIO=m +CONFIG_COMEDI_NI_PCIMIO=m +CONFIG_R8188EU=m +# CONFIG_88EU_AP_MODE is not set +CONFIG_PM_DEVFREQ=y +CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y +CONFIG_DEVFREQ_GOV_PERFORMANCE=y +CONFIG_DEVFREQ_GOV_POWERSAVE=y +CONFIG_DEVFREQ_GOV_USERSPACE=y +CONFIG_PWM=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_BTRFS_FS=y +CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA=y +# CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=y +CONFIG_FUSE_FS=m +CONFIG_OVERLAY_FS=y +CONFIG_OVERLAY_FS_INDEX=y +CONFIG_OVERLAY_FS_XINO_AUTO=y +CONFIG_OVERLAY_FS_METACOPY=y +CONFIG_FSCACHE=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=y +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_CODEPAGE=936 +CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_CONFIGFS_FS=y +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZ4=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_V4_1=y +CONFIG_NFS_V4_2=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_BLOCKLAYOUT=y +CONFIG_CIFS=m +# CONFIG_CIFS_DEBUG is not set +CONFIG_9P_FS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_UTF8=y +CONFIG_KEY_DH_OPERATIONS=y +CONFIG_SECURITY=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_APPARMOR=y +CONFIG_SECURITY_YAMA=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_CRYPTO_USER=m +# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_PCRYPT=m +CONFIG_CRYPTO_CRYPTD=m +CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_VMAC=m +CONFIG_CRYPTO_TGR192=m +CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_KHAZAD=m +CONFIG_CRYPTO_SALSA20=m +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_LZO=m +CONFIG_CRYPTO_842=m +CONFIG_CRYPTO_LZ4=m +CONFIG_CRYPTO_LZ4HC=m +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_USER_API_SKCIPHER=m +CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m +CONFIG_PRINTK_TIME=y +CONFIG_STRIP_ASM_SYMS=y +CONFIG_MAGIC_SYSRQ=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_SCHEDSTATS=y +# CONFIG_DEBUG_PREEMPT is not set +# CONFIG_FTRACE is not set +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="e1000e.InterruptThrottleRate=4,4,4,4" -- Gitee From af1be4dfaa419b5a90ca315fc91ee2b7708005dc Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 2 May 2021 19:16:12 +0800 Subject: [PATCH 195/241] MAINTAINERS: Add maintainer information for LoongArch Add the maintainer information for the LoongArch (LA or LArch for short) architecture. 
Change-Id: Ie0bf392ea3ef0f3663ef1d638a9bede7cad85f99 Signed-off-by: Huacai Chen --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 52fd22591b1a..d45732e67141 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10282,6 +10282,14 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git F: drivers/hid/hid-lg-g15.c +LOONGARCH +M: Huacai Chen +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git +F: Documentation/loongson/ +F: arch/loongarch/ +F: drivers/platform/loongarch/ + LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Sathya Prakash M: Sreekanth Reddy -- Gitee From 076ee78b1762211f2ad191d4bd198103a57ee881 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 17 Nov 2021 15:16:39 +0800 Subject: [PATCH 196/241] tools/perf: Add basic support for LoongArch Change-Id: Iffc73abda323ad4a34623d99df6f71d35a465e85 Signed-off-by: Huacai Chen --- scripts/Makefile | 9 +- .../loongarch/include/uapi/asm/perf_regs.h | 40 +++++++++ .../arch/loongarch/include/uapi/asm/unistd.h | 22 +++++ tools/perf/Makefile.config | 12 ++- tools/perf/arch/loongarch/Build | 1 + tools/perf/arch/loongarch/Makefile | 28 ++++++ .../arch/loongarch/annotate/instructions.c | 45 ++++++++++ .../loongarch/entry/syscalls/mksyscalltbl | 60 +++++++++++++ .../arch/loongarch/include/dwarf-regs-table.h | 27 ++++++ tools/perf/arch/loongarch/include/perf_regs.h | 88 +++++++++++++++++++ tools/perf/arch/loongarch/util/Build | 5 ++ tools/perf/arch/loongarch/util/dwarf-regs.c | 55 ++++++++++++ tools/perf/arch/loongarch/util/perf_regs.c | 6 ++ tools/perf/arch/loongarch/util/unwind-libdw.c | 56 ++++++++++++ .../arch/loongarch/util/unwind-libunwind.c | 82 +++++++++++++++++ tools/perf/check-headers.sh | 1 + tools/perf/util/annotate.c | 8 ++ tools/perf/util/dwarf-regs.c | 7 ++ tools/perf/util/env.c | 2 + tools/perf/util/genelf.h | 3 + tools/perf/util/syscalltbl.c | 4 + tools/scripts/Makefile.arch | 12 ++- 22 files changed, 568 insertions(+), 5 deletions(-) create mode 100644 tools/arch/loongarch/include/uapi/asm/perf_regs.h create mode 100644 tools/arch/loongarch/include/uapi/asm/unistd.h create mode 100644 tools/perf/arch/loongarch/Build create mode 100644 tools/perf/arch/loongarch/Makefile create mode 100644 tools/perf/arch/loongarch/annotate/instructions.c create mode 100755 tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl create mode 100644 tools/perf/arch/loongarch/include/dwarf-regs-table.h create mode 100644 tools/perf/arch/loongarch/include/perf_regs.h create mode 100644 tools/perf/arch/loongarch/util/Build create mode 100644 tools/perf/arch/loongarch/util/dwarf-regs.c create mode 100644 tools/perf/arch/loongarch/util/perf_regs.c create mode 100644 tools/perf/arch/loongarch/util/unwind-libdw.c create mode 100644 tools/perf/arch/loongarch/util/unwind-libunwind.c diff --git a/scripts/Makefile b/scripts/Makefile index 9adb6d247818..5d89c2ee2748 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -24,10 +24,17 @@ HOSTCFLAGS_extract-cert.o = $(CRYPTO_CFLAGS) HOSTLDLIBS_extract-cert = $(CRYPTO_LIBS) ifdef CONFIG_UNWINDER_ORC +# Additional ARCH settings for x86 ifeq ($(ARCH),x86_64) ARCH := x86 endif -HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include + +# Additional ARCH settings for loongarch +ifeq ($(ARCH),loongarch64) +ARCH := loongarch +endif + +HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/$(ARCH)/include HOSTCFLAGS_sorttable.o += -DUNWINDER_ORC_ENABLED HOSTLDLIBS_sorttable = -lpthread endif diff --git 
a/tools/arch/loongarch/include/uapi/asm/perf_regs.h b/tools/arch/loongarch/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..29d69c00fc7a --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_LOONGARCH_PERF_REGS_H +#define _ASM_LOONGARCH_PERF_REGS_H + +enum perf_event_loongarch_regs { + PERF_REG_LOONGARCH_PC, + PERF_REG_LOONGARCH_R1, + PERF_REG_LOONGARCH_R2, + PERF_REG_LOONGARCH_R3, + PERF_REG_LOONGARCH_R4, + PERF_REG_LOONGARCH_R5, + PERF_REG_LOONGARCH_R6, + PERF_REG_LOONGARCH_R7, + PERF_REG_LOONGARCH_R8, + PERF_REG_LOONGARCH_R9, + PERF_REG_LOONGARCH_R10, + PERF_REG_LOONGARCH_R11, + PERF_REG_LOONGARCH_R12, + PERF_REG_LOONGARCH_R13, + PERF_REG_LOONGARCH_R14, + PERF_REG_LOONGARCH_R15, + PERF_REG_LOONGARCH_R16, + PERF_REG_LOONGARCH_R17, + PERF_REG_LOONGARCH_R18, + PERF_REG_LOONGARCH_R19, + PERF_REG_LOONGARCH_R20, + PERF_REG_LOONGARCH_R21, + PERF_REG_LOONGARCH_R22, + PERF_REG_LOONGARCH_R23, + PERF_REG_LOONGARCH_R24, + PERF_REG_LOONGARCH_R25, + PERF_REG_LOONGARCH_R26, + PERF_REG_LOONGARCH_R27, + PERF_REG_LOONGARCH_R28, + PERF_REG_LOONGARCH_R29, + PERF_REG_LOONGARCH_R30, + PERF_REG_LOONGARCH_R31, + PERF_REG_LOONGARCH_MAX, +}; +#endif /* _ASM_LOONGARCH_PERF_REGS_H */ diff --git a/tools/arch/loongarch/include/uapi/asm/unistd.h b/tools/arch/loongarch/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..d3666a55f7a6 --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/unistd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 + +#include diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 89905b4e9309..2772085f059f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -32,7 +32,7 @@ ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 0 endif else - ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 loongarch)) NO_SYSCALL_TABLE := 0 endif endif @@ -74,6 +74,12 @@ ifeq ($(SRCARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(SRCARCH),loongarch) + NO_PERF_REGS := 0 + CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated + LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64 +endif + ifeq ($(SRCARCH),riscv) NO_PERF_REGS := 0 endif @@ -95,7 +101,7 @@ endif # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures # to the check. 
-ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv)) +ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv loongarch)) NO_LIBDW_DWARF_UNWIND := 1 endif @@ -117,7 +123,7 @@ endef ifdef LIBUNWIND_DIR LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib - LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64 + LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64 loongarch $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch))) endif diff --git a/tools/perf/arch/loongarch/Build b/tools/perf/arch/loongarch/Build new file mode 100644 index 000000000000..e4e5f33c84d8 --- /dev/null +++ b/tools/perf/arch/loongarch/Build @@ -0,0 +1 @@ +perf-y += util/ diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile new file mode 100644 index 000000000000..c392e7af4743 --- /dev/null +++ b/tools/perf/arch/loongarch/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif +PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/loongarch/include/generated/asm +header := $(out)/syscalls.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/loongarch/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, loongarch) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c new file mode 100644 index 000000000000..206e1fca38a4 --- /dev/null +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Perf annotate functions. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +static +struct ins_ops *loongarch__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "beqz", 4) || + !strncmp(name, "bnez", 4) || + !strncmp(name, "beq", 3) || + !strncmp(name, "bne", 3) || + !strncmp(name, "blt", 3) || + !strncmp(name, "bge", 3) || + !strncmp(name, "bltu", 4) || + !strncmp(name, "bgeu", 4) || + !strncmp(name, "bl", 2)) + ops = &call_ops; + else if (!strncmp(name, "jirl", 4)) + ops = &ret_ops; + else if (name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->associate_instruction_ops = loongarch__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..86dad5a018b7 --- /dev/null +++ b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl @@ -0,0 +1,60 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright (C) 2020 Loongson Technology Co., Ltd. 
+# Author(s): Ming Wang + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table_from_c() +{ + local sc nr last_sc + + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` + + { + + cat <<-_EoHEADER + #include + #include "$input" + int main(int argc, char *argv[]) + { + _EoHEADER + + while read sc nr; do + printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", $nr);" + last_sc=$nr + done + + printf "%s\n" " printf(\"#define SYSCALLTBL_LOONGARCH_MAX_ID %d\\n\", $last_sc);" + printf "}\n" + + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - + + $create_table_exe + + rm -f $create_table_exe +} + +create_table() +{ + echo "static const char *syscalltbl_loongarch[] = {" + create_table_from_c + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -n \ + |create_table diff --git a/tools/perf/arch/loongarch/include/dwarf-regs-table.h b/tools/perf/arch/loongarch/include/dwarf-regs-table.h new file mode 100644 index 000000000000..676c54a226a5 --- /dev/null +++ b/tools/perf/arch/loongarch/include/dwarf-regs-table.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into + * register names. + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ */
+
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+static const char * const loongarch_regstr_tbl[] = {
+	"$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
+	"$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19",
+	"$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29",
+	"$30", "$31",
+};
+#endif
diff --git a/tools/perf/arch/loongarch/include/perf_regs.h b/tools/perf/arch/loongarch/include/perf_regs.h
new file mode 100644
index 000000000000..82d531dcd90f
--- /dev/null
+++ b/tools/perf/arch/loongarch/include/perf_regs.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MAX PERF_REG_LOONGARCH_MAX
+#define PERF_REG_IP PERF_REG_LOONGARCH_PC
+#define PERF_REG_SP PERF_REG_LOONGARCH_R3
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_LOONGARCH_MAX) - 1)
+
+static inline const char *__perf_reg_name(int id)
+{
+	switch (id) {
+	case PERF_REG_LOONGARCH_PC:
+		return "PC";
+	case PERF_REG_LOONGARCH_R1:
+		return "$1";
+	case PERF_REG_LOONGARCH_R2:
+		return "$2";
+	case PERF_REG_LOONGARCH_R3:
+		return "$3";
+	case PERF_REG_LOONGARCH_R4:
+		return "$4";
+	case PERF_REG_LOONGARCH_R5:
+		return "$5";
+	case PERF_REG_LOONGARCH_R6:
+		return "$6";
+	case PERF_REG_LOONGARCH_R7:
+		return "$7";
+	case PERF_REG_LOONGARCH_R8:
+		return "$8";
+	case PERF_REG_LOONGARCH_R9:
+		return "$9";
+	case PERF_REG_LOONGARCH_R10:
+		return "$10";
+	case PERF_REG_LOONGARCH_R11:
+		return "$11";
+	case PERF_REG_LOONGARCH_R12:
+		return "$12";
+	case PERF_REG_LOONGARCH_R13:
+		return "$13";
+	case PERF_REG_LOONGARCH_R14:
+		return "$14";
+	case PERF_REG_LOONGARCH_R15:
+		return "$15";
+	case PERF_REG_LOONGARCH_R16:
+		return "$16";
+	case PERF_REG_LOONGARCH_R17:
+		return "$17";
+	case PERF_REG_LOONGARCH_R18:
+		return "$18";
+	case PERF_REG_LOONGARCH_R19:
+		return "$19";
+	case PERF_REG_LOONGARCH_R20:
+		return "$20";
+	case PERF_REG_LOONGARCH_R21:
+		return "$21";
+	case PERF_REG_LOONGARCH_R22:
+		return "$22";
+	case PERF_REG_LOONGARCH_R23:
+		return "$23";
+	case PERF_REG_LOONGARCH_R24:
+		return "$24";
+	case PERF_REG_LOONGARCH_R25:
+		return "$25";
+	case PERF_REG_LOONGARCH_R26:
+		return "$26";
+	case PERF_REG_LOONGARCH_R27:
+		return "$27";
+	case PERF_REG_LOONGARCH_R28:
+		return "$28";
+	case PERF_REG_LOONGARCH_R29:
+		return "$29";
+	case PERF_REG_LOONGARCH_R30:
+		return "$30";
+	case PERF_REG_LOONGARCH_R31:
+		return "$31";
+	default:
+		break;
+	}
+	return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build
new file mode 100644
index 000000000000..d776125a2d06
--- /dev/null
+++ b/tools/perf/arch/loongarch/util/Build
@@ -0,0 +1,5 @@
+perf-y += perf_regs.o
+
+perf-$(CONFIG_DWARF) += dwarf-regs.o
+perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/loongarch/util/dwarf-regs.c b/tools/perf/arch/loongarch/util/dwarf-regs.c
new file mode 100644
index 000000000000..0509cfd597d6
--- /dev/null
+++ b/tools/perf/arch/loongarch/util/dwarf-regs.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2013 Cavium, Inc.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include /* for EINVAL */ +#include /* for strcmp */ +#include + +struct pt_regs_dwarfnum { + const char *name; + unsigned int dwarfnum; +}; + +static struct pt_regs_dwarfnum loongarch_gpr_table[] = { + {"$0", 0}, {"$1", 1}, {"$2", 2}, {"$3", 3}, + {"$4", 4}, {"$5", 5}, {"$6", 6}, {"$7", 7}, + {"$8", 8}, {"$9", 9}, {"$10", 10}, {"$11", 11}, + {"$12", 12}, {"$13", 13}, {"$14", 14}, {"$15", 15}, + {"$16", 16}, {"$17", 17}, {"$18", 18}, {"$19", 19}, + {"$20", 20}, {"$21", 21}, {"$22", 22}, {"$23", 23}, + {"$24", 24}, {"$25", 25}, {"$26", 26}, {"$27", 27}, + {"$28", 28}, {"$29", 29}, {"$30", 30}, {"$31", 31}, + {NULL, 0} +}; + +const char *get_arch_regstr(unsigned int n) +{ + n %= 32; + return loongarch_gpr_table[n].name; +} + +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_dwarfnum *roff; + + for (roff = loongarch_gpr_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->dwarfnum; + return -EINVAL; +} diff --git a/tools/perf/arch/loongarch/util/perf_regs.c b/tools/perf/arch/loongarch/util/perf_regs.c new file mode 100644 index 000000000000..2833e101a7c6 --- /dev/null +++ b/tools/perf/arch/loongarch/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/loongarch/util/unwind-libdw.c b/tools/perf/arch/loongarch/util/unwind-libdw.c new file mode 100644 index 000000000000..a9415385230a --- /dev/null +++ b/tools/perf/arch/loongarch/util/unwind-libdw.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2020-2023 Loongson Technology Corporation Limited */ + +#include +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" +#include "../../util/sample.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[PERF_REG_LOONGARCH_MAX]; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_LOONGARCH_##r); \ + val; \ +}) + + dwarf_regs[0] = 0; + dwarf_regs[1] = REG(R1); + dwarf_regs[2] = REG(R2); + dwarf_regs[3] = REG(R3); + dwarf_regs[4] = REG(R4); + dwarf_regs[5] = REG(R5); + dwarf_regs[6] = REG(R6); + dwarf_regs[7] = REG(R7); + dwarf_regs[8] = REG(R8); + dwarf_regs[9] = REG(R9); + dwarf_regs[10] = REG(R10); + dwarf_regs[11] = REG(R11); + dwarf_regs[12] = REG(R12); + dwarf_regs[13] = REG(R13); + dwarf_regs[14] = REG(R14); + dwarf_regs[15] = REG(R15); + dwarf_regs[16] = REG(R16); + dwarf_regs[17] = REG(R17); + dwarf_regs[18] = REG(R18); + dwarf_regs[19] = REG(R19); + dwarf_regs[20] = REG(R20); + dwarf_regs[21] = REG(R21); + dwarf_regs[22] = REG(R22); + dwarf_regs[23] = REG(R23); + dwarf_regs[24] = REG(R24); + dwarf_regs[25] = REG(R25); + dwarf_regs[26] = REG(R26); + dwarf_regs[27] = REG(R27); + dwarf_regs[28] = REG(R28); + dwarf_regs[29] = REG(R29); + dwarf_regs[30] = 
REG(R30); + dwarf_regs[31] = REG(R31); + dwfl_thread_state_register_pc(thread, REG(PC)); + + return dwfl_thread_state_registers(thread, 0, PERF_REG_LOONGARCH_MAX, dwarf_regs); +} diff --git a/tools/perf/arch/loongarch/util/unwind-libunwind.c b/tools/perf/arch/loongarch/util/unwind-libunwind.c new file mode 100644 index 000000000000..f693167b86ef --- /dev/null +++ b/tools/perf/arch/loongarch/util/unwind-libunwind.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" +#include "util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_LOONGARCH64_R1: + return PERF_REG_LOONGARCH_R1; + case UNW_LOONGARCH64_R2: + return PERF_REG_LOONGARCH_R2; + case UNW_LOONGARCH64_R3: + return PERF_REG_LOONGARCH_R3; + case UNW_LOONGARCH64_R4: + return PERF_REG_LOONGARCH_R4; + case UNW_LOONGARCH64_R5: + return PERF_REG_LOONGARCH_R5; + case UNW_LOONGARCH64_R6: + return PERF_REG_LOONGARCH_R6; + case UNW_LOONGARCH64_R7: + return PERF_REG_LOONGARCH_R7; + case UNW_LOONGARCH64_R8: + return PERF_REG_LOONGARCH_R8; + case UNW_LOONGARCH64_R9: + return PERF_REG_LOONGARCH_R9; + case UNW_LOONGARCH64_R10: + return PERF_REG_LOONGARCH_R10; + case UNW_LOONGARCH64_R11: + return PERF_REG_LOONGARCH_R11; + case UNW_LOONGARCH64_R12: + return PERF_REG_LOONGARCH_R12; + case UNW_LOONGARCH64_R13: + return PERF_REG_LOONGARCH_R13; + case UNW_LOONGARCH64_R14: + return PERF_REG_LOONGARCH_R14; + case UNW_LOONGARCH64_R15: + return PERF_REG_LOONGARCH_R15; + case UNW_LOONGARCH64_R16: + return PERF_REG_LOONGARCH_R16; + case UNW_LOONGARCH64_R17: + return PERF_REG_LOONGARCH_R17; + case UNW_LOONGARCH64_R18: + return PERF_REG_LOONGARCH_R18; + case UNW_LOONGARCH64_R19: + return PERF_REG_LOONGARCH_R19; + case UNW_LOONGARCH64_R20: + return PERF_REG_LOONGARCH_R20; + case UNW_LOONGARCH64_R21: + return PERF_REG_LOONGARCH_R21; + case UNW_LOONGARCH64_R22: + return PERF_REG_LOONGARCH_R22; + case UNW_LOONGARCH64_R23: + return PERF_REG_LOONGARCH_R23; + case UNW_LOONGARCH64_R24: + return PERF_REG_LOONGARCH_R24; + case UNW_LOONGARCH64_R25: + return PERF_REG_LOONGARCH_R25; + case UNW_LOONGARCH64_R26: + return PERF_REG_LOONGARCH_R26; + case UNW_LOONGARCH64_R27: + return PERF_REG_LOONGARCH_R27; + case UNW_LOONGARCH64_R28: + return PERF_REG_LOONGARCH_R28; + case UNW_LOONGARCH64_R29: + return PERF_REG_LOONGARCH_R29; + case UNW_LOONGARCH64_R30: + return PERF_REG_LOONGARCH_R30; + case UNW_LOONGARCH64_R31: + return PERF_REG_LOONGARCH_R31; + case UNW_LOONGARCH64_PC: + return PERF_REG_LOONGARCH_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return -EINVAL; +} diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 9f085aa09c97..95d3d99e84ac 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -39,6 +39,7 @@ arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk arch/arm/include/uapi/asm/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h +arch/loongarch/include/uapi/asm/perf_regs.h arch/powerpc/include/uapi/asm/perf_regs.h arch/s390/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/perf_regs.h diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1f56a29802e4..93b56bc42c73 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -156,6 +156,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" 
#include "arch/sparc/annotate/instructions.c" +#include "arch/loongarch/annotate/instructions.c" static struct arch architectures[] = { { @@ -202,6 +203,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "loongarch", + .init = loongarch__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 1b49ecee5aff..43a77aeeb310 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -14,6 +14,10 @@ #define EM_AARCH64 183 /* ARM 64 bit */ #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -24,6 +28,7 @@ #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" +#include "../arch/loongarch/include/dwarf-regs-table.h" #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) @@ -53,6 +58,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(sparc_regstr_tbl, n); case EM_XTENSA: return __get_dwarf_regstr(xtensa_regstr_tbl, n); + case EM_LOONGARCH: + return __get_dwarf_regstr(loongarch_regstr_tbl, n); default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 342a83025a86..f4fcd1b72ff2 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -345,6 +345,8 @@ static const char *normalize_arch(char *arch) return "mips"; if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) return "sh"; + if (!strncmp(arch, "loongarch", 9)) + return "loongarch"; return arch; } diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index ac638945b4cb..43dac6998d2b 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -38,6 +38,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__s390x__) #define GEN_ELF_ARCH EM_S390 #define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__loongarch__) +#define GEN_ELF_ARCH EM_LOONGARCH +#define GEN_ELF_CLASS ELFCLASS64 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 03bd99d3be16..e441f12c7272 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32; #include const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; static const char **syscalltbl_native = syscalltbl_arm64; +#elif defined(__loongarch__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; +static const char **syscalltbl_native = syscalltbl_loongarch; #endif struct syscall { diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index b10b7a27c33f..0b2af44c167f 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -4,7 +4,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ ) + -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ + -e s/loongarch.*/loongarch/) ifndef ARCH ARCH := $(HOSTARCH) @@ -33,6 +34,15 @@ ifeq ($(ARCH),sh64) SRCARCH := sh endif 
+# Additional ARCH settings for loongarch +ifeq ($(ARCH),loongarch32) + SRCARCH := loongarch +endif + +ifeq ($(ARCH),loongarch64) + SRCARCH := loongarch +endif + LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) ifeq ($(LP64), 1) IS_64_BIT := 1 -- Gitee From e09e0e06544847b41fa723b3f19a821dafc8d8d1 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 3 Jun 2021 18:25:38 +0800 Subject: [PATCH 197/241] LoongArch: Add alternative runtime patching support Change-Id: Id2ba6e44040213054d1da1f9e641227b2886d118 Signed-off-by: Jun Yi Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 1 + arch/loongarch/include/asm/alternative-asm.h | 82 ++++++ arch/loongarch/include/asm/alternative.h | 111 ++++++++ arch/loongarch/include/asm/inst.h | 56 ++++ arch/loongarch/kernel/Makefile | 2 +- arch/loongarch/kernel/alternative.c | 265 +++++++++++++++++++ arch/loongarch/kernel/module.c | 15 ++ arch/loongarch/kernel/setup.c | 7 + arch/loongarch/kernel/vmlinux.lds.S | 12 + arch/loongarch/lib/clear_user.S | 70 ++++- arch/loongarch/lib/copy_user.S | 91 ++++++- arch/loongarch/lib/memcpy.S | 71 ++++- arch/loongarch/lib/memmove.S | 98 ++++++- arch/loongarch/lib/memset.S | 71 ++++- 14 files changed, 921 insertions(+), 31 deletions(-) create mode 100644 arch/loongarch/include/asm/alternative-asm.h create mode 100644 arch/loongarch/include/asm/alternative.h create mode 100644 arch/loongarch/kernel/alternative.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index d2ca36ad907f..fc3c14e947cf 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -7,6 +7,7 @@ config LOONGARCH select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_SPECIAL if !32BIT diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h new file mode 100644 index 000000000000..ff3d10ac393f --- /dev/null +++ b/arch/loongarch/include/asm/alternative-asm.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ALTERNATIVE_ASM_H +#define _ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +#include + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro altinstruction_entry orig alt feature orig_len alt_len + .long \orig - . + .long \alt - . + .short \feature + .byte \orig_len + .byte \alt_len +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".fill" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature +140 : + \oldinstr +141 : + .fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x03400000 +142 : + + .pushsection .altinstructions, "a" + altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f + .popsection + + .subsection 1 +143 : + \newinstr +144 : + .previous +.endm + +#define old_len (141b-140b) +#define new_len1 (144f-143f) +#define new_len2 (145f-144f) + +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +/* + * Same as ALTERNATIVE macro above but for two alternatives. 
If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @newinstr2.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140 :
+	\oldinstr
+141 :
+	.fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+		(alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x03400000
+142 :
+
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f
+	altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f
+	.popsection
+
+	.subsection 1
+143 :
+	\newinstr1
+144 :
+	\newinstr2
+145 :
+	.previous
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ALTERNATIVE_ASM_H */
diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h
new file mode 100644
index 000000000000..cee7b29785ab
--- /dev/null
+++ b/arch/loongarch/include/asm/alternative.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ALTERNATIVE_H
+#define _ASM_ALTERNATIVE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+#include <asm/asm.h>
+
+struct alt_instr {
+	s32 instr_offset;	/* offset to original instruction */
+	s32 replace_offset;	/* offset to replacement instruction */
+	u16 feature;		/* feature bit set for replacement */
+	u8  instrlen;		/* length of original instruction */
+	u8  replacementlen;	/* length of new instruction */
+} __packed;
+
+/*
+ * Debug flag that can be tested to see whether alternative
+ * instructions were patched in already:
+ */
+extern int alternatives_patched;
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+extern void alternative_instructions(void);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+#define b_replacement(num)	"664"#num
+#define e_replacement(num)	"665"#num
+
+#define alt_end_marker	"663"
+#define alt_slen	"662b-661b"
+#define alt_total_slen	alt_end_marker"b-661b"
+#define alt_rlen(num)	e_replacement(num)"f-"b_replacement(num)"f"
+
+#define __OLDINSTR(oldinstr, num) \
+	"661:\n\t" oldinstr "\n662:\n" \
+	".fill -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
+		"((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x03400000\n"
+
+#define OLDINSTR(oldinstr, num) \
+	__OLDINSTR(oldinstr, num) \
+	alt_end_marker ":\n"
+
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
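+ *
+ * A worked example with hypothetical lengths: for a 4-byte @oldinstr,
+ * an 8-byte @newinstr1 and a 12-byte @newinstr2, alt_max_short() yields
+ * 12, so OLDINSTR_2() below pads the original site with
+ * (12 - 4) / 4 = 2 NOP words (0x03400000).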
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+	"661:\n\t" oldinstr "\n662:\n" \
+	".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+		"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, " \
+		"4, 0x03400000\n" \
+	alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)					      \
+	" .long 661b - .\n"				/* label           */ \
+	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
+	" .short " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte " alt_total_slen "\n"			/* source len      */ \
+	" .byte " alt_rlen(num) "\n"			/* replacement len */
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)	/* replacement */     \
+	b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+	OLDINSTR(oldinstr, 1)						\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature, 1)					\
+	".popsection\n"							\
+	".subsection 1\n" \
+	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
+	".previous\n"
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+	OLDINSTR_2(oldinstr, 1, 2)					\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature1, 1)					\
+	ALTINSTR_ENTRY(feature2, 2)					\
+	".popsection\n"							\
+	".subsection 1\n" \
+	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
+	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
+	".previous\n"
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows optimized instructions to be used even in generic kernel
+ * binaries.
+ *
+ * The length of oldinstr must be greater than or equal to the length of
+ * newinstr; it can be padded with nops as needed.
+ *
+ * For non-barrier-like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature)			\
+	(asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory"))
+
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+	(asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory"))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ALTERNATIVE_H */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 05ebb222e0e7..2e4fb22fd3c8 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -78,6 +78,56 @@ typedef unsigned int loongarch_instruction;
 /* Recode table from 16-bit register notation to 32-bit GPR. Do NOT export!!!
*/ extern const int reg16to32[]; +#define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction) + +enum loongarch_gpr { + LOONGARCH_GPR_ZERO = 0, + LOONGARCH_GPR_RA = 1, + LOONGARCH_GPR_TP = 2, + LOONGARCH_GPR_SP = 3, + LOONGARCH_GPR_A0 = 4, /* Reused as V0 for return value */ + LOONGARCH_GPR_A1, /* Reused as V1 for return value */ + LOONGARCH_GPR_A2, + LOONGARCH_GPR_A3, + LOONGARCH_GPR_A4, + LOONGARCH_GPR_A5, + LOONGARCH_GPR_A6, + LOONGARCH_GPR_A7, + LOONGARCH_GPR_T0 = 12, + LOONGARCH_GPR_T1, + LOONGARCH_GPR_T2, + LOONGARCH_GPR_T3, + LOONGARCH_GPR_T4, + LOONGARCH_GPR_T5, + LOONGARCH_GPR_T6, + LOONGARCH_GPR_T7, + LOONGARCH_GPR_T8, + LOONGARCH_GPR_FP = 22, + LOONGARCH_GPR_S0 = 23, + LOONGARCH_GPR_S1, + LOONGARCH_GPR_S2, + LOONGARCH_GPR_S3, + LOONGARCH_GPR_S4, + LOONGARCH_GPR_S5, + LOONGARCH_GPR_S6, + LOONGARCH_GPR_S7, + LOONGARCH_GPR_S8, + LOONGARCH_GPR_MAX +}; + +#define INSN_NOP 0x03400000 +#define INSN_BREAK 0x002a0000 + +#define ADDR_IMMMASK_ADDU16ID 0x00000000FFFF0000 +#define ADDR_IMMMASK_LU32ID 0x000FFFFF00000000 +#define ADDR_IMMMASK_LU52ID 0xFFF0000000000000 + +#define ADDR_IMMSHIFT_ADDU16ID 16 +#define ADDR_IMMSHIFT_LU32ID 32 +#define ADDR_IMMSHIFT_LU52ID 52 + +#define ADDR_IMM(addr, INSN) ((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN) + void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc); unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign); unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n); @@ -94,4 +144,10 @@ static inline bool is_pc_insn(union loongarch_instruction insn) insn.reg1i20_format.opcode <= pcaddu18i_op; } +u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); +u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); + +u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, + unsigned long pc, unsigned long dest); + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 18d04b7fe2a9..0f802db755c4 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -8,7 +8,7 @@ extra-y := head.o vmlinux.lds obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ process.o ptrace.o reset.o setup.o signal.o io.o \ syscall.o time.o topology.o traps.o switch.o \ - cacheinfo.o cmpxchg.o vdso.o + cacheinfo.o cmpxchg.o vdso.o alternative.o inst.o obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_EFI) += efi.o diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c new file mode 100644 index 000000000000..c9b0c877f870 --- /dev/null +++ b/arch/loongarch/kernel/alternative.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include + +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + +#define MAX_PATCH_SIZE (((u8)(-1)) / LOONGARCH_INSN_SIZE) + +static int __initdata_or_module debug_alternative; + +static int __init debug_alt(char *str) +{ + debug_alternative = 1; + return 1; +} +__setup("debug-alternative", debug_alt); + +#define DPRINTK(fmt, args...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ +} while (0) + +#define DUMP_WORDS(buf, count, fmt, args...) 
\
+do { \
+	if (unlikely(debug_alternative)) { \
+		int _j; \
+		union loongarch_instruction *_buf = buf; \
+		\
+		if (!(count)) \
+			break; \
+		\
+		printk(KERN_DEBUG fmt, ##args); \
+		for (_j = 0; _j < count - 1; _j++) \
+			printk(KERN_CONT "<%08x> ", _buf[_j].word); \
+		printk(KERN_CONT "<%08x>\n", _buf[_j].word); \
+	} \
+} while (0)
+
+#define __SIGNEX(X, SIDX) ((X) >= (1 << SIDX) ? ~((1 << SIDX) - 1) | (X) : (X))
+#define SIGNEX16(X) __SIGNEX(((unsigned long)(X)), 15)
+#define SIGNEX20(X) __SIGNEX(((unsigned long)(X)), 19)
+#define SIGNEX21(X) __SIGNEX(((unsigned long)(X)), 20)
+#define SIGNEX26(X) __SIGNEX(((unsigned long)(X)), 25)
+
+static inline unsigned long bs_dest_16(unsigned long now, unsigned int si)
+{
+	return now + (SIGNEX16(si) << 2);
+}
+
+static inline unsigned long bs_dest_21(unsigned long now, unsigned int h, unsigned int l)
+{
+	return now + (SIGNEX21(h << 16 | l) << 2);
+}
+
+static inline unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l)
+{
+	return now + (SIGNEX26(h << 16 | l) << 2);
+}
+
+/* Use this to add nops to a buffer, then text_poke the whole buffer. */
+static void __init_or_module add_nops(union loongarch_instruction *insn, int count)
+{
+	while (count--) {
+		insn->word = INSN_NOP;
+		insn++;
+	}
+}
+
+/* Is the jump addr in local .altinstructions */
+static inline bool in_alt_jump(unsigned long jump, void *start, void *end)
+{
+	return jump >= (unsigned long)start && jump < (unsigned long)end;
+}
+
+static void __init_or_module recompute_jump(union loongarch_instruction *buf,
+	union loongarch_instruction *dest, union loongarch_instruction *src,
+	void *start, void *end)
+{
+	unsigned int si, si_l, si_h;
+	unsigned long cur_pc, jump_addr, pc;
+	long offset;
+
+	cur_pc = (unsigned long)src;
+	pc = (unsigned long)dest;
+
+	si_l = src->reg0i26_format.simmediate_l;
+	si_h = src->reg0i26_format.simmediate_h;
+	switch (src->reg0i26_format.opcode) {
+	case b_op:
+	case bl_op:
+		jump_addr = bs_dest_26(cur_pc, si_h, si_l);
+		if (in_alt_jump(jump_addr, start, end))
+			return;
+		offset = jump_addr - pc;
+		BUG_ON(offset < -SZ_128M || offset >= SZ_128M);
+		offset >>= 2;
+		buf->reg0i26_format.simmediate_h = offset >> 16;
+		buf->reg0i26_format.simmediate_l = offset;
+		return;
+	}
+
+	si_l = src->reg1i21_format.simmediate_l;
+	si_h = src->reg1i21_format.simmediate_h;
+	switch (src->reg1i21_format.opcode) {
+	case beqz_op:
+	case bnez_op:
+	case bceqz_op:
+		jump_addr = bs_dest_21(cur_pc, si_h, si_l);
+		if (in_alt_jump(jump_addr, start, end))
+			return;
+		offset = jump_addr - pc;
+		BUG_ON(offset < -SZ_4M || offset >= SZ_4M);
+		offset >>= 2;
+		buf->reg1i21_format.simmediate_h = offset >> 16;
+		buf->reg1i21_format.simmediate_l = offset;
+		return;
+	}
+
+	si = src->reg2i16_format.simmediate;
+	switch (src->reg2i16_format.opcode) {
+	case beq_op:
+	case bne_op:
+	case blt_op:
+	case bge_op:
+	case bltu_op:
+	case bgeu_op:
+		jump_addr = bs_dest_16(cur_pc, si);
+		if (in_alt_jump(jump_addr, start, end))
+			return;
+		offset = jump_addr - pc;
+		BUG_ON(offset < -SZ_128K || offset >= SZ_128K);
+		offset >>= 2;
+		buf->reg2i16_format.simmediate = offset;
+		return;
+	}
+}
+
+static int __init_or_module copy_alt_insns(union loongarch_instruction *buf,
+	union loongarch_instruction *dest, union loongarch_instruction *src, int nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		buf[i].word = src[i].word;
+
+		if (is_pc_insn(src[i])) {
+			pr_err("pcrel instructions are not supported at present!\n");
+			return -EINVAL;
+		}
+
+		if (is_branch_insn(src[i]) &&
+		    src[i].reg2i16_format.opcode != jirl_op) {
+			recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * text_poke_early - Update instructions on a live kernel at boot time
+ *
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected against NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
+ */
+static void *__init_or_module text_poke_early(union loongarch_instruction *insn,
+	union loongarch_instruction *buf, unsigned int nr)
+{
+	int i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nr; i++)
+		insn[i].word = buf[i].word;
+
+	local_irq_restore(flags);
+
+	wbflush();
+	flush_icache_range((unsigned long)insn, (unsigned long)(insn + nr));
+
+	return insn;
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self-modifying code.
+ * This implies that asymmetric systems where APs have fewer capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
+void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+	struct alt_instr *a;
+	unsigned int nr_instr, nr_repl, nr_insnbuf;
+	union loongarch_instruction *instr, *replacement;
+	union loongarch_instruction insnbuf[MAX_PATCH_SIZE];
+
+	DPRINTK("alt table %px, -> %px", start, end);
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite previously scanned alternative code.
+	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+	 * patch code.
+	 *
+	 * So be careful if you want to change the scan order to any other
+	 * order.
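+	 *
+	 * As a hypothetical illustration: if one entry patches the range
+	 * [addr, addr + 16) and a later entry patches [addr + 8, addr + 24),
+	 * the later entry's replacement bytes win in the overlapping eight
+	 * bytes, purely because it is applied later.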
+ */ + for (a = start; a < end; a++) { + nr_insnbuf = 0; + + instr = (void *)&a->instr_offset + a->instr_offset; + replacement = (void *)&a->replace_offset + a->replace_offset; + + BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->instrlen & 0x3); + BUG_ON(a->replacementlen & 0x3); + + nr_instr = a->instrlen / LOONGARCH_INSN_SIZE; + nr_repl = a->replacementlen / LOONGARCH_INSN_SIZE; + + if (!cpu_has(a->feature)) { + DPRINTK("feat not exist: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + continue; + } + + DPRINTK("feat: %d, old: (%px len: %d), repl: (%px, len: %d)", + a->feature, instr, a->instrlen, + replacement, a->replacementlen); + + DUMP_WORDS(instr, nr_instr, "%px: old_insn: ", instr); + DUMP_WORDS(replacement, nr_repl, "%px: rpl_insn: ", replacement); + + copy_alt_insns(insnbuf, instr, replacement, nr_repl); + nr_insnbuf = nr_repl; + + if (nr_instr > nr_repl) { + add_nops(insnbuf + nr_repl, nr_instr - nr_repl); + nr_insnbuf += nr_instr - nr_repl; + } + DUMP_WORDS(insnbuf, nr_insnbuf, "%px: final_insn: ", instr); + + text_poke_early(instr, insnbuf, nr_insnbuf); + } +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + alternatives_patched = 1; +} diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 492200917a7a..88dc90001a2e 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -17,6 +17,7 @@ #include #include #include +#include static inline bool signed_imm_check(long val, unsigned int bit) { @@ -501,3 +502,17 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (!strcmp(".altinstructions", secstrs + s->sh_name)) + apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + } + + return 0; +} diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 62c07a824c6d..48207a9a2a1d 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include +#include #include #include #include @@ -56,6 +58,11 @@ static struct resource bss_resource = { .name = "Kernel bss", }; unsigned long __kaslr_offset __ro_after_init; EXPORT_SYMBOL(__kaslr_offset); +void __init arch_cpu_finalize_init(void) +{ + alternative_instructions(); +} + static void *detect_magic __initdata = detect_memory_region; void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_addr_t sz_max) diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 2d98def8ef44..50662b99cb98 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -42,6 +42,18 @@ SECTIONS } :text = 0 _etext = .; /* End of text section */ + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + */ + . 
= ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .got : ALIGN(16) { *(.got) } .plt : ALIGN(16) { *(.plt) } .got.plt : ALIGN(16) { *(.got.plt) } diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 93deae5000ec..839fb63588a9 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -3,25 +3,37 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a1, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__clear_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __clear_user_generic", \ + "b __clear_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__clear_user) + +EXPORT_SYMBOL(__clear_user) + /* - * unsigned long __clear_user(void *addr, size_t size) + * unsigned long __clear_user_generic(void *addr, size_t size) * * a0: addr * a1: size */ -SYM_FUNC_START(__clear_user) +SYM_FUNC_START(__clear_user_generic) beqz a1, 2f 1: st.b zero, a0, 0 @@ -33,6 +45,54 @@ SYM_FUNC_START(__clear_user) jr ra _asm_extable 1b, .L_fixup_handle_0 -SYM_FUNC_END(__clear_user) +SYM_FUNC_END(__clear_user_generic) -EXPORT_SYMBOL(__clear_user) +/* + * unsigned long __clear_user_fast(void *addr, unsigned long size) + * + * a0: addr + * a1: size + */ +SYM_FUNC_START(__clear_user_fast) + beqz a1, 10f + + ori a2, zero, 64 + blt a1, a2, 9f + + /* set 64 bytes at a time */ +1: st.d zero, a0, 0 +2: st.d zero, a0, 8 +3: st.d zero, a0, 16 +4: st.d zero, a0, 24 +5: st.d zero, a0, 32 +6: st.d zero, a0, 40 +7: st.d zero, a0, 48 +8: st.d zero, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, -64 + bge a1, a2, 1b + + beqz a1, 10f + + /* set the remaining bytes */ +9: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 9b + + /* return */ +10: move a0, a1 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 +SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 9388ed184dce..26675d6f8fb5 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -3,26 +3,38 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include #include #include #include +#include #include #include -.irp to, 0 +.irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): addi.d a0, a2, (\to) * (-8) jr ra .endr +SYM_FUNC_START(__copy_user) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __copy_user_generic", \ + "b __copy_user_fast", CPU_FEATURE_UAL +SYM_FUNC_END(__copy_user) + +EXPORT_SYMBOL(__copy_user) + /* - * unsigned long __copy_user(void *to, const void *from, size_t n) + * unsigned long __copy_user_generic(void *to, const void *from, size_t n) * * a0: to * a1: from * a2: n */ -SYM_FUNC_START(__copy_user) +SYM_FUNC_START(__copy_user_generic) beqz a2, 3f 1: ld.b t0, a1, 0 @@ -37,6 +49,75 @@ SYM_FUNC_START(__copy_user) _asm_extable 1b, .L_fixup_handle_0 _asm_extable 2b, .L_fixup_handle_0 -SYM_FUNC_END(__copy_user) +SYM_FUNC_END(__copy_user_generic) 
-EXPORT_SYMBOL(__copy_user) +/* + * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n) + * + * a0: to + * a1: from + * a2: n + */ +SYM_FUNC_START(__copy_user_fast) + beqz a2, 19f + + ori a3, zero, 64 + blt a2, a3, 17f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 +2: ld.d t1, a1, 8 +3: ld.d t2, a1, 16 +4: ld.d t3, a1, 24 +5: ld.d t4, a1, 32 +6: ld.d t5, a1, 40 +7: ld.d t6, a1, 48 +8: ld.d t7, a1, 56 +9: st.d t0, a0, 0 +10: st.d t1, a0, 8 +11: st.d t2, a0, 16 +12: st.d t3, a0, 24 +13: st.d t4, a0, 32 +14: st.d t5, a0, 40 +15: st.d t6, a0, 48 +16: st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a3, 1b + + beqz a2, 19f + + /* copy the remaining bytes */ +17: ld.b t0, a1, 0 +18: st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 17b + + /* return */ +19: move a0, a2 + jr ra + + /* fixup and ex_table */ + _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 2b, .L_fixup_handle_1 + _asm_extable 3b, .L_fixup_handle_2 + _asm_extable 4b, .L_fixup_handle_3 + _asm_extable 5b, .L_fixup_handle_4 + _asm_extable 6b, .L_fixup_handle_5 + _asm_extable 7b, .L_fixup_handle_6 + _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 9b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_1 + _asm_extable 11b, .L_fixup_handle_2 + _asm_extable 12b, .L_fixup_handle_3 + _asm_extable 13b, .L_fixup_handle_4 + _asm_extable 14b, .L_fixup_handle_5 + _asm_extable 15b, .L_fixup_handle_6 + _asm_extable 16b, .L_fixup_handle_7 + _asm_extable 17b, .L_fixup_handle_0 + _asm_extable 18b, .L_fixup_handle_0 +SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index ddb3f710012b..81d71fcb069f 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -3,18 +3,31 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include +#include #include +#include #include #include +SYM_FUNC_START(memcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memcpy_generic", \ + "b __memcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memcpy) + +EXPORT_SYMBOL(memcpy) + /* - * void *memcpy(void *dst, const void *src, size_t n) + * void *__memcpy_generic(void *dst, const void *src, size_t n) * * a0: dst * a1: src * a2: n */ -SYM_FUNC_START(memcpy) +SYM_FUNC_START(__memcpy_generic) move a3, a0 beqz a2, 2f @@ -27,6 +40,56 @@ SYM_FUNC_START(memcpy) 2: move a0, a3 jr ra -SYM_FUNC_END(memcpy) +SYM_FUNC_END(__memcpy_generic) -EXPORT_SYMBOL(memcpy) +/* + * void *__memcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__memcpy_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* copy 64 bytes at a time */ +1: ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + ld.d t4, a1, 32 + ld.d t5, a1, 40 + ld.d t6, a1, 48 + ld.d t7, a1, 56 + st.d t0, a0, 0 + st.d t1, a0, 8 + st.d t2, a0, 16 + st.d t3, a0, 24 + st.d t4, a0, 32 + st.d t5, a0, 40 + st.d t6, a0, 48 + st.d t7, a0, 56 + + addi.d a0, a0, 64 + addi.d a1, a1, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, 0 + st.b t0, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index bdc2e877aca9..d179b717c4b7 100644 --- a/arch/loongarch/lib/memmove.S +++ 
b/arch/loongarch/lib/memmove.S @@ -3,18 +3,51 @@ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include +#include #include +#include #include #include +SYM_FUNC_START(memmove) + blt a0, a1, 1f /* dst < src, memcpy */ + blt a1, a0, 3f /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ + + /* if (src - dst) < 64, copy 1 byte at a time */ +1: ori a3, zero, 64 + sub.d t0, a1, a0 + blt t0, a3, 2f + b memcpy +2: b __memcpy_generic + + /* if (dst - src) < 64, copy 1 byte at a time */ +3: ori a3, zero, 64 + sub.d t0, a0, a1 + blt t0, a3, 4f + b rmemcpy +4: b __rmemcpy_generic +SYM_FUNC_END(memmove) + +EXPORT_SYMBOL(memmove) + +SYM_FUNC_START(rmemcpy) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __rmemcpy_generic", \ + "b __rmemcpy_fast", CPU_FEATURE_UAL +SYM_FUNC_END(rmemcpy) + /* - * void *rmemcpy(void *dst, const void *src, size_t n) + * void *__rmemcpy_generic(void *dst, const void *src, size_t n) * * a0: dst * a1: src * a2: n */ -SYM_FUNC_START(rmemcpy) +SYM_FUNC_START(__rmemcpy_generic) move a3, a0 beqz a2, 2f @@ -30,16 +63,59 @@ SYM_FUNC_START(rmemcpy) 2: move a0, a3 jr ra -SYM_FUNC_END(rmemcpy) +SYM_FUNC_END(__rmemcpy_generic) -SYM_FUNC_START(memmove) - blt a0, a1, 1f /* dst < src, memcpy */ - blt a1, a0, 2f /* src < dst, rmemcpy */ - jr ra /* dst == src, return */ +/* + * void *__rmemcpy_fast(void *dst, const void *src, size_t n) + * + * a0: dst + * a1: src + * a2: n + */ +SYM_FUNC_START(__rmemcpy_fast) + move a3, a0 + beqz a2, 3f -1: b memcpy + add.d a0, a0, a2 + add.d a1, a1, a2 -2: b rmemcpy -SYM_FUNC_END(memmove) + ori a4, zero, 64 + blt a2, a4, 2f -EXPORT_SYMBOL(memmove) + /* copy 64 bytes at a time */ +1: ld.d t0, a1, -8 + ld.d t1, a1, -16 + ld.d t2, a1, -24 + ld.d t3, a1, -32 + ld.d t4, a1, -40 + ld.d t5, a1, -48 + ld.d t6, a1, -56 + ld.d t7, a1, -64 + st.d t0, a0, -8 + st.d t1, a0, -16 + st.d t2, a0, -24 + st.d t3, a0, -32 + st.d t4, a0, -40 + st.d t5, a0, -48 + st.d t6, a0, -56 + st.d t7, a0, -64 + + addi.d a0, a0, -64 + addi.d a1, a1, -64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* copy the remaining bytes */ +2: ld.b t0, a1, -1 + st.b t0, a0, -1 + addi.d a0, a0, -1 + addi.d a1, a1, -1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 4efef40815d1..414b857f5d0f 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -1,20 +1,39 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2020-2021 Loongson Technology Co., Ltd. 
+ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited */ +#include +#include #include +#include #include #include +.macro fill_to_64 r0 + bstrins.d \r0, \r0, 15, 8 + bstrins.d \r0, \r0, 31, 16 + bstrins.d \r0, \r0, 63, 32 +.endm + +SYM_FUNC_START(memset) + /* + * Some CPUs support hardware unaligned access + */ + ALTERNATIVE "b __memset_generic", \ + "b __memset_fast", CPU_FEATURE_UAL +SYM_FUNC_END(memset) + +EXPORT_SYMBOL(memset) + /* - * void *memset(void *s, int c, size_t n) + * void *__memset_generic(void *s, int c, size_t n) * * a0: s * a1: c * a2: n */ -SYM_FUNC_START(memset) +SYM_FUNC_START(__memset_generic) move a3, a0 beqz a2, 2f @@ -25,6 +44,48 @@ SYM_FUNC_START(memset) 2: move a0, a3 jr ra -SYM_FUNC_END(memset) +SYM_FUNC_END(__memset_generic) -EXPORT_SYMBOL(memset) +/* + * void *__memset_fast(void *s, int c, size_t n) + * + * a0: s + * a1: c + * a2: n + */ +SYM_FUNC_START(__memset_fast) + move a3, a0 + beqz a2, 3f + + ori a4, zero, 64 + blt a2, a4, 2f + + /* fill a1 to 64 bits */ + fill_to_64 a1 + + /* set 64 bytes at a time */ +1: st.d a1, a0, 0 + st.d a1, a0, 8 + st.d a1, a0, 16 + st.d a1, a0, 24 + st.d a1, a0, 32 + st.d a1, a0, 40 + st.d a1, a0, 48 + st.d a1, a0, 56 + + addi.d a0, a0, 64 + addi.d a2, a2, -64 + bge a2, a4, 1b + + beqz a2, 3f + + /* set the remaining bytes */ +2: st.b a1, a0, 0 + addi.d a0, a0, 1 + addi.d a2, a2, -1 + bgt a2, zero, 2b + + /* return */ +3: move a0, a3 + jr ra +SYM_FUNC_END(__memset_fast) -- Gitee From a94168ccee6291b7f952f1637e683c1d57d842c8 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 12 Apr 2023 23:23:02 +0800 Subject: [PATCH 198/241] LoongArch: Optimize memory ops (memset/memcpy/memmove) To optimize memset()/memcpy()/memmove() and so on, we use a jump table to dispatch cases for short data lengths; and for long data lengths, we split the destination into head part (first 8 bytes), tail part (last 8 bytes) and middle part. The head part and tail part may be at unaligned addresses, while the middle part is always aligned (the middle part is allowed to overlap the head/tail part). In this way, the first and last 8 bytes may be unaligned accesses, but we can make sure the data in the middle is processed at an aligned destination address. We have tested micro-bench[1] on a Loongson-3C5000 16-core machine (2.2GHz): 1. memset | length | src offset | dst offset | speed before | speed after | % | |--------|------------|------------|--------------|-------------|---------| | 8 | 0 | 0 | 696.191 | 1518.785 | 118.16% | | 8 | 0 | 1 | 696.325 | 1518.937 | 118.14% | | 50 | 0 | 0 | 969.976 | 8053.902 | 730.32% | | 50 | 0 | 1 | 970.034 | 8058.475 | 730.74% | | 300 | 0 | 0 | 5876.612 | 16544.703 | 181.53% | | 300 | 0 | 1 | 5030.849 | 16549.011 | 228.95% | | 1200 | 0 | 0 | 11797.077 | 16752.137 | 42.00% | | 1200 | 0 | 1 | 5687.141 | 16645.233 | 192.68% | | 4000 | 0 | 0 | 15723.27 | 16761.557 | 6.60% | | 4000 | 0 | 1 | 5906.114 | 16732.316 | 183.30% | | 8000 | 0 | 0 | 16751.403 | 16770.002 | 0.11% | | 8000 | 0 | 1 | 5995.449 | 16754.07 | 179.45% | 2. 
memcpy | length | src offset | dst offset | speed before | speed after | % | |--------|------------|------------|--------------|-------------|---------| | 8 | 0 | 0 | 696.2 | 1670.605 | 139.96% | | 8 | 0 | 1 | 696.325 | 1671.138 | 139.99% | | 50 | 0 | 0 | 969.974 | 8724.999 | 799.51% | | 50 | 0 | 1 | 970.032 | 8730.138 | 799.98% | | 300 | 0 | 0 | 5564.662 | 16272.652 | 192.43% | | 300 | 0 | 1 | 4670.436 | 14972.842 | 220.59% | | 1200 | 0 | 0 | 10740.23 | 16751.728 | 55.97% | | 1200 | 0 | 1 | 5027.741 | 14874.564 | 195.85% | | 4000 | 0 | 0 | 15122.367 | 16737.642 | 10.68% | | 4000 | 0 | 1 | 5536.918 | 14890.397 | 168.93% | | 8000 | 0 | 0 | 16505.453 | 16553.543 | 0.29% | | 8000 | 0 | 1 | 5821.619 | 14841.804 | 154.94% | 3. memmove | length | src offset | dst offset | speed before | speed after | % | |--------|------------|------------|--------------|-------------|---------| | 8 | 0 | 0 | 982.693 | 1670.568 | 70.00% | | 8 | 0 | 1 | 983.023 | 1671.174 | 70.00% | | 50 | 0 | 0 | 1230.87 | 8727.625 | 609.06% | | 50 | 0 | 1 | 1232.515 | 8730.138 | 608.32% | | 300 | 0 | 0 | 6490.375 | 16296.993 | 151.09% | | 300 | 0 | 1 | 4282.687 | 14972.842 | 249.61% | | 1200 | 0 | 0 | 11742.755 | 16752.546 | 42.66% | | 1200 | 0 | 1 | 5039.338 | 14872.951 | 195.14% | | 4000 | 0 | 0 | 15467.786 | 16737.09 | 8.21% | | 4000 | 0 | 1 | 5009.905 | 14890.542 | 197.22% | | 8000 | 0 | 0 | 16489.664 | 16553.273 | 0.39% | | 8000 | 0 | 1 | 5823.786 | 14858.646 | 155.14% | * speed: MB/s * length: byte [1] https://github.com/heiher/mem-bench Change-Id: I5e9a80e8c05aef32f9965d8706ae31355c6eea22 Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/lib/clear_user.S | 136 +++++++++++++++-- arch/loongarch/lib/copy_user.S | 251 +++++++++++++++++++++++++------- arch/loongarch/lib/memcpy.S | 146 +++++++++++++++---- arch/loongarch/lib/memmove.S | 120 ++++++++------- arch/loongarch/lib/memset.S | 116 ++++++++++++--- 5 files changed, 602 insertions(+), 167 deletions(-) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 839fb63588a9..d7ee1e6666b4 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -13,7 +13,14 @@ .irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): - addi.d a0, a1, (\to) * (-8) + sub.d a0, a2, a0 + addi.d a0, a0, (\to) * (-8) + jr ra +.endr + +.irp to, 0, 2, 4 +.L_fixup_handle_s\to\(): + addi.d a0, a1, -\to jr ra .endr @@ -44,7 +51,7 @@ SYM_FUNC_START(__clear_user_generic) 2: move a0, a1 jr ra - _asm_extable 1b, .L_fixup_handle_0 + _asm_extable 1b, .L_fixup_handle_s0 SYM_FUNC_END(__clear_user_generic) /* @@ -54,12 +61,21 @@ SYM_FUNC_END(__clear_user_generic) * a1: size */ SYM_FUNC_START(__clear_user_fast) - beqz a1, 10f + sltui t0, a1, 9 + bnez t0, .Lsmall - ori a2, zero, 64 - blt a1, a2, 9f + add.d a2, a0, a1 +0: st.d zero, a0, 0 + + /* align up address */ + addi.d a0, a0, 8 + bstrins.d a0, zero, 2, 0 + + addi.d a3, a2, -64 + bgeu a0, a3, .Llt64 /* set 64 bytes at a time */ +.Lloop64: 1: st.d zero, a0, 0 2: st.d zero, a0, 8 3: st.d zero, a0, 16 @@ -68,24 +84,95 @@ SYM_FUNC_START(__clear_user_fast) 6: st.d zero, a0, 40 7: st.d zero, a0, 48 8: st.d zero, a0, 56 - addi.d a0, a0, 64 - addi.d a1, a1, -64 - bge a1, a2, 1b - - beqz a1, 10f + bltu a0, a3, .Lloop64 /* set the remaining bytes */ -9: st.b zero, a0, 0 - addi.d a0, a0, 1 - addi.d a1, a1, -1 - bgt a1, zero, 9b +.Llt64: + addi.d a3, a2, -32 + bgeu a0, a3, .Llt32 +9: st.d zero, a0, 0 +10: st.d zero, a0, 8 +11: st.d zero, a0, 16 +12: st.d zero, a0, 24 + addi.d a0, a0, 32 + +.Llt32: + 
addi.d a3, a2, -16 + bgeu a0, a3, .Llt16 +13: st.d zero, a0, 0 +14: st.d zero, a0, 8 + addi.d a0, a0, 16 + +.Llt16: + addi.d a3, a2, -8 + bgeu a0, a3, .Llt8 +15: st.d zero, a0, 0 + +.Llt8: +16: st.d zero, a2, -8 /* return */ -10: move a0, a1 + move a0, zero + jr ra + + .align 4 +.Lsmall: + pcaddi t0, 4 + slli.d a2, a1, 4 + add.d t0, t0, a2 + jr t0 + + .align 4 + move a0, zero + jr ra + + .align 4 +17: st.b zero, a0, 0 + move a0, zero + jr ra + + .align 4 +18: st.h zero, a0, 0 + move a0, zero + jr ra + + .align 4 +19: st.h zero, a0, 0 +20: st.b zero, a0, 2 + move a0, zero + jr ra + + .align 4 +21: st.w zero, a0, 0 + move a0, zero + jr ra + + .align 4 +22: st.w zero, a0, 0 +23: st.b zero, a0, 4 + move a0, zero + jr ra + + .align 4 +24: st.w zero, a0, 0 +25: st.h zero, a0, 4 + move a0, zero + jr ra + + .align 4 +26: st.w zero, a0, 0 +27: st.w zero, a0, 3 + move a0, zero + jr ra + + .align 4 +28: st.d zero, a0, 0 + move a0, zero jr ra /* fixup and ex_table */ + _asm_extable 0b, .L_fixup_handle_0 _asm_extable 1b, .L_fixup_handle_0 _asm_extable 2b, .L_fixup_handle_1 _asm_extable 3b, .L_fixup_handle_2 @@ -95,4 +182,23 @@ SYM_FUNC_START(__clear_user_fast) _asm_extable 7b, .L_fixup_handle_6 _asm_extable 8b, .L_fixup_handle_7 _asm_extable 9b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_1 + _asm_extable 11b, .L_fixup_handle_2 + _asm_extable 12b, .L_fixup_handle_3 + _asm_extable 13b, .L_fixup_handle_0 + _asm_extable 14b, .L_fixup_handle_1 + _asm_extable 15b, .L_fixup_handle_0 + _asm_extable 16b, .L_fixup_handle_1 + _asm_extable 17b, .L_fixup_handle_s0 + _asm_extable 18b, .L_fixup_handle_s0 + _asm_extable 19b, .L_fixup_handle_s0 + _asm_extable 20b, .L_fixup_handle_s2 + _asm_extable 21b, .L_fixup_handle_s0 + _asm_extable 22b, .L_fixup_handle_s0 + _asm_extable 23b, .L_fixup_handle_s4 + _asm_extable 24b, .L_fixup_handle_s0 + _asm_extable 25b, .L_fixup_handle_s4 + _asm_extable 26b, .L_fixup_handle_s0 + _asm_extable 27b, .L_fixup_handle_s4 + _asm_extable 28b, .L_fixup_handle_s0 SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index 26675d6f8fb5..106536e48419 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -13,7 +13,14 @@ .irp to, 0, 1, 2, 3, 4, 5, 6, 7 .L_fixup_handle_\to\(): - addi.d a0, a2, (\to) * (-8) + sub.d a0, a2, a0 + addi.d a0, a0, (\to) * (-8) + jr ra +.endr + +.irp to, 0, 2, 4 +.L_fixup_handle_s\to\(): + addi.d a0, a2, -\to jr ra .endr @@ -47,8 +54,8 @@ SYM_FUNC_START(__copy_user_generic) 3: move a0, a2 jr ra - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_0 + _asm_extable 1b, .L_fixup_handle_s0 + _asm_extable 2b, .L_fixup_handle_s0 SYM_FUNC_END(__copy_user_generic) /* @@ -59,65 +66,209 @@ SYM_FUNC_END(__copy_user_generic) * a2: n */ SYM_FUNC_START(__copy_user_fast) - beqz a2, 19f + sltui t0, a2, 9 + bnez t0, .Lsmall - ori a3, zero, 64 - blt a2, a3, 17f + add.d a3, a1, a2 + add.d a2, a0, a2 +0: ld.d t0, a1, 0 +1: st.d t0, a0, 0 - /* copy 64 bytes at a time */ -1: ld.d t0, a1, 0 -2: ld.d t1, a1, 8 -3: ld.d t2, a1, 16 -4: ld.d t3, a1, 24 -5: ld.d t4, a1, 32 -6: ld.d t5, a1, 40 -7: ld.d t6, a1, 48 -8: ld.d t7, a1, 56 -9: st.d t0, a0, 0 -10: st.d t1, a0, 8 -11: st.d t2, a0, 16 -12: st.d t3, a0, 24 -13: st.d t4, a0, 32 -14: st.d t5, a0, 40 -15: st.d t6, a0, 48 -16: st.d t7, a0, 56 + /* align up destination address */ + andi t1, a0, 7 + sub.d t0, zero, t1 + addi.d t0, t0, 8 + add.d a1, a1, t0 + add.d a0, a0, t0 - addi.d a0, a0, 64 - addi.d a1, a1, 64 - addi.d a2, a2, -64 - 
bge a2, a3, 1b + addi.d a4, a3, -64 + bgeu a1, a4, .Llt64 - beqz a2, 19f + /* copy 64 bytes at a time */ +.Lloop64: +2: ld.d t0, a1, 0 +3: ld.d t1, a1, 8 +4: ld.d t2, a1, 16 +5: ld.d t3, a1, 24 +6: ld.d t4, a1, 32 +7: ld.d t5, a1, 40 +8: ld.d t6, a1, 48 +9: ld.d t7, a1, 56 + addi.d a1, a1, 64 +10: st.d t0, a0, 0 +11: st.d t1, a0, 8 +12: st.d t2, a0, 16 +13: st.d t3, a0, 24 +14: st.d t4, a0, 32 +15: st.d t5, a0, 40 +16: st.d t6, a0, 48 +17: st.d t7, a0, 56 + addi.d a0, a0, 64 + bltu a1, a4, .Lloop64 /* copy the remaining bytes */ -17: ld.b t0, a1, 0 -18: st.b t0, a0, 0 - addi.d a0, a0, 1 - addi.d a1, a1, 1 - addi.d a2, a2, -1 - bgt a2, zero, 17b +.Llt64: + addi.d a4, a3, -32 + bgeu a1, a4, .Llt32 +18: ld.d t0, a1, 0 +19: ld.d t1, a1, 8 +20: ld.d t2, a1, 16 +21: ld.d t3, a1, 24 + addi.d a1, a1, 32 +22: st.d t0, a0, 0 +23: st.d t1, a0, 8 +24: st.d t2, a0, 16 +25: st.d t3, a0, 24 + addi.d a0, a0, 32 + +.Llt32: + addi.d a4, a3, -16 + bgeu a1, a4, .Llt16 +26: ld.d t0, a1, 0 +27: ld.d t1, a1, 8 + addi.d a1, a1, 16 +28: st.d t0, a0, 0 +29: st.d t1, a0, 8 + addi.d a0, a0, 16 + +.Llt16: + addi.d a4, a3, -8 + bgeu a1, a4, .Llt8 +30: ld.d t0, a1, 0 +31: st.d t0, a0, 0 + +.Llt8: +32: ld.d t0, a3, -8 +33: st.d t0, a2, -8 /* return */ -19: move a0, a2 + move a0, zero + jr ra + + .align 5 +.Lsmall: + pcaddi t0, 8 + slli.d a3, a2, 5 + add.d t0, t0, a3 + jr t0 + + .align 5 + move a0, zero + jr ra + + .align 5 +34: ld.b t0, a1, 0 +35: st.b t0, a0, 0 + move a0, zero + jr ra + + .align 5 +36: ld.h t0, a1, 0 +37: st.h t0, a0, 0 + move a0, zero + jr ra + + .align 5 +38: ld.h t0, a1, 0 +39: ld.b t1, a1, 2 +40: st.h t0, a0, 0 +41: st.b t1, a0, 2 + move a0, zero + jr ra + + .align 5 +42: ld.w t0, a1, 0 +43: st.w t0, a0, 0 + move a0, zero + jr ra + + .align 5 +44: ld.w t0, a1, 0 +45: ld.b t1, a1, 4 +46: st.w t0, a0, 0 +47: st.b t1, a0, 4 + move a0, zero + jr ra + + .align 5 +48: ld.w t0, a1, 0 +49: ld.h t1, a1, 4 +50: st.w t0, a0, 0 +51: st.h t1, a0, 4 + move a0, zero + jr ra + + .align 5 +52: ld.w t0, a1, 0 +53: ld.w t1, a1, 3 +54: st.w t0, a0, 0 +55: st.w t1, a0, 3 + move a0, zero + jr ra + + .align 5 +56: ld.d t0, a1, 0 +57: st.d t0, a0, 0 + move a0, zero jr ra /* fixup and ex_table */ + _asm_extable 0b, .L_fixup_handle_0 _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_1 - _asm_extable 3b, .L_fixup_handle_2 - _asm_extable 4b, .L_fixup_handle_3 - _asm_extable 5b, .L_fixup_handle_4 - _asm_extable 6b, .L_fixup_handle_5 - _asm_extable 7b, .L_fixup_handle_6 - _asm_extable 8b, .L_fixup_handle_7 + _asm_extable 2b, .L_fixup_handle_0 + _asm_extable 3b, .L_fixup_handle_0 + _asm_extable 4b, .L_fixup_handle_0 + _asm_extable 5b, .L_fixup_handle_0 + _asm_extable 6b, .L_fixup_handle_0 + _asm_extable 7b, .L_fixup_handle_0 + _asm_extable 8b, .L_fixup_handle_0 _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_1 - _asm_extable 11b, .L_fixup_handle_2 - _asm_extable 12b, .L_fixup_handle_3 - _asm_extable 13b, .L_fixup_handle_4 - _asm_extable 14b, .L_fixup_handle_5 - _asm_extable 15b, .L_fixup_handle_6 - _asm_extable 16b, .L_fixup_handle_7 - _asm_extable 17b, .L_fixup_handle_0 + _asm_extable 10b, .L_fixup_handle_0 + _asm_extable 11b, .L_fixup_handle_1 + _asm_extable 12b, .L_fixup_handle_2 + _asm_extable 13b, .L_fixup_handle_3 + _asm_extable 14b, .L_fixup_handle_4 + _asm_extable 15b, .L_fixup_handle_5 + _asm_extable 16b, .L_fixup_handle_6 + _asm_extable 17b, .L_fixup_handle_7 _asm_extable 18b, .L_fixup_handle_0 + _asm_extable 19b, .L_fixup_handle_0 + _asm_extable 20b, .L_fixup_handle_0 + 
_asm_extable 21b, .L_fixup_handle_0 + _asm_extable 22b, .L_fixup_handle_0 + _asm_extable 23b, .L_fixup_handle_1 + _asm_extable 24b, .L_fixup_handle_2 + _asm_extable 25b, .L_fixup_handle_3 + _asm_extable 26b, .L_fixup_handle_0 + _asm_extable 27b, .L_fixup_handle_0 + _asm_extable 28b, .L_fixup_handle_0 + _asm_extable 29b, .L_fixup_handle_1 + _asm_extable 30b, .L_fixup_handle_0 + _asm_extable 31b, .L_fixup_handle_0 + _asm_extable 32b, .L_fixup_handle_0 + _asm_extable 33b, .L_fixup_handle_1 + _asm_extable 34b, .L_fixup_handle_s0 + _asm_extable 35b, .L_fixup_handle_s0 + _asm_extable 36b, .L_fixup_handle_s0 + _asm_extable 37b, .L_fixup_handle_s0 + _asm_extable 38b, .L_fixup_handle_s0 + _asm_extable 39b, .L_fixup_handle_s0 + _asm_extable 40b, .L_fixup_handle_s0 + _asm_extable 41b, .L_fixup_handle_s2 + _asm_extable 42b, .L_fixup_handle_s0 + _asm_extable 43b, .L_fixup_handle_s0 + _asm_extable 44b, .L_fixup_handle_s0 + _asm_extable 45b, .L_fixup_handle_s0 + _asm_extable 46b, .L_fixup_handle_s0 + _asm_extable 47b, .L_fixup_handle_s4 + _asm_extable 48b, .L_fixup_handle_s0 + _asm_extable 49b, .L_fixup_handle_s0 + _asm_extable 50b, .L_fixup_handle_s0 + _asm_extable 51b, .L_fixup_handle_s4 + _asm_extable 52b, .L_fixup_handle_s0 + _asm_extable 53b, .L_fixup_handle_s0 + _asm_extable 54b, .L_fixup_handle_s0 + _asm_extable 55b, .L_fixup_handle_s4 + _asm_extable 56b, .L_fixup_handle_s0 + _asm_extable 57b, .L_fixup_handle_s0 SYM_FUNC_END(__copy_user_fast) diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index 81d71fcb069f..9f9ea3014af0 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -42,6 +42,65 @@ SYM_FUNC_START(__memcpy_generic) jr ra SYM_FUNC_END(__memcpy_generic) + .align 5 +SYM_FUNC_START_NOALIGN(__memcpy_small) + pcaddi t0, 8 + slli.d a2, a2, 5 + add.d t0, t0, a2 + jr t0 + + .align 5 +0: jr ra + + .align 5 +1: ld.b t0, a1, 0 + st.b t0, a0, 0 + jr ra + + .align 5 +2: ld.h t0, a1, 0 + st.h t0, a0, 0 + jr ra + + .align 5 +3: ld.h t0, a1, 0 + ld.b t1, a1, 2 + st.h t0, a0, 0 + st.b t1, a0, 2 + jr ra + + .align 5 +4: ld.w t0, a1, 0 + st.w t0, a0, 0 + jr ra + + .align 5 +5: ld.w t0, a1, 0 + ld.b t1, a1, 4 + st.w t0, a0, 0 + st.b t1, a0, 4 + jr ra + + .align 5 +6: ld.w t0, a1, 0 + ld.h t1, a1, 4 + st.w t0, a0, 0 + st.h t1, a0, 4 + jr ra + + .align 5 +7: ld.w t0, a1, 0 + ld.w t1, a1, 3 + st.w t0, a0, 0 + st.w t1, a0, 3 + jr ra + + .align 5 +8: ld.d t0, a1, 0 + st.d t0, a0, 0 + jr ra +SYM_FUNC_END(__memcpy_small) + /* * void *__memcpy_fast(void *dst, const void *src, size_t n) * @@ -50,14 +109,27 @@ SYM_FUNC_END(__memcpy_generic) * a2: n */ SYM_FUNC_START(__memcpy_fast) - move a3, a0 - beqz a2, 3f + sltui t0, a2, 9 + bnez t0, __memcpy_small + + add.d a3, a1, a2 + add.d a2, a0, a2 + ld.d a6, a1, 0 + ld.d a7, a3, -8 + + /* align up destination address */ + andi t1, a0, 7 + sub.d t0, zero, t1 + addi.d t0, t0, 8 + add.d a1, a1, t0 + add.d a5, a0, t0 - ori a4, zero, 64 - blt a2, a4, 2f + addi.d a4, a3, -64 + bgeu a1, a4, .Llt64 /* copy 64 bytes at a time */ -1: ld.d t0, a1, 0 +.Lloop64: + ld.d t0, a1, 0 ld.d t1, a1, 8 ld.d t2, a1, 16 ld.d t3, a1, 24 @@ -65,31 +137,53 @@ SYM_FUNC_START(__memcpy_fast) ld.d t5, a1, 40 ld.d t6, a1, 48 ld.d t7, a1, 56 - st.d t0, a0, 0 - st.d t1, a0, 8 - st.d t2, a0, 16 - st.d t3, a0, 24 - st.d t4, a0, 32 - st.d t5, a0, 40 - st.d t6, a0, 48 - st.d t7, a0, 56 - - addi.d a0, a0, 64 addi.d a1, a1, 64 - addi.d a2, a2, -64 - bge a2, a4, 1b - - beqz a2, 3f + st.d t0, a5, 0 + st.d t1, a5, 8 + st.d t2, a5, 16 + st.d t3, a5, 24 + st.d t4, a5, 32 + 
st.d t5, a5, 40 + st.d t6, a5, 48 + st.d t7, a5, 56 + addi.d a5, a5, 64 + bltu a1, a4, .Lloop64 /* copy the remaining bytes */ -2: ld.b t0, a1, 0 - st.b t0, a0, 0 - addi.d a0, a0, 1 - addi.d a1, a1, 1 - addi.d a2, a2, -1 - bgt a2, zero, 2b +.Llt64: + addi.d a4, a3, -32 + bgeu a1, a4, .Llt32 + ld.d t0, a1, 0 + ld.d t1, a1, 8 + ld.d t2, a1, 16 + ld.d t3, a1, 24 + addi.d a1, a1, 32 + st.d t0, a5, 0 + st.d t1, a5, 8 + st.d t2, a5, 16 + st.d t3, a5, 24 + addi.d a5, a5, 32 + +.Llt32: + addi.d a4, a3, -16 + bgeu a1, a4, .Llt16 + ld.d t0, a1, 0 + ld.d t1, a1, 8 + addi.d a1, a1, 16 + st.d t0, a5, 0 + st.d t1, a5, 8 + addi.d a5, a5, 16 + +.Llt16: + addi.d a4, a3, -8 + bgeu a1, a4, .Llt8 + ld.d t0, a1, 0 + st.d t0, a5, 0 + +.Llt8: + st.d a6, a0, 0 + st.d a7, a2, -8 /* return */ -3: move a0, a3 jr ra SYM_FUNC_END(__memcpy_fast) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index d179b717c4b7..af2f7ef15142 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -11,23 +11,9 @@ #include SYM_FUNC_START(memmove) - blt a0, a1, 1f /* dst < src, memcpy */ - blt a1, a0, 3f /* src < dst, rmemcpy */ + blt a0, a1, memcpy /* dst < src, memcpy */ + blt a1, a0, rmemcpy /* src < dst, rmemcpy */ jr ra /* dst == src, return */ - - /* if (src - dst) < 64, copy 1 byte at a time */ -1: ori a3, zero, 64 - sub.d t0, a1, a0 - blt t0, a3, 2f - b memcpy -2: b __memcpy_generic - - /* if (dst - src) < 64, copy 1 byte at a time */ -3: ori a3, zero, 64 - sub.d t0, a0, a1 - blt t0, a3, 4f - b rmemcpy -4: b __rmemcpy_generic SYM_FUNC_END(memmove) EXPORT_SYMBOL(memmove) @@ -73,49 +59,79 @@ SYM_FUNC_END(__rmemcpy_generic) * a2: n */ SYM_FUNC_START(__rmemcpy_fast) - move a3, a0 - beqz a2, 3f + sltui t0, a2, 9 + bnez t0, __memcpy_small - add.d a0, a0, a2 - add.d a1, a1, a2 + add.d a3, a1, a2 + add.d a2, a0, a2 + ld.d a6, a1, 0 + ld.d a7, a3, -8 + + /* align up destination address */ + andi t1, a2, 7 + sub.d a3, a3, t1 + sub.d a5, a2, t1 - ori a4, zero, 64 - blt a2, a4, 2f + addi.d a4, a1, 64 + bgeu a4, a3, .Llt64 /* copy 64 bytes at a time */ -1: ld.d t0, a1, -8 - ld.d t1, a1, -16 - ld.d t2, a1, -24 - ld.d t3, a1, -32 - ld.d t4, a1, -40 - ld.d t5, a1, -48 - ld.d t6, a1, -56 - ld.d t7, a1, -64 - st.d t0, a0, -8 - st.d t1, a0, -16 - st.d t2, a0, -24 - st.d t3, a0, -32 - st.d t4, a0, -40 - st.d t5, a0, -48 - st.d t6, a0, -56 - st.d t7, a0, -64 - - addi.d a0, a0, -64 - addi.d a1, a1, -64 - addi.d a2, a2, -64 - bge a2, a4, 1b - - beqz a2, 3f +.Lloop64: + ld.d t0, a3, -8 + ld.d t1, a3, -16 + ld.d t2, a3, -24 + ld.d t3, a3, -32 + ld.d t4, a3, -40 + ld.d t5, a3, -48 + ld.d t6, a3, -56 + ld.d t7, a3, -64 + addi.d a3, a3, -64 + st.d t0, a5, -8 + st.d t1, a5, -16 + st.d t2, a5, -24 + st.d t3, a5, -32 + st.d t4, a5, -40 + st.d t5, a5, -48 + st.d t6, a5, -56 + st.d t7, a5, -64 + addi.d a5, a5, -64 + bltu a4, a3, .Lloop64 /* copy the remaining bytes */ -2: ld.b t0, a1, -1 - st.b t0, a0, -1 - addi.d a0, a0, -1 - addi.d a1, a1, -1 - addi.d a2, a2, -1 - bgt a2, zero, 2b +.Llt64: + addi.d a4, a1, 32 + bgeu a4, a3, .Llt32 + ld.d t0, a3, -8 + ld.d t1, a3, -16 + ld.d t2, a3, -24 + ld.d t3, a3, -32 + addi.d a3, a3, -32 + st.d t0, a5, -8 + st.d t1, a5, -16 + st.d t2, a5, -24 + st.d t3, a5, -32 + addi.d a5, a5, -32 + +.Llt32: + addi.d a4, a1, 16 + bgeu a4, a3, .Llt16 + ld.d t0, a3, -8 + ld.d t1, a3, -16 + addi.d a3, a3, -16 + st.d t0, a5, -8 + st.d t1, a5, -16 + addi.d a5, a5, -16 + +.Llt16: + addi.d a4, a1, 8 + bgeu a4, a3, .Llt8 + ld.d t0, a3, -8 + st.d t0, a5, -8 + +.Llt8: + st.d a6, a0, 0 + st.d a7, a2, 
-8 /* return */ -3: move a0, a3 jr ra SYM_FUNC_END(__rmemcpy_fast) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 414b857f5d0f..6d70f3c91ec7 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -54,38 +54,106 @@ SYM_FUNC_END(__memset_generic) * a2: n */ SYM_FUNC_START(__memset_fast) - move a3, a0 - beqz a2, 3f - - ori a4, zero, 64 - blt a2, a4, 2f - /* fill a1 to 64 bits */ fill_to_64 a1 - /* set 64 bytes at a time */ -1: st.d a1, a0, 0 - st.d a1, a0, 8 - st.d a1, a0, 16 - st.d a1, a0, 24 - st.d a1, a0, 32 - st.d a1, a0, 40 - st.d a1, a0, 48 - st.d a1, a0, 56 + sltui t0, a2, 9 + bnez t0, .Lsmall - addi.d a0, a0, 64 - addi.d a2, a2, -64 - bge a2, a4, 1b + add.d a2, a0, a2 + st.d a1, a0, 0 - beqz a2, 3f + /* align up address */ + addi.d a3, a0, 8 + bstrins.d a3, zero, 2, 0 + + addi.d a4, a2, -64 + bgeu a3, a4, .Llt64 + + /* set 64 bytes at a time */ +.Lloop64: + st.d a1, a3, 0 + st.d a1, a3, 8 + st.d a1, a3, 16 + st.d a1, a3, 24 + st.d a1, a3, 32 + st.d a1, a3, 40 + st.d a1, a3, 48 + st.d a1, a3, 56 + addi.d a3, a3, 64 + bltu a3, a4, .Lloop64 /* set the remaining bytes */ -2: st.b a1, a0, 0 - addi.d a0, a0, 1 - addi.d a2, a2, -1 - bgt a2, zero, 2b +.Llt64: + addi.d a4, a2, -32 + bgeu a3, a4, .Llt32 + st.d a1, a3, 0 + st.d a1, a3, 8 + st.d a1, a3, 16 + st.d a1, a3, 24 + addi.d a3, a3, 32 + +.Llt32: + addi.d a4, a2, -16 + bgeu a3, a4, .Llt16 + st.d a1, a3, 0 + st.d a1, a3, 8 + addi.d a3, a3, 16 + +.Llt16: + addi.d a4, a2, -8 + bgeu a3, a4, .Llt8 + st.d a1, a3, 0 + +.Llt8: + st.d a1, a2, -8 /* return */ -3: move a0, a3 + jr ra + + .align 4 +.Lsmall: + pcaddi t0, 4 + slli.d a2, a2, 4 + add.d t0, t0, a2 + jr t0 + + .align 4 +0: jr ra + + .align 4 +1: st.b a1, a0, 0 + jr ra + + .align 4 +2: st.h a1, a0, 0 + jr ra + + .align 4 +3: st.h a1, a0, 0 + st.b a1, a0, 2 + jr ra + + .align 4 +4: st.w a1, a0, 0 + jr ra + + .align 4 +5: st.w a1, a0, 0 + st.b a1, a0, 4 + jr ra + + .align 4 +6: st.w a1, a0, 0 + st.h a1, a0, 4 + jr ra + + .align 4 +7: st.w a1, a0, 0 + st.w a1, a0, 3 + jr ra + + .align 4 +8: st.d a1, a0, 0 jr ra SYM_FUNC_END(__memset_fast) -- Gitee From d78de8534bfbd4014618dd47040b4792648e3263 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 13 Jul 2023 11:04:53 +0800 Subject: [PATCH 199/241] LoongArch: Fix return value underflow in exception path This patch fixes an underflow issue in the return value within the exception path, specifically at .Llt8 when the remaining length is less than 8 bytes. 
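To make the arithmetic concrete, here is a minimal C model of what the
.L_fixup_handle_<n> handlers compute (an illustration only, not kernel
code; 'end' stands for a2, 'cur' for the faulting a0, and 'n' for the
handler index):

	/* bytes that could not be cleared/copied when store n faults */
	static unsigned long fixup_return(unsigned long end,
					  unsigned long cur, unsigned int n)
	{
		return (end - cur) - 8 * n;
	}

When the code branches directly to .Llt8, end - cur is already smaller
than 8, so the old n == 1 handler for the final store computed e.g.
5 - 8 and wrapped around to a huge unsigned count. Advancing a0 past
the store at label 15 and pointing the final store at the n == 0
handler keeps the result in the expected [0, 8] range on both paths.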
Change-Id: Ie340caeb0dd8b7225258bccb05e09f99c480b4bd
Cc: stable@vger.kernel.org
Fixes: 8941e93ca590 ("LoongArch: Optimize memory ops (memset/memcpy/memmove)")
Reported-by: Weihao Li
Signed-off-by: WANG Rui
Signed-off-by: Huacai Chen
---
 arch/loongarch/lib/clear_user.S | 3 ++-
 arch/loongarch/lib/copy_user.S  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index d7ee1e6666b4..bbc53ec352d6 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -108,6 +108,7 @@ SYM_FUNC_START(__clear_user_fast)
 	addi.d	a3, a2, -8
 	bgeu	a0, a3, .Llt8
 15:	st.d	zero, a0, 0
+	addi.d	a0, a0, 8
 
 .Llt8:
 16:	st.d	zero, a2, -8
@@ -188,7 +189,7 @@ SYM_FUNC_START(__clear_user_fast)
 	_asm_extable	13b, .L_fixup_handle_0
 	_asm_extable	14b, .L_fixup_handle_1
 	_asm_extable	15b, .L_fixup_handle_0
-	_asm_extable	16b, .L_fixup_handle_1
+	_asm_extable	16b, .L_fixup_handle_0
 	_asm_extable	17b, .L_fixup_handle_s0
 	_asm_extable	18b, .L_fixup_handle_s0
 	_asm_extable	19b, .L_fixup_handle_s0
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 106536e48419..daf08cbad6d6 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -136,6 +136,7 @@ SYM_FUNC_START(__copy_user_fast)
 	bgeu	a1, a4, .Llt8
 30:	ld.d	t0, a1, 0
 31:	st.d	t0, a0, 0
+	addi.d	a0, a0, 8
 
 .Llt8:
 32:	ld.d	t0, a3, -8
@@ -246,7 +247,7 @@ SYM_FUNC_START(__copy_user_fast)
 	_asm_extable	30b, .L_fixup_handle_0
 	_asm_extable	31b, .L_fixup_handle_0
 	_asm_extable	32b, .L_fixup_handle_0
-	_asm_extable	33b, .L_fixup_handle_1
+	_asm_extable	33b, .L_fixup_handle_0
 	_asm_extable	34b, .L_fixup_handle_s0
 	_asm_extable	35b, .L_fixup_handle_s0
 	_asm_extable	36b, .L_fixup_handle_s0
-- Gitee

From 40b57ccb47874f02590ddf42d75b458c43a91ebe Mon Sep 17 00:00:00 2001
From: Weihao Li
Date: Thu, 31 Aug 2023 11:37:59 +0800
Subject: [PATCH 200/241] LoongArch: Adjust {copy, clear}_user exception handler behavior

The {copy, clear}_user functions should return the number of bytes that
could not be {copied, cleared}.

So, fall back to {copying, clearing} byte by byte when an ld.{d,w,h} or
st.{d,w,h} instruction traps into an exception.
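In rough C terms, the new fixup policy behaves like the sketch below
(a hedged model, not the actual assembly; copy_byte() is a hypothetical
stand-in for one ld.b/st.b pair with its own exception fixup):

	static unsigned long copy_tail(char *dst, const char *src,
				       unsigned long n)
	{
		while (n) {
			if (copy_byte(dst, src))	/* faulted: stop here */
				break;
			dst++;
			src++;
			n--;
		}
		return n;	/* bytes that could not be copied */
	}

A fault in one of the multi-byte accesses first recomputes the remaining
length (.Llarge_fixup), then retries byte by byte (.Lsmall_fixup), so the
returned count is exact rather than rounded to the width of the faulting
load or store.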
Change-Id: I7d443309a522214d11505e80de3f95f6adf57245 Reviewed-by: WANG Rui Signed-off-by: Weihao Li Signed-off-by: Huacai Chen --- arch/loongarch/lib/clear_user.S | 87 ++++++++--------- arch/loongarch/lib/copy_user.S | 161 ++++++++++++++++---------------- 2 files changed, 127 insertions(+), 121 deletions(-) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index bbc53ec352d6..248121a23ecd 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -11,19 +11,6 @@ #include #include -.irp to, 0, 1, 2, 3, 4, 5, 6, 7 -.L_fixup_handle_\to\(): - sub.d a0, a2, a0 - addi.d a0, a0, (\to) * (-8) - jr ra -.endr - -.irp to, 0, 2, 4 -.L_fixup_handle_s\to\(): - addi.d a0, a1, -\to - jr ra -.endr - SYM_FUNC_START(__clear_user) /* * Some CPUs support hardware unaligned access @@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic) 2: move a0, a1 jr ra - _asm_extable 1b, .L_fixup_handle_s0 + _asm_extable 1b, 2b SYM_FUNC_END(__clear_user_generic) /* @@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast) jr ra /* fixup and ex_table */ - _asm_extable 0b, .L_fixup_handle_0 - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_1 - _asm_extable 3b, .L_fixup_handle_2 - _asm_extable 4b, .L_fixup_handle_3 - _asm_extable 5b, .L_fixup_handle_4 - _asm_extable 6b, .L_fixup_handle_5 - _asm_extable 7b, .L_fixup_handle_6 - _asm_extable 8b, .L_fixup_handle_7 - _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_1 - _asm_extable 11b, .L_fixup_handle_2 - _asm_extable 12b, .L_fixup_handle_3 - _asm_extable 13b, .L_fixup_handle_0 - _asm_extable 14b, .L_fixup_handle_1 - _asm_extable 15b, .L_fixup_handle_0 - _asm_extable 16b, .L_fixup_handle_0 - _asm_extable 17b, .L_fixup_handle_s0 - _asm_extable 18b, .L_fixup_handle_s0 - _asm_extable 19b, .L_fixup_handle_s0 - _asm_extable 20b, .L_fixup_handle_s2 - _asm_extable 21b, .L_fixup_handle_s0 - _asm_extable 22b, .L_fixup_handle_s0 - _asm_extable 23b, .L_fixup_handle_s4 - _asm_extable 24b, .L_fixup_handle_s0 - _asm_extable 25b, .L_fixup_handle_s4 - _asm_extable 26b, .L_fixup_handle_s0 - _asm_extable 27b, .L_fixup_handle_s4 - _asm_extable 28b, .L_fixup_handle_s0 +.Llarge_fixup: + sub.d a1, a2, a0 + +.Lsmall_fixup: +29: st.b zero, a0, 0 + addi.d a0, a0, 1 + addi.d a1, a1, -1 + bgt a1, zero, 29b + +.Lexit: + move a0, a1 + jr ra + + _asm_extable 0b, .Lsmall_fixup + _asm_extable 1b, .Llarge_fixup + _asm_extable 2b, .Llarge_fixup + _asm_extable 3b, .Llarge_fixup + _asm_extable 4b, .Llarge_fixup + _asm_extable 5b, .Llarge_fixup + _asm_extable 6b, .Llarge_fixup + _asm_extable 7b, .Llarge_fixup + _asm_extable 8b, .Llarge_fixup + _asm_extable 9b, .Llarge_fixup + _asm_extable 10b, .Llarge_fixup + _asm_extable 11b, .Llarge_fixup + _asm_extable 12b, .Llarge_fixup + _asm_extable 13b, .Llarge_fixup + _asm_extable 14b, .Llarge_fixup + _asm_extable 15b, .Llarge_fixup + _asm_extable 16b, .Llarge_fixup + _asm_extable 17b, .Lexit + _asm_extable 18b, .Lsmall_fixup + _asm_extable 19b, .Lsmall_fixup + _asm_extable 20b, .Lsmall_fixup + _asm_extable 21b, .Lsmall_fixup + _asm_extable 22b, .Lsmall_fixup + _asm_extable 23b, .Lsmall_fixup + _asm_extable 24b, .Lsmall_fixup + _asm_extable 25b, .Lsmall_fixup + _asm_extable 26b, .Lsmall_fixup + _asm_extable 27b, .Lsmall_fixup + _asm_extable 28b, .Lsmall_fixup + _asm_extable 29b, .Lexit SYM_FUNC_END(__clear_user_fast) diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index daf08cbad6d6..58cfc6285d46 100644 --- a/arch/loongarch/lib/copy_user.S +++ 
b/arch/loongarch/lib/copy_user.S @@ -11,19 +11,6 @@ #include #include -.irp to, 0, 1, 2, 3, 4, 5, 6, 7 -.L_fixup_handle_\to\(): - sub.d a0, a2, a0 - addi.d a0, a0, (\to) * (-8) - jr ra -.endr - -.irp to, 0, 2, 4 -.L_fixup_handle_s\to\(): - addi.d a0, a2, -\to - jr ra -.endr - SYM_FUNC_START(__copy_user) /* * Some CPUs support hardware unaligned access @@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic) 3: move a0, a2 jr ra - _asm_extable 1b, .L_fixup_handle_s0 - _asm_extable 2b, .L_fixup_handle_s0 + _asm_extable 1b, 3b + _asm_extable 2b, 3b SYM_FUNC_END(__copy_user_generic) /* @@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast) sltui t0, a2, 9 bnez t0, .Lsmall - add.d a3, a1, a2 - add.d a2, a0, a2 0: ld.d t0, a1, 0 1: st.d t0, a0, 0 + add.d a3, a1, a2 + add.d a2, a0, a2 /* align up destination address */ andi t1, a0, 7 @@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast) 7: ld.d t5, a1, 40 8: ld.d t6, a1, 48 9: ld.d t7, a1, 56 - addi.d a1, a1, 64 10: st.d t0, a0, 0 11: st.d t1, a0, 8 12: st.d t2, a0, 16 @@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast) 15: st.d t5, a0, 40 16: st.d t6, a0, 48 17: st.d t7, a0, 56 + addi.d a1, a1, 64 addi.d a0, a0, 64 bltu a1, a4, .Lloop64 @@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast) 19: ld.d t1, a1, 8 20: ld.d t2, a1, 16 21: ld.d t3, a1, 24 - addi.d a1, a1, 32 22: st.d t0, a0, 0 23: st.d t1, a0, 8 24: st.d t2, a0, 16 25: st.d t3, a0, 24 + addi.d a1, a1, 32 addi.d a0, a0, 32 .Llt32: @@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast) bgeu a1, a4, .Llt16 26: ld.d t0, a1, 0 27: ld.d t1, a1, 8 - addi.d a1, a1, 16 28: st.d t0, a0, 0 29: st.d t1, a0, 8 + addi.d a1, a1, 16 addi.d a0, a0, 16 .Llt16: @@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast) bgeu a1, a4, .Llt8 30: ld.d t0, a1, 0 31: st.d t0, a0, 0 + addi.d a1, a1, 8 addi.d a0, a0, 8 .Llt8: @@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast) jr ra /* fixup and ex_table */ - _asm_extable 0b, .L_fixup_handle_0 - _asm_extable 1b, .L_fixup_handle_0 - _asm_extable 2b, .L_fixup_handle_0 - _asm_extable 3b, .L_fixup_handle_0 - _asm_extable 4b, .L_fixup_handle_0 - _asm_extable 5b, .L_fixup_handle_0 - _asm_extable 6b, .L_fixup_handle_0 - _asm_extable 7b, .L_fixup_handle_0 - _asm_extable 8b, .L_fixup_handle_0 - _asm_extable 9b, .L_fixup_handle_0 - _asm_extable 10b, .L_fixup_handle_0 - _asm_extable 11b, .L_fixup_handle_1 - _asm_extable 12b, .L_fixup_handle_2 - _asm_extable 13b, .L_fixup_handle_3 - _asm_extable 14b, .L_fixup_handle_4 - _asm_extable 15b, .L_fixup_handle_5 - _asm_extable 16b, .L_fixup_handle_6 - _asm_extable 17b, .L_fixup_handle_7 - _asm_extable 18b, .L_fixup_handle_0 - _asm_extable 19b, .L_fixup_handle_0 - _asm_extable 20b, .L_fixup_handle_0 - _asm_extable 21b, .L_fixup_handle_0 - _asm_extable 22b, .L_fixup_handle_0 - _asm_extable 23b, .L_fixup_handle_1 - _asm_extable 24b, .L_fixup_handle_2 - _asm_extable 25b, .L_fixup_handle_3 - _asm_extable 26b, .L_fixup_handle_0 - _asm_extable 27b, .L_fixup_handle_0 - _asm_extable 28b, .L_fixup_handle_0 - _asm_extable 29b, .L_fixup_handle_1 - _asm_extable 30b, .L_fixup_handle_0 - _asm_extable 31b, .L_fixup_handle_0 - _asm_extable 32b, .L_fixup_handle_0 - _asm_extable 33b, .L_fixup_handle_0 - _asm_extable 34b, .L_fixup_handle_s0 - _asm_extable 35b, .L_fixup_handle_s0 - _asm_extable 36b, .L_fixup_handle_s0 - _asm_extable 37b, .L_fixup_handle_s0 - _asm_extable 38b, .L_fixup_handle_s0 - _asm_extable 39b, .L_fixup_handle_s0 - _asm_extable 40b, .L_fixup_handle_s0 - _asm_extable 41b, .L_fixup_handle_s2 - _asm_extable 42b, .L_fixup_handle_s0 - _asm_extable 
43b, .L_fixup_handle_s0
-	_asm_extable	44b, .L_fixup_handle_s0
-	_asm_extable	45b, .L_fixup_handle_s0
-	_asm_extable	46b, .L_fixup_handle_s0
-	_asm_extable	47b, .L_fixup_handle_s4
-	_asm_extable	48b, .L_fixup_handle_s0
-	_asm_extable	49b, .L_fixup_handle_s0
-	_asm_extable	50b, .L_fixup_handle_s0
-	_asm_extable	51b, .L_fixup_handle_s4
-	_asm_extable	52b, .L_fixup_handle_s0
-	_asm_extable	53b, .L_fixup_handle_s0
-	_asm_extable	54b, .L_fixup_handle_s0
-	_asm_extable	55b, .L_fixup_handle_s4
-	_asm_extable	56b, .L_fixup_handle_s0
-	_asm_extable	57b, .L_fixup_handle_s0
+.Llarge_fixup:
+	sub.d	a2, a2, a0
+
+.Lsmall_fixup:
+58:	ld.b	t0, a1, 0
+59:	st.b	t0, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 58b
+
+.Lexit:
+	move	a0, a2
+	jr	ra
+
+	_asm_extable	0b, .Lsmall_fixup
+	_asm_extable	1b, .Lsmall_fixup
+	_asm_extable	2b, .Llarge_fixup
+	_asm_extable	3b, .Llarge_fixup
+	_asm_extable	4b, .Llarge_fixup
+	_asm_extable	5b, .Llarge_fixup
+	_asm_extable	6b, .Llarge_fixup
+	_asm_extable	7b, .Llarge_fixup
+	_asm_extable	8b, .Llarge_fixup
+	_asm_extable	9b, .Llarge_fixup
+	_asm_extable	10b, .Llarge_fixup
+	_asm_extable	11b, .Llarge_fixup
+	_asm_extable	12b, .Llarge_fixup
+	_asm_extable	13b, .Llarge_fixup
+	_asm_extable	14b, .Llarge_fixup
+	_asm_extable	15b, .Llarge_fixup
+	_asm_extable	16b, .Llarge_fixup
+	_asm_extable	17b, .Llarge_fixup
+	_asm_extable	18b, .Llarge_fixup
+	_asm_extable	19b, .Llarge_fixup
+	_asm_extable	20b, .Llarge_fixup
+	_asm_extable	21b, .Llarge_fixup
+	_asm_extable	22b, .Llarge_fixup
+	_asm_extable	23b, .Llarge_fixup
+	_asm_extable	24b, .Llarge_fixup
+	_asm_extable	25b, .Llarge_fixup
+	_asm_extable	26b, .Llarge_fixup
+	_asm_extable	27b, .Llarge_fixup
+	_asm_extable	28b, .Llarge_fixup
+	_asm_extable	29b, .Llarge_fixup
+	_asm_extable	30b, .Llarge_fixup
+	_asm_extable	31b, .Llarge_fixup
+	_asm_extable	32b, .Llarge_fixup
+	_asm_extable	33b, .Llarge_fixup
+	_asm_extable	34b, .Lexit
+	_asm_extable	35b, .Lexit
+	_asm_extable	36b, .Lsmall_fixup
+	_asm_extable	37b, .Lsmall_fixup
+	_asm_extable	38b, .Lsmall_fixup
+	_asm_extable	39b, .Lsmall_fixup
+	_asm_extable	40b, .Lsmall_fixup
+	_asm_extable	41b, .Lsmall_fixup
+	_asm_extable	42b, .Lsmall_fixup
+	_asm_extable	43b, .Lsmall_fixup
+	_asm_extable	44b, .Lsmall_fixup
+	_asm_extable	45b, .Lsmall_fixup
+	_asm_extable	46b, .Lsmall_fixup
+	_asm_extable	47b, .Lsmall_fixup
+	_asm_extable	48b, .Lsmall_fixup
+	_asm_extable	49b, .Lsmall_fixup
+	_asm_extable	50b, .Lsmall_fixup
+	_asm_extable	51b, .Lsmall_fixup
+	_asm_extable	52b, .Lsmall_fixup
+	_asm_extable	53b, .Lsmall_fixup
+	_asm_extable	54b, .Lsmall_fixup
+	_asm_extable	55b, .Lsmall_fixup
+	_asm_extable	56b, .Lsmall_fixup
+	_asm_extable	57b, .Lsmall_fixup
+	_asm_extable	58b, .Lexit
+	_asm_extable	59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
-- Gitee

From 619586e25f447b6e5d11a248c2570de518dbd266 Mon Sep 17 00:00:00 2001
From: Bibo Mao
Date: Thu, 16 Feb 2023 21:09:14 +0800
Subject: [PATCH 201/241] LoongArch: Add checksum optimization for 64-bit system

The LoongArch platform is a 64-bit system which supports 8-byte memory
accesses, but the generic checksum functions use only 4-byte accesses.
So add an 8-byte memory access optimization for the checksum functions
on LoongArch; the code is derived from the arm64 implementation.

When network hardware checksumming is disabled, iperf performance
improves by about 10% with this patch.
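As a quick illustration of the folding trick the new code relies on,
this small self-contained C program checks the end-around-carry fold
(an illustrative test only, not part of the patch):

	#include <assert.h>
	#include <stdint.h>

	static uint16_t fold(uint32_t s)	/* mirrors csum_fold() below */
	{
		uint32_t r = (s << 16) | (s >> 16);	/* rol32(s, 16) */
		return (uint16_t)(~(s + r) >> 16);
	}

	int main(void)
	{
		assert(fold(0x00012345u) == 0xdcb9u);	/* ~(0x0001 + 0x2345) */
		assert(fold(0xffff0001u) == 0xfffeu);	/* carry wraps around */
		return 0;
	}

Adding a 32-bit value to its 16-bit rotation pushes any carry out of the
low half back into the high half, so the 16-bit ones'-complement sum can
be read from the top 16 bits without an explicit carry loop.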
Change-Id: Ifcf59d0eede30bd715395bbaedc547b3fdcbbd6a Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/checksum.h | 66 ++++++++++++ arch/loongarch/lib/Makefile | 2 +- arch/loongarch/lib/csum.c | 141 ++++++++++++++++++++++++++ 3 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/checksum.h create mode 100644 arch/loongarch/lib/csum.c diff --git a/arch/loongarch/include/asm/checksum.h b/arch/loongarch/include/asm/checksum.h new file mode 100644 index 000000000000..cabbf6af44c4 --- /dev/null +++ b/arch/loongarch/include/asm/checksum.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2016 ARM Ltd. + * Copyright (C) 2023 Loongson Technology Corporation Limited + */ +#ifndef __ASM_CHECKSUM_H +#define __ASM_CHECKSUM_H + +#include +#include + +#define _HAVE_ARCH_IPV6_CSUM +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum sum); + +/* + * turns a 32-bit partial checksum (e.g. from csum_partial) into a + * 1's complement 16-bit checksum. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + u32 tmp = (__force u32)sum; + + /* + * swap the two 16-bit halves of sum + * if there is a carry from adding the two 16-bit halves, + * it will carry from the lower half into the upper half, + * giving us the correct sum in the upper half. + */ + return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16); +} +#define csum_fold csum_fold + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. ihl is the number + * of 32-bit words and is always >= 5. + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + u64 sum; + __uint128_t tmp; + int n = ihl; /* we want it signed */ + + tmp = *(const __uint128_t *)iph; + iph += 16; + n -= 4; + tmp += ((tmp >> 64) | (tmp << 64)); + sum = tmp >> 64; + do { + sum += *(const u32 *)iph; + iph += 4; + } while (--n > 0); + + sum += ror64(sum, 32); + return csum_fold((__force __wsum)(sum >> 32)); +} +#define ip_fast_csum ip_fast_csum + +extern unsigned int do_csum(const unsigned char *buff, int len); +#define do_csum do_csum + +#include + +#endif /* __ASM_CHECKSUM_H */ diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index 29eabd49792b..b2759f2a04d5 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -4,6 +4,6 @@ # lib-y += delay.o memset.o memcpy.o memmove.o \ - clear_user.o copy_user.o unaligned.o dump_tlb.o + clear_user.o copy_user.o csum.o unaligned.o dump_tlb.o obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c new file mode 100644 index 000000000000..a5e84b403c3b --- /dev/null +++ b/arch/loongarch/lib/csum.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2019-2020 Arm Ltd. + +#include +#include +#include + +#include + +static u64 accumulate(u64 sum, u64 data) +{ + sum += data; + if (sum < data) + sum += 1; + return sum; +} + +/* + * We over-read the buffer and this makes KASAN unhappy. Instead, disable + * instrumentation and call kasan explicitly. 
+ */ +unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len) +{ + unsigned int offset, shift, sum; + const u64 *ptr; + u64 data, sum64 = 0; + + if (unlikely(len == 0)) + return 0; + + offset = (unsigned long)buff & 7; + /* + * This is to all intents and purposes safe, since rounding down cannot + * result in a different page or cache line being accessed, and @buff + * should absolutely not be pointing to anything read-sensitive. We do, + * however, have to be careful not to piss off KASAN, which means using + * unchecked reads to accommodate the head and tail, for which we'll + * compensate with an explicit check up-front. + */ + kasan_check_read(buff, len); + ptr = (u64 *)(buff - offset); + len = len + offset - 8; + + /* + * Head: zero out any excess leading bytes. Shifting back by the same + * amount should be at least as fast as any other way of handling the + * odd/even alignment, and means we can ignore it until the very end. + */ + shift = offset * 8; + data = *ptr++; + data = (data >> shift) << shift; + + /* + * Body: straightforward aligned loads from here on (the paired loads + * underlying the quadword type still only need dword alignment). The + * main loop strictly excludes the tail, so the second loop will always + * run at least once. + */ + while (unlikely(len > 64)) { + __uint128_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = *(__uint128_t *)ptr; + tmp2 = *(__uint128_t *)(ptr + 2); + tmp3 = *(__uint128_t *)(ptr + 4); + tmp4 = *(__uint128_t *)(ptr + 6); + + len -= 64; + ptr += 8; + + /* This is the "don't dump the carry flag into a GPR" idiom */ + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp2 += (tmp2 >> 64) | (tmp2 << 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp4 += (tmp4 >> 64) | (tmp4 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp2 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp3 = ((tmp3 >> 64) << 64) | (tmp4 >> 64); + tmp3 += (tmp3 >> 64) | (tmp3 << 64); + tmp1 = ((tmp1 >> 64) << 64) | (tmp3 >> 64); + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + tmp1 = ((tmp1 >> 64) << 64) | sum64; + tmp1 += (tmp1 >> 64) | (tmp1 << 64); + sum64 = tmp1 >> 64; + } + while (len > 8) { + __uint128_t tmp; + + sum64 = accumulate(sum64, data); + tmp = *(__uint128_t *)ptr; + + len -= 16; + ptr += 2; + + data = tmp >> 64; + sum64 = accumulate(sum64, tmp); + } + if (len > 0) { + sum64 = accumulate(sum64, data); + data = *ptr; + len -= 8; + } + /* + * Tail: zero any over-read bytes similarly to the head, again + * preserving odd/even alignment. 
+ */ + shift = len * -8; + data = (data << shift) >> shift; + sum64 = accumulate(sum64, data); + + /* Finally, folding */ + sum64 += (sum64 >> 32) | (sum64 << 32); + sum = sum64 >> 32; + sum += (sum >> 16) | (sum << 16); + if (offset & 1) + return (u16)swab32(sum); + + return sum >> 16; +} + +__sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + __uint128_t src, dst; + u64 sum = (__force u64)csum; + + src = *(const __uint128_t *)saddr->s6_addr; + dst = *(const __uint128_t *)daddr->s6_addr; + + sum += (__force u32)htonl(len); + sum += (u32)proto << 24; + src += (src >> 64) | (src << 64); + dst += (dst >> 64) | (dst << 64); + + sum = accumulate(sum, src >> 64); + sum = accumulate(sum, dst >> 64); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold((__force __wsum)(sum >> 32)); +} +EXPORT_SYMBOL(csum_ipv6_magic); -- Gitee From 9e7f9c1aa110322a4c5d3a9fbcfe421a4ca7498e Mon Sep 17 00:00:00 2001 From: Min Zhou Date: Mon, 19 Jul 2021 17:08:07 +0800 Subject: [PATCH 202/241] LoongArch: crypto: Add crc32 and crc32c hw acceleration With a blatant copy of some MIPS bits we introduce the crc32 and crc32c hw accelerated module to LoongArch. LoongArch has provided these instructions to calculate crc32 and crc32c: * crc.w.b.w crcc.w.b.w * crc.w.h.w crcc.w.h.w * crc.w.w.w crcc.w.w.w * crc.w.d.w crcc.w.d.w So we can make use of these instructions to improve the performance of calculation for crc32(c) checksums. As can be seen from the following test results, crc32(c) instructions can improve the performance by 58%. Software implemention Hardware acceleration Buffer size time cost (seconds) time cost (seconds) Accel. 100 KB 0.000845 0.000534 59.1% 1 MB 0.007758 0.004836 59.4% 10 MB 0.076593 0.047682 59.4% 100 MB 0.756734 0.479126 58.5% 1000 MB 7.563841 4.778266 58.5% Change-Id: I61ef5117b7a16a0cf168330859adc083c161dddc Signed-off-by: Min Zhou Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 + arch/loongarch/crypto/Makefile | 6 + arch/loongarch/crypto/crc32-loongarch.c | 304 ++++++++++++++++++++++++ crypto/Kconfig | 8 + 4 files changed, 320 insertions(+) create mode 100644 arch/loongarch/crypto/Makefile create mode 100644 arch/loongarch/crypto/crc32-loongarch.c diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 13eaa36b0949..0dab3174a95a 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -135,6 +135,8 @@ libs-y += arch/loongarch/lib/ # See arch/loongarch/Kbuild for content of core part of the kernel core-y += arch/loongarch/ +drivers-y += arch/loongarch/crypto/ + # suspend and hibernation support drivers-$(CONFIG_PM) += arch/loongarch/power/ diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile new file mode 100644 index 000000000000..d22613d27ce9 --- /dev/null +++ b/arch/loongarch/crypto/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for LoongArch crypto files.. 
+# + +obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c new file mode 100644 index 000000000000..10a45faf2ef0 --- /dev/null +++ b/arch/loongarch/crypto/crc32-loongarch.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions + * + * Module based on mips/crypto/crc32-mips.c + * + * Copyright (C) 2014 Linaro Ltd + * Copyright (C) 2018 MIPS Tech, LLC + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ + +#include +#include +#include + +#include + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + #type ".w." #size ".w" " %0, %1, %0\n\t"\ + : "+r" (crc) \ + : "r" (value) \ + : "memory"); \ +} while (0) + +#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) +#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) + +static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32(crc, value, b); + } + + return crc; +} + +static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) +{ + u32 crc = crc_; + + while (len >= sizeof(u64)) { + u64 value = get_unaligned_le64(p); + + CRC32C(crc, value, d); + p += sizeof(u64); + len -= sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value = get_unaligned_le32(p); + + CRC32C(crc, value, w); + p += sizeof(u32); + len -= sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value = get_unaligned_le16(p); + + CRC32C(crc, value, h); + p += sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value = *p++; + + CRC32C(crc, value, b); + } + + return crc; +} + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +struct chksum_ctx { + u32 key; +}; + +struct chksum_desc_ctx { + u32 crc; +}; + +static int chksum_init(struct shash_desc *desc) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = mctx->key; + + return 0; +} + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set the seed. 
+ */ +static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(tfm); + + if (keylen != sizeof(mctx->key)) + return -EINVAL; + + mctx->key = get_unaligned_le32(key); + + return 0; +} + +static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32_loongarch_hw(ctx->crc, data, length); + return 0; +} + +static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length); + return 0; +} + +static int chksum_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + put_unaligned_le32(~ctx->crc, out); + return 0; +} + +static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out); + return 0; +} + +static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) +{ + put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out); + return 0; +} + +static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksum_finup(ctx->crc, data, len, out); +} + +static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + + return __chksumc_finup(ctx->crc, data, len, out); +} + +static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksum_finup(mctx->key, data, length, out); +} + +static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) +{ + struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); + + return __chksumc_finup(mctx->key, data, length, out); +} + +static int chksum_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = 0; + return 0; +} + +static int chksumc_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + + mctx->key = ~0; + return 0; +} + +static struct shash_alg crc32_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksum_update, + .final = chksum_final, + .finup = chksum_finup, + .digest = chksum_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-loongarch", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksum_cra_init, + } +}; + +static struct shash_alg crc32c_alg = { + .digestsize = CHKSUM_DIGEST_SIZE, + .setkey = chksum_setkey, + .init = chksum_init, + .update = chksumc_update, + .final = chksumc_final, + .finup = chksumc_finup, + .digest = chksumc_digest, + .descsize = sizeof(struct chksum_desc_ctx), + .base = { + .cra_name = "crc32c", + .cra_driver_name = "crc32c-loongarch", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, + .cra_blocksize = 
CHKSUM_BLOCK_SIZE, + .cra_alignmask = 0, + .cra_ctxsize = sizeof(struct chksum_ctx), + .cra_module = THIS_MODULE, + .cra_init = chksumc_cra_init, + } +}; + +static int __init crc32_mod_init(void) +{ + int err; + + if (!cpu_has(CPU_FEATURE_CRC32)) + return 0; + + err = crypto_register_shash(&crc32_alg); + if (err) + return err; + + err = crypto_register_shash(&crc32c_alg); + if (err) + return err; + + return 0; +} + +static void __exit crc32_mod_exit(void) +{ + if (!cpu_has(CPU_FEATURE_CRC32)) + return; + + crypto_unregister_shash(&crc32_alg); + crypto_unregister_shash(&crc32c_alg); +} + +module_init(crc32_mod_init); +module_exit(crc32_mod_exit); + +MODULE_AUTHOR("Min Zhou "); +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); +MODULE_LICENSE("GPL v2"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 3564fe8d17ee..220581f3c062 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -663,6 +663,14 @@ config CRYPTO_CRC32_MIPS CRC32c and CRC32 CRC algorithms implemented using mips crypto instructions, when available. +config CRYPTO_CRC32_LOONGARCH + tristate "CRC32c and CRC32 CRC algorithm (LoongArch)" + depends on LOONGARCH + select CRC32 + select CRYPTO_HASH + help + CRC32c and CRC32 CRC algorithms implemented using LoongArch + CRC32 instructions, when available. config CRYPTO_XXHASH tristate "xxHash hash algorithm" -- Gitee From 61293262583d58cfc907436ab8a4a5a44aae35c6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 13 Oct 2021 08:29:43 +0800 Subject: [PATCH 203/241] LoongArch: BPF: Add eBPF JIT support Change-Id: Ibdd8e35a93496b096a92ca3d9180a9822d5bb0ba Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Kbuild | 1 + arch/loongarch/Kconfig | 1 + arch/loongarch/configs/loongson3_defconfig | 1 + arch/loongarch/include/asm/perf_event.h | 4 +- .../include/uapi/asm/bpf_perf_event.h | 9 + arch/loongarch/net/Makefile | 7 + arch/loongarch/net/ebpf_jit.c | 978 ++++++++++++++++++ arch/loongarch/net/ebpf_jit.h | 842 +++++++++++++++ .../loongarch/include/uapi/asm/bitsperlong.h | 9 + tools/include/uapi/asm/bitsperlong.h | 2 + tools/testing/selftests/seccomp/seccomp_bpf.c | 6 + 11 files changed, 1859 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/uapi/asm/bpf_perf_event.h create mode 100644 arch/loongarch/net/Makefile create mode 100644 arch/loongarch/net/ebpf_jit.c create mode 100644 arch/loongarch/net/ebpf_jit.h create mode 100644 tools/arch/loongarch/include/uapi/asm/bitsperlong.h diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index e997ba117bd9..4fd77e6cccd0 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -18,4 +18,5 @@ obj- := $(platform-) obj-y += kernel/ obj-y += mm/ +obj-y += net/ obj-y += vdso/ diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index fc3c14e947cf..66dd8d0b15af 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -87,6 +87,7 @@ config LOONGARCH select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_EBPF_JIT if 64BIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN select HAVE_EXIT_THREAD select HAVE_FAST_GUP diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 1acf3c3ba153..5e3553d8e90e 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -291,6 +291,7 @@ CONFIG_NET_ACT_BPF=m CONFIG_OPENVSWITCH=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y 
+CONFIG_BPF_JIT=y
 CONFIG_BT=m
 CONFIG_BT_HCIBTUSB=m
 # CONFIG_BT_HCIBTUSB_BCM is not set
diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h
index 943f478d823f..a9caaf15738d 100644
--- a/arch/loongarch/include/asm/perf_event.h
+++ b/arch/loongarch/include/asm/perf_event.h
@@ -12,5 +12,7 @@
 #ifndef __LOONGARCH_PERF_EVENT_H__
 #define __LOONGARCH_PERF_EVENT_H__
 
-/* Nothing to show here; the file is required by linux/perf_event.h. */
+
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+
 #endif /* __LOONGARCH_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/uapi/asm/bpf_perf_event.h b/arch/loongarch/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..eb6e2fd2a1f0
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile
new file mode 100644
index 000000000000..42fa082caa93
--- /dev/null
+++ b/arch/loongarch/net/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for arch/loongarch/net
+#
+# Copyright (C) 2021 Loongson Technology Corporation Limited
+#
+obj-$(CONFIG_BPF_JIT) += ebpf_jit.o
diff --git a/arch/loongarch/net/ebpf_jit.c b/arch/loongarch/net/ebpf_jit.c
new file mode 100644
index 000000000000..b62f3f048c13
--- /dev/null
+++ b/arch/loongarch/net/ebpf_jit.c
@@ -0,0 +1,978 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * BPF JIT compiler for LoongArch
+ *
+ * Copyright (C) 2021 Loongson Technology Corporation Limited
+ */
+#include "ebpf_jit.h"
+
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
+#define TMP_REG_3	(MAX_BPF_JIT_REG + 2)
+#define REG_TCC		(MAX_BPF_JIT_REG + 3)
+#define TCC_SAVED	(MAX_BPF_JIT_REG + 4)
+
+#define SAVE_RA		BIT(0)
+#define SAVE_TCC	BIT(1)
+
+static const int regmap[] = {
+	/* return value from in-kernel function, and exit value for eBPF program */
+	[BPF_REG_0] = LOONGARCH_GPR_A5,
+	/* arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = LOONGARCH_GPR_A0,
+	[BPF_REG_2] = LOONGARCH_GPR_A1,
+	[BPF_REG_3] = LOONGARCH_GPR_A2,
+	[BPF_REG_4] = LOONGARCH_GPR_A3,
+	[BPF_REG_5] = LOONGARCH_GPR_A4,
+	/* callee-saved registers that the in-kernel function will preserve */
+	[BPF_REG_6] = LOONGARCH_GPR_S0,
+	[BPF_REG_7] = LOONGARCH_GPR_S1,
+	[BPF_REG_8] = LOONGARCH_GPR_S2,
+	[BPF_REG_9] = LOONGARCH_GPR_S3,
+	/* read-only frame pointer to access stack */
+	[BPF_REG_FP] = LOONGARCH_GPR_S4,
+	/* temporary register for blinding constants */
+	[BPF_REG_AX] = LOONGARCH_GPR_T0,
+	/* temporary register for internal BPF JIT */
+	[TMP_REG_1] = LOONGARCH_GPR_T1,
+	[TMP_REG_2] = LOONGARCH_GPR_T2,
+	[TMP_REG_3] = LOONGARCH_GPR_T3,
+	/* tail call */
+	[REG_TCC] = LOONGARCH_GPR_A6,
+	/* store A6 in S5 if the program makes calls */
+	[TCC_SAVED] = LOONGARCH_GPR_S5,
+};
+
+static void mark_call(struct jit_ctx *ctx)
+{
+	ctx->flags |= SAVE_RA;
+}
+
+static void mark_tail_call(struct jit_ctx *ctx)
+{
+	ctx->flags |= SAVE_TCC;
+}
+
+static bool seen_call(struct jit_ctx *ctx)
+{
+	return (ctx->flags & SAVE_RA);
+}
+
+static bool seen_tail_call(struct jit_ctx *ctx)
+{
+	return (ctx->flags & SAVE_TCC);
+}
+
+static u8 tail_call_reg(struct jit_ctx *ctx)
+{
+	if (seen_call(ctx))
+		return regmap[TCC_SAVED];
+
+	return regmap[REG_TCC];
+}
+
+/*
+ * eBPF prog stack layout:
+ *
+ *                                        high
+ * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
+ *                            |           $ra           |
+ *                            +-------------------------+
+ *                            |           $fp           |
+ *                            +-------------------------+
+ *                            |           $s0           |
+ *                            +-------------------------+
+ *                            |           $s1           |
+ *                            +-------------------------+
+ *                            |           $s2           |
+ *                            +-------------------------+
+ *                            |           $s3           |
+ *                            +-------------------------+
+ *                            |           $s4           |
+ *                            +-------------------------+
+ *                            |           $s5           |
+ *                            +-------------------------+ <--BPF_REG_FP
+ *                            |  prog->aux->stack_depth |
+ *                            |        (optional)       |
+ * current $sp -------------> +-------------------------+
+ *                                        low
+ */
+static void build_prologue(struct jit_ctx *ctx)
+{
+	int stack_adjust = 0, store_offset, bpf_stack_adjust;
+
+	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_RA */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_FP */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_S0 */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_S1 */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_S2 */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_S3 */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_S4 */
+	stack_adjust += sizeof(long); /* LOONGARCH_GPR_S5 */
+
+	stack_adjust = round_up(stack_adjust, 16);
+	stack_adjust += bpf_stack_adjust;
+
+	/*
+	 * The first instruction initializes the tail call count (TCC).
+	 * On a tail call we skip this instruction, and the TCC is
+	 * passed in REG_TCC from the caller.
+	 */
+	emit_insn(ctx, addid, regmap[REG_TCC], LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
+
+	store_offset = stack_adjust - sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
+
+	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
+
+	if (bpf_stack_adjust)
+		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
+
+	/*
+	 * If the program contains both calls and tail calls, REG_TCC
+	 * needs to be saved across calls.
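+	 * (REG_TCC is $a6, a caller-saved argument register in the
+	 * LoongArch ABI; parking it in the callee-saved $s5 keeps the
+	 * remaining tail-call budget alive across helper calls.)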
+ */ + if (seen_tail_call(ctx) && seen_call(ctx)) + move_reg(ctx, regmap[TCC_SAVED], regmap[REG_TCC]); + + ctx->stack_size = stack_adjust; +} + +static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) +{ + int stack_adjust = ctx->stack_size; + int load_offset; + + load_offset = stack_adjust - sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); + + if (!is_tail_call) { + /* Set return value */ + move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]); + /* Return to the caller */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0); + } else { + /* + * Call the next bpf prog and skip the first instruction + * of TCC initialization. + */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, regmap[TMP_REG_3], 1); + } +} + +void build_epilogue(struct jit_ctx *ctx) +{ + __build_epilogue(ctx, false); +} + +/* initialized on the first pass of build_body() */ +static int out_offset = -1; +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + int off; + u8 tcc = tail_call_reg(ctx); + u8 a1 = LOONGARCH_GPR_A1; + u8 a2 = LOONGARCH_GPR_A2; + u8 tmp1 = regmap[TMP_REG_1]; + u8 tmp2 = regmap[TMP_REG_2]; + u8 tmp3 = regmap[TMP_REG_3]; + const int idx0 = ctx->idx; + +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + + /* + * a0: &ctx + * a1: &array + * a2: index + * + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_insn(ctx, ldwu, tmp1, a1, off); + /* bgeu $a2, $t1, jmp_offset */ + emit_tailcall_jump(ctx, BPF_JGE, a2, tmp1, jmp_offset); + + /* + * if (TCC-- < 0) + * goto out; + */ + emit_insn(ctx, addid, tmp1, tcc, -1); + emit_tailcall_jump(ctx, BPF_JSLT, tcc, LOONGARCH_GPR_ZERO, jmp_offset); + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit_insn(ctx, sllid, tmp2, a2, 3); + emit_insn(ctx, addd, tmp2, tmp2, a1); + off = offsetof(struct bpf_array, ptrs); + emit_insn(ctx, ldd, tmp2, tmp2, off); + /* beq $t2, $zero, jmp_offset */ + emit_tailcall_jump(ctx, BPF_JEQ, tmp2, LOONGARCH_GPR_ZERO, jmp_offset); + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_insn(ctx, ldd, tmp3, tmp2, off); + move_reg(ctx, tcc, tmp1); + __build_epilogue(ctx, true); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + + return 0; +#undef cur_offset +#undef jmp_offset +} + +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) +{ + bool is32 = (BPF_CLASS(insn->code) == BPF_ALU); + const u8 code = insn->code; + const u8 cond = BPF_OP(code); 
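+	/*
+	 * A BPF opcode byte decomposes as class | operation | source:
+	 * e.g. BPF_ALU64 | BPF_ADD | BPF_X means "dst += src" on 64-bit
+	 * operands, while the corresponding BPF_ALU (32-bit) cases below
+	 * compute on the low word and zero-extend the result via
+	 * emit_zext_32(ctx, dst, is32).
+	 */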
+ const u8 dst = regmap[insn->dst_reg]; + const u8 src = regmap[insn->src_reg]; + const u8 tmp = regmap[TMP_REG_1]; + const u8 tmp2 = regmap[TMP_REG_2]; + const s16 off = insn->off; + const s32 imm = insn->imm; + int i = insn - ctx->prog->insnsi; + int jmp_offset; + bool func_addr_fixed; + u64 func_addr; + u64 imm64; + int ret; + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + move_imm32(ctx, dst, imm, is32); + break; + + /* dst = dst + src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit_insn(ctx, addd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst + imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (is_signed_imm12(imm)) { + emit_insn(ctx, addid, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, addd, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - src */ + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_insn(ctx, subd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst - imm */ + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (is_signed_imm12(-imm)) { + emit_insn(ctx, addid, dst, dst, -imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, subd, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * src */ + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_insn(ctx, muld, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst * imm */ + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, muld, dst, dst, tmp); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_insn(ctx, divdu, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst / imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, divdu, dst, dst, tmp); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % src */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_insn(ctx, moddu, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst % imm */ + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, moddu, dst, dst, tmp); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit_insn(ctx, and, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst & imm */ + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, andi, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, and, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | src */ + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit_insn(ctx, or, dst, dst, src); + 
emit_zext_32(ctx, dst, is32); + break; + /* dst = dst | imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, or, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ src */ + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit_insn(ctx, xor, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + /* dst = dst ^ imm */ + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, xori, dst, dst, imm); + } else { + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, xor, dst, dst, tmp); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst << src (logical) */ + case BPF_ALU | BPF_LSH | BPF_X: + emit_insn(ctx, sllw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: + emit_insn(ctx, slld, dst, dst, src); + break; + /* dst = dst << imm (logical) */ + case BPF_ALU | BPF_LSH | BPF_K: + emit_insn(ctx, slliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: + emit_insn(ctx, sllid, dst, dst, imm); + break; + + /* dst = dst >> src (logical) */ + case BPF_ALU | BPF_RSH | BPF_X: + emit_insn(ctx, srlw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: + emit_insn(ctx, srld, dst, dst, src); + break; + /* dst = dst >> imm (logical) */ + case BPF_ALU | BPF_RSH | BPF_K: + emit_insn(ctx, srliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: + emit_insn(ctx, srlid, dst, dst, imm); + break; + + /* dst = dst >> src (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_X: + emit_insn(ctx, sraw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_insn(ctx, srad, dst, dst, src); + break; + /* dst = dst >> imm (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_K: + emit_insn(ctx, sraiw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_insn(ctx, sraid, dst, dst, imm); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, sllid, dst, dst, 48); + emit_insn(ctx, srlid, dst, dst, 48); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + /* do nothing */ + break; + } + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + emit_insn(ctx, revb2h, dst, dst); + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, sllid, dst, dst, 48); + emit_insn(ctx, srlid, dst, dst, 48); + break; + case 32: + emit_insn(ctx, revb2w, dst, dst); + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + emit_insn(ctx, revbd, dst, dst); + break; + } + break; + + /* PC += off if dst cond src */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_cond_jump(ctx, cond, dst, src, jmp_offset); + break; + + /* PC += off if dst cond imm */ + case BPF_JMP 
| BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm32(ctx, tmp, imm, is32); + emit_cond_jump(ctx, cond, dst, tmp, jmp_offset); + break; + + /* PC += off if dst & src */ + case BPF_JMP | BPF_JSET | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_insn(ctx, and, tmp, dst, src); + emit_cond_jump(ctx, cond, tmp, LOONGARCH_GPR_ZERO, jmp_offset); + break; + /* PC += off if dst & imm */ + case BPF_JMP | BPF_JSET | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm32(ctx, tmp, imm, is32); + emit_insn(ctx, and, tmp, dst, tmp); + emit_cond_jump(ctx, cond, tmp, LOONGARCH_GPR_ZERO, jmp_offset); + break; + + /* PC += off */ + case BPF_JMP | BPF_JA: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_uncond_jump(ctx, jmp_offset, false); + break; + + /* function call */ + case BPF_JMP | BPF_CALL: + mark_call(ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + + move_imm64(ctx, tmp, func_addr, is32); + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, tmp, 0); + move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0); + break; + + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + mark_tail_call(ctx); + if (emit_bpf_tail_call(ctx)) + return -EINVAL; + break; + + /* function return */ + case BPF_JMP | BPF_EXIT: + emit_sext_32(ctx, regmap[BPF_REG_0]); + /* + * Optimization: when last instruction is EXIT, + * simply fallthrough to epilogue. + */ + if (i == ctx->prog->len - 1) + break; + + jmp_offset = epilogue_offset(ctx); + emit_uncond_jump(ctx, jmp_offset, true); + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm; + move_imm64(ctx, dst, imm64, is32); + return 1; + + /* dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + if (is_signed_imm12(off)) { + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, ldbu, dst, src, off); + break; + case BPF_H: + emit_insn(ctx, ldhu, dst, src, off); + break; + case BPF_W: + emit_insn(ctx, ldwu, dst, src, off); + break; + case BPF_DW: + emit_insn(ctx, ldd, dst, src, off); + break; + } + } else { + move_imm32(ctx, tmp, off, is32); + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, ldxbu, dst, src, tmp); + break; + case BPF_H: + emit_insn(ctx, ldxhu, dst, src, tmp); + break; + case BPF_W: + emit_insn(ctx, ldxwu, dst, src, tmp); + break; + case BPF_DW: + emit_insn(ctx, ldxd, dst, src, tmp); + break; + } + } + break; + + /* *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + move_imm32(ctx, tmp, imm, is32); + if (is_signed_imm12(off)) { + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stb, tmp, dst, off); + break; + case BPF_H: + emit_insn(ctx, sth, tmp, dst, off); + break; + case BPF_W: + emit_insn(ctx, stw, tmp, dst, off); + break; + case BPF_DW: + emit_insn(ctx, std, tmp, dst, off); + break; + } + } else { + move_imm32(ctx, tmp2, off, is32); + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stxb, tmp, dst, tmp2); + break; + case BPF_H: + emit_insn(ctx, 
stxh, tmp, dst, tmp2); + break; + case BPF_W: + emit_insn(ctx, stxw, tmp, dst, tmp2); + break; + case BPF_DW: + emit_insn(ctx, stxd, tmp, dst, tmp2); + break; + } + } + break; + + /* *(size *)(dst + off) = src */ + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + if (is_signed_imm12(off)) { + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stb, src, dst, off); + break; + case BPF_H: + emit_insn(ctx, sth, src, dst, off); + break; + case BPF_W: + emit_insn(ctx, stw, src, dst, off); + break; + case BPF_DW: + emit_insn(ctx, std, src, dst, off); + break; + } + } else { + move_imm32(ctx, tmp, off, is32); + switch (BPF_SIZE(code)) { + case BPF_B: + emit_insn(ctx, stxb, src, dst, tmp); + break; + case BPF_H: + emit_insn(ctx, stxh, src, dst, tmp); + break; + case BPF_W: + emit_insn(ctx, stxw, src, dst, tmp); + break; + case BPF_DW: + emit_insn(ctx, stxd, src, dst, tmp); + break; + } + } + break; + + /* atomic_add: lock *(size *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_W: + case BPF_STX | BPF_XADD | BPF_DW: + if (insn->imm != BPF_ADD) { + pr_err_once("unknown atomic op code %02x\n", insn->imm); + return -EINVAL; + } + + move_imm32(ctx, tmp, off, is32); + emit_insn(ctx, addd, tmp, dst, tmp); + switch (BPF_SIZE(insn->code)) { + case BPF_W: + emit_insn(ctx, amaddw, tmp2, src, tmp); + break; + case BPF_DW: + emit_insn(ctx, amaddd, tmp2, src, tmp); + break; + } + break; + + default: + pr_err("bpf_jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +static int build_body(struct jit_ctx *ctx, bool extra_pass) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + if (!ctx->image) + ctx->offset[i] = ctx->idx; + + ret = build_insn(insn, ctx, extra_pass); + if (ret > 0) { + i++; + if (!ctx->image) + ctx->offset[i] = ctx->idx; + continue; + } + if (ret) + return ret; + } + + if (!ctx->image) + ctx->offset[i] = ctx->idx; + + return 0; +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +/* Fill space with illegal instructions */ +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *ptr; + + /* We are guaranteed to have aligned memory */ + for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) + *ptr++ = INSN_BREAK; +} + +static int validate_code(struct jit_ctx *ctx) +{ + int i; + union loongarch_instruction insn; + + for (i = 0; i < ctx->idx; i++) { + insn = ctx->image[i]; + /* Check INSN_BREAK */ + if (insn.word == INSN_BREAK) + return -1; + } + + return 0; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *tmp, *orig_prog = prog; + struct bpf_binary_header *header; + struct jit_data *jit_data; + struct jit_ctx ctx; + bool tmp_blinded = false; + bool extra_pass = false; + int image_size; + u8 *image_ptr; + + /* + * If BPF JIT was not enabled then we must fall back to + * the interpreter. + */ + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + /* + * If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. Otherwise, we save + * the new JITed code. 
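+ *
+ * (Constant blinding rewrites each BPF_K immediate as
+ * "AX = imm ^ rnd; AX ^= rnd", so attacker-controlled constants
+ * never appear verbatim in executable memory; this is why
+ * BPF_REG_AX has a dedicated slot in regmap[].)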
+ */
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	jit_data = prog->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			prog = orig_prog;
+			goto out;
+		}
+		prog->aux->jit_data = jit_data;
+	}
+	if (jit_data->ctx.offset) {
+		ctx = jit_data->ctx;
+		image_ptr = jit_data->image;
+		header = jit_data->header;
+		extra_pass = true;
+		image_size = sizeof(u32) * ctx.idx;
+		goto skip_init_ctx;
+	}
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.prog = prog;
+
+	ctx.offset = kcalloc(prog->len + 1, sizeof(*ctx.offset), GFP_KERNEL);
+	if (!ctx.offset) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
+	if (build_body(&ctx, extra_pass)) {
+		prog = orig_prog;
+		goto out_off;
+	}
+	build_prologue(&ctx);
+	ctx.epilogue_offset = ctx.idx;
+	build_epilogue(&ctx);
+
+	/*
+	 * Now we know the actual image size.
+	 * As each LoongArch instruction is 32 bits wide, we translate
+	 * the number of JITed instructions into the size required to
+	 * store the JITed code.
+	 */
+	image_size = sizeof(u32) * ctx.idx;
+	/* Allocate the binary header and image area */
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (!header) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* 2. Now, the actual pass to generate final JIT code */
+	ctx.image = (union loongarch_instruction *)image_ptr;
+skip_init_ctx:
+	ctx.idx = 0;
+
+	build_prologue(&ctx);
+	if (build_body(&ctx, extra_pass)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
+	}
+	build_epilogue(&ctx);
+
+	/* 3. Extra pass to validate JITed code */
+	if (validate_code(&ctx)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* And we're done */
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+
+	/* Update the icache */
+	bpf_flush_icache(header, ctx.image + ctx.idx);
+
+	if (!prog->is_func || extra_pass) {
+		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
+			pr_err_once("multi-func JIT bug %d != %d\n",
+				    ctx.idx, jit_data->ctx.idx);
+			bpf_jit_binary_free(header);
+			prog->bpf_func = NULL;
+			prog->jited = 0;
+			goto out_off;
+		}
+		bpf_jit_binary_lock_ro(header);
+	} else {
+		jit_data->ctx = ctx;
+		jit_data->image = image_ptr;
+		jit_data->header = header;
+	}
+	prog->bpf_func = (void *)ctx.image;
+	prog->jited = 1;
+	prog->jited_len = image_size;
+
+	if (!prog->is_func || extra_pass) {
+out_off:
+		kfree(ctx.offset);
+		kfree(jit_data);
+		prog->aux->jit_data = NULL;
+	}
+out:
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+
+	out_offset = -1;
+	return prog;
+}
diff --git a/arch/loongarch/net/ebpf_jit.h b/arch/loongarch/net/ebpf_jit.h
new file mode 100644
index 000000000000..8c39316d3743
--- /dev/null
+++ b/arch/loongarch/net/ebpf_jit.h
@@ -0,0 +1,842 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * BPF JIT compiler for LoongArch
+ *
+ * Copyright (C) 2021 Loongson Technology Corporation Limited
+ */
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+
+struct jit_ctx {
+	const struct bpf_prog *prog;
+	unsigned int idx;
+	unsigned int flags;
+	unsigned int epilogue_offset;
+	u32 *offset;
+	union loongarch_instruction *image;
+	u32 stack_size;
+};
+
+struct jit_data {
+	struct bpf_binary_header *header;
+	u8 *image;
+	struct jit_ctx ctx;
+};
+
+#define emit_insn(ctx, func, ...) 
\ +do { \ + if (ctx->image != NULL) { \ + union loongarch_instruction *insn = &ctx->image[ctx->idx]; \ + emit_##func(insn, ##__VA_ARGS__); \ + } \ + ctx->idx++; \ +} while (0) + +static inline bool is_unsigned_imm(unsigned long val, unsigned int bit) +{ + return val < (1UL << bit); +} + +static inline bool is_signed_imm(long val, unsigned int bit) +{ + return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1)); +} + +#define is_signed_imm12(val) is_signed_imm(val, 12) +#define is_signed_imm16(val) is_signed_imm(val, 16) +#define is_signed_imm26(val) is_signed_imm(val, 26) +#define is_signed_imm32(val) is_signed_imm(val, 32) +#define is_signed_imm52(val) is_signed_imm(val, 52) +#define is_unsigned_imm12(val) is_unsigned_imm(val, 12) +#define is_unsigned_imm32(val) is_unsigned_imm(val, 32) + +static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx) +{ + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas la64 branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1)); +} + +static inline int epilogue_offset(const struct jit_ctx *ctx) +{ + int to = ctx->epilogue_offset; + int from = ctx->idx; + + return (to - from); +} + +static inline void emit_ldbu(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldbu_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldhu(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldhu_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldwu(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldwu_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_ldd(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = ldd_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_stb(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = stb_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_sth(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = sth_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_stw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = stw_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_std(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = std_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void 
emit_ldxbu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxbu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ldxhu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxhu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ldxwu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxwu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ldxd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = ldxd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxb(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxb_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxh(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxh_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_stxd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = stxd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_amaddw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rk, enum loongarch_gpr rj) +{ + insn->reg3_format.opcode = amaddw_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rk = rk; + insn->reg3_format.rj = rj; +} + +static inline void emit_amaddd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rk, enum loongarch_gpr rj) +{ + insn->reg3_format.opcode = amaddd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rk = rk; + insn->reg3_format.rj = rj; +} + +static inline void emit_addd(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = addd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_addiw(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = addiw_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_addid(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = addid_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_subd(union 
loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = subd_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_muld(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = muld_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_divdu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = divdu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_moddu(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = moddu_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_and(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = and_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_andi(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui12_format.opcode = andi_op; + insn->reg2ui12_format.simmediate = imm; + insn->reg2ui12_format.rd = rd; + insn->reg2ui12_format.rj = rj; +} + +static inline void emit_or(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = or_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_ori(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui12_format.opcode = ori_op; + insn->reg2ui12_format.simmediate = imm; + insn->reg2ui12_format.rd = rd; + insn->reg2ui12_format.rj = rj; +} + +static inline void emit_xor(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + insn->reg3_format.opcode = xor_op; + insn->reg3_format.rd = rd; + insn->reg3_format.rj = rj; + insn->reg3_format.rk = rk; +} + +static inline void emit_xori(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm) +{ + insn->reg2ui12_format.opcode = xori_op; + insn->reg2ui12_format.simmediate = imm; + insn->reg2ui12_format.rd = rd; + insn->reg2ui12_format.rj = rj; +} + +static inline void emit_lu12iw(union loongarch_instruction *insn, + enum loongarch_gpr rd, int imm) +{ + insn->reg1i20_format.opcode = lu12iw_op; + insn->reg1i20_format.simmediate = imm; + insn->reg1i20_format.rd = rd; +} + +static inline void emit_lu32id(union loongarch_instruction *insn, + enum loongarch_gpr rd, int imm) +{ + insn->reg1i20_format.opcode = lu32id_op; + insn->reg1i20_format.simmediate = imm; + insn->reg1i20_format.rd = rd; +} + +static inline void emit_lu52id(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + insn->reg2i12_format.opcode = lu52id_op; + insn->reg2i12_format.simmediate = imm; + insn->reg2i12_format.rd = rd; + insn->reg2i12_format.rj = rj; +} + +static inline void emit_sllw(union loongarch_instruction *insn, enum loongarch_gpr rd, + enum loongarch_gpr rj, enum loongarch_gpr rk) +{ + 
insn->reg3_format.opcode = sllw_op;
+	insn->reg3_format.rd = rd;
+	insn->reg3_format.rj = rj;
+	insn->reg3_format.rk = rk;
+}
+
+static inline void emit_slliw(union loongarch_instruction *insn,
+			enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm)
+{
+	insn->reg2ui5_format.opcode = slliw_op;
+	insn->reg2ui5_format.simmediate = imm;
+	insn->reg2ui5_format.rd = rd;
+	insn->reg2ui5_format.rj = rj;
+}
+
+static inline void emit_slld(union loongarch_instruction *insn, enum loongarch_gpr rd,
+			enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	insn->reg3_format.opcode = slld_op;
+	insn->reg3_format.rd = rd;
+	insn->reg3_format.rj = rj;
+	insn->reg3_format.rk = rk;
+}
+
+static inline void emit_sllid(union loongarch_instruction *insn,
+			enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm)
+{
+	insn->reg2ui6_format.opcode = sllid_op;
+	insn->reg2ui6_format.simmediate = imm;
+	insn->reg2ui6_format.rd = rd;
+	insn->reg2ui6_format.rj = rj;
+}
+
+static inline void emit_srlw(union loongarch_instruction *insn, enum loongarch_gpr rd,
+			enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	insn->reg3_format.opcode = srlw_op;
+	insn->reg3_format.rd = rd;
+	insn->reg3_format.rj = rj;
+	insn->reg3_format.rk = rk;
+}
+
+static inline void emit_srliw(union loongarch_instruction *insn,
+			enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm)
+{
+	insn->reg2ui5_format.opcode = srliw_op;
+	insn->reg2ui5_format.simmediate = imm;
+	insn->reg2ui5_format.rd = rd;
+	insn->reg2ui5_format.rj = rj;
+}
+
+static inline void emit_srld(union loongarch_instruction *insn, enum loongarch_gpr rd,
+			enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	insn->reg3_format.opcode = srld_op;
+	insn->reg3_format.rd = rd;
+	insn->reg3_format.rj = rj;
+	insn->reg3_format.rk = rk;
+}
+
+static inline void emit_srlid(union loongarch_instruction *insn,
+			enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm)
+{
+	insn->reg2ui6_format.opcode = srlid_op;
+	insn->reg2ui6_format.simmediate = imm;
+	insn->reg2ui6_format.rd = rd;
+	insn->reg2ui6_format.rj = rj;
+}
+
+static inline void emit_sraw(union loongarch_instruction *insn, enum loongarch_gpr rd,
+			enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	insn->reg3_format.opcode = sraw_op;
+	insn->reg3_format.rd = rd;
+	insn->reg3_format.rj = rj;
+	insn->reg3_format.rk = rk;
+}
+
+static inline void emit_sraiw(union loongarch_instruction *insn,
+			enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm)
+{
+	insn->reg2ui5_format.opcode = sraiw_op;
+	insn->reg2ui5_format.simmediate = imm;
+	insn->reg2ui5_format.rd = rd;
+	insn->reg2ui5_format.rj = rj;
+}
+
+static inline void emit_srad(union loongarch_instruction *insn, enum loongarch_gpr rd,
+			enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	insn->reg3_format.opcode = srad_op;
+	insn->reg3_format.rd = rd;
+	insn->reg3_format.rj = rj;
+	insn->reg3_format.rk = rk;
+}
+
+static inline void emit_sraid(union loongarch_instruction *insn,
+			enum loongarch_gpr rd, enum loongarch_gpr rj, u32 imm)
+{
+	insn->reg2ui6_format.opcode = sraid_op;
+	insn->reg2ui6_format.simmediate = imm;
+	insn->reg2ui6_format.rd = rd;
+	insn->reg2ui6_format.rj = rj;
+}
+
+static inline void emit_beq(union loongarch_instruction *insn,
+			enum loongarch_gpr rj, enum loongarch_gpr rd, int offset)
+{
+	insn->reg2i16_format.opcode = beq_op;
+	insn->reg2i16_format.simmediate = offset;
+	insn->reg2i16_format.rj = rj;
+	insn->reg2i16_format.rd = rd;
+}
+
+static inline void emit_bne(union loongarch_instruction *insn,
+			enum loongarch_gpr rj, enum loongarch_gpr rd, int offset)
+{
+	
insn->reg2i16_format.opcode = bne_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_blt(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = blt_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bge(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bge_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bltu(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bltu_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_bgeu(union loongarch_instruction *insn, + enum loongarch_gpr rj, enum loongarch_gpr rd, int offset) +{ + insn->reg2i16_format.opcode = bgeu_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rj = rj; + insn->reg2i16_format.rd = rd; +} + +static inline void emit_b(union loongarch_instruction *insn, int offset) +{ + unsigned int simmediate_l, simmediate_h; + + simmediate_l = offset & 0xffff; + offset >>= 16; + simmediate_h = offset & 0x3ff; + + insn->reg0i26_format.opcode = b_op; + insn->reg0i26_format.simmediate_l = simmediate_l; + insn->reg0i26_format.simmediate_h = simmediate_h; +} + +static inline void emit_jirl(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj, int offset) +{ + insn->reg2i16_format.opcode = jirl_op; + insn->reg2i16_format.simmediate = offset; + insn->reg2i16_format.rd = rd; + insn->reg2i16_format.rj = rj; +} + +static inline void emit_pcaddu18i(union loongarch_instruction *insn, + enum loongarch_gpr rd, int imm) +{ + insn->reg1i20_format.opcode = pcaddu18i_op; + insn->reg1i20_format.simmediate = imm; + insn->reg1i20_format.rd = rd; +} + +static inline void emit_revb2h(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + insn->reg2_format.opcode = revb2h_op; + insn->reg2_format.rd = rd; + insn->reg2_format.rj = rj; +} + +static inline void emit_revb2w(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + insn->reg2_format.opcode = revb2w_op; + insn->reg2_format.rd = rd; + insn->reg2_format.rj = rj; +} + +static inline void emit_revbd(union loongarch_instruction *insn, + enum loongarch_gpr rd, enum loongarch_gpr rj) +{ + insn->reg2_format.opcode = revbd_op; + insn->reg2_format.rd = rd; + insn->reg2_format.rj = rj; +} + +/* Zero-extend 32 bits into 64 bits */ +static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32) +{ + if (!is32) + return; + + /* Clear the upper 32 bits */ + emit_insn(ctx, lu32id, reg, 0); +} + +/* Signed-extend 32 bits into 64 bits */ +static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg) +{ + emit_insn(ctx, addiw, reg, reg, 0); +} + +static inline void move_imm32(struct jit_ctx *ctx, enum loongarch_gpr rd, + int imm32, bool is32) +{ + int si20; + u32 ui12; + + /* or rd, $zero, $zero */ + if (imm32 == 0) { + emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO); + return; + } + + /* addiw rd, $zero, imm_11_0(signed) */ + if (is_signed_imm12(imm32)) { + emit_insn(ctx, addiw, 
rd, LOONGARCH_GPR_ZERO, imm32); + goto zext; + } + + /* ori rd, $zero, imm_11_0(unsigned) */ + if (is_unsigned_imm12(imm32)) { + emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm32); + goto zext; + } + + /* lu12iw rd, imm_31_12(signed) */ + si20 = (imm32 >> 12) & 0xfffff; + emit_insn(ctx, lu12iw, rd, si20); + + /* ori rd, rd, imm_11_0(unsigned) */ + ui12 = imm32 & 0xfff; + if (ui12 != 0) + emit_insn(ctx, ori, rd, rd, ui12); + +zext: + emit_zext_32(ctx, rd, is32); +} + +static inline void move_imm64(struct jit_ctx *ctx, enum loongarch_gpr rd, + long imm64, bool is32) +{ + int imm32, si20, si12; + long imm52; + + si12 = (imm64 >> 52) & 0xfff; + imm52 = imm64 & 0xfffffffffffff; + /* lu52id rd, $zero, imm_63_52(signed) */ + if (si12 != 0 && imm52 == 0) { + emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, si12); + return; + } + + imm32 = imm64 & 0xffffffff; + move_imm32(ctx, rd, imm32, is32); + + if (!is_signed_imm32(imm64)) { + if (imm52 != 0) { + /* lu32id rd, imm_51_32(signed) */ + si20 = (imm64 >> 32) & 0xfffff; + emit_insn(ctx, lu32id, rd, si20); + } + + /* lu52id rd, rd, imm_63_52(signed) */ + if (!is_signed_imm52(imm64)) + emit_insn(ctx, lu52id, rd, rd, si12); + } +} + +static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd, + enum loongarch_gpr rj) +{ + emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO); +} + +static inline int invert_jump_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JNE: + case BPF_JSET: + return BPF_JEQ; + case BPF_JGT: + return BPF_JLE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLT: + return BPF_JGE; + case BPF_JLE: + return BPF_JGT; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static inline void cond_jump_offs16(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + switch (cond) { + case BPF_JEQ: + /* PC += jmp_offset if rj == rd */ + emit_insn(ctx, beq, rj, rd, jmp_offset); + return; + case BPF_JNE: + case BPF_JSET: + /* PC += jmp_offset if rj != rd */ + emit_insn(ctx, bne, rj, rd, jmp_offset); + return; + case BPF_JGT: + /* PC += jmp_offset if rj > rd (unsigned) */ + emit_insn(ctx, bltu, rd, rj, jmp_offset); + return; + case BPF_JLT: + /* PC += jmp_offset if rj < rd (unsigned) */ + emit_insn(ctx, bltu, rj, rd, jmp_offset); + return; + case BPF_JGE: + /* PC += jmp_offset if rj >= rd (unsigned) */ + emit_insn(ctx, bgeu, rj, rd, jmp_offset); + return; + case BPF_JLE: + /* PC += jmp_offset if rj <= rd (unsigned) */ + emit_insn(ctx, bgeu, rd, rj, jmp_offset); + return; + case BPF_JSGT: + /* PC += jmp_offset if rj > rd (signed) */ + emit_insn(ctx, blt, rd, rj, jmp_offset); + return; + case BPF_JSLT: + /* PC += jmp_offset if rj < rd (signed) */ + emit_insn(ctx, blt, rj, rd, jmp_offset); + return; + case BPF_JSGE: + /* PC += jmp_offset if rj >= rd (signed) */ + emit_insn(ctx, bge, rj, rd, jmp_offset); + return; + case BPF_JSLE: + /* PC += jmp_offset if rj <= rd (signed) */ + emit_insn(ctx, bge, rd, rj, jmp_offset); + return; + } +} + +static inline void cond_jump_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + cond = invert_jump_cond(cond); + cond_jump_offs16(ctx, cond, rj, rd, 2); + emit_insn(ctx, b, jmp_offset); +} + +static inline void cond_jump_offs32(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + s64 upper, lower; + + upper = (jmp_offset + (1 << 15)) 
>> 16; + lower = jmp_offset & 0xffff; + + cond = invert_jump_cond(cond); + cond_jump_offs16(ctx, cond, rj, rd, 3); + + /* + * jmp_addr = jmp_offset << 2 + * tmp2 = PC + jmp_addr[31, 18] + 18'b0 + */ + emit_insn(ctx, pcaddu18i, LOONGARCH_GPR_T2, upper << 2); + + /* jump to (tmp2 + jmp_addr[17, 2] + 2'b0) */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T2, lower + 1); +} + +static inline void uncond_jump_offs26(struct jit_ctx *ctx, int jmp_offset) +{ + emit_insn(ctx, b, jmp_offset); +} + +static inline void uncond_jump_offs32(struct jit_ctx *ctx, int jmp_offset, bool is_exit) +{ + s64 upper, lower; + + upper = (jmp_offset + (1 << 15)) >> 16; + lower = jmp_offset & 0xffff; + + if (is_exit) + lower -= 1; + + /* + * jmp_addr = jmp_offset << 2; + * tmp1 = PC + jmp_addr[31, 18] + 18'b0 + */ + emit_insn(ctx, pcaddu18i, LOONGARCH_GPR_T1, upper << 2); + + /* jump to (tmp1 + jmp_addr[17, 2] + 2'b0) */ + emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T1, lower + 1); +} + +static inline void emit_cond_jump(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + if (is_signed_imm16(jmp_offset)) + cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); + else if (is_signed_imm26(jmp_offset)) + cond_jump_offs26(ctx, cond, rj, rd, jmp_offset); + else + cond_jump_offs32(ctx, cond, rj, rd, jmp_offset); +} + +static inline void emit_uncond_jump(struct jit_ctx *ctx, int jmp_offset, bool is_exit) +{ + if (is_signed_imm26(jmp_offset)) + uncond_jump_offs26(ctx, jmp_offset); + else + uncond_jump_offs32(ctx, jmp_offset, is_exit); +} + +static inline void emit_tailcall_jump(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj, + enum loongarch_gpr rd, int jmp_offset) +{ + if (is_signed_imm16(jmp_offset)) + cond_jump_offs16(ctx, cond, rj, rd, jmp_offset); + else if (is_signed_imm26(jmp_offset)) + cond_jump_offs26(ctx, cond, rj, rd, jmp_offset - 1); + else + cond_jump_offs32(ctx, cond, rj, rd, jmp_offset - 2); +} diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..00b4ba1e5cdf --- /dev/null +++ b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_LOONGARCH_BITSPERLONG_H +#define __ASM_LOONGARCH_BITSPERLONG_H + +#define __BITS_PER_LONG (__SIZEOF_LONG__ * 8) + +#include + +#endif /* __ASM_LOONGARCH_BITSPERLONG_H */ diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h index edba4d93e9e6..da5206517158 100644 --- a/tools/include/uapi/asm/bitsperlong.h +++ b/tools/include/uapi/asm/bitsperlong.h @@ -17,6 +17,8 @@ #include "../../../arch/riscv/include/uapi/asm/bitsperlong.h" #elif defined(__alpha__) #include "../../../arch/alpha/include/uapi/asm/bitsperlong.h" +#elif defined(__loongarch__) +#include "../../../arch/loongarch/include/uapi/asm/bitsperlong.h" #else #include #endif diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 413a7b9f3c4d..f725d83d3ede 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -125,6 +125,8 @@ struct seccomp_data { # define __NR_seccomp 277 # elif defined(__csky__) # define __NR_seccomp 277 +# elif defined(__loongarch__) +# define __NR_seccomp 277 # elif defined(__hppa__) # define __NR_seccomp 338 # elif defined(__powerpc__) @@ -1730,6 +1732,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) 
NT_ARM_SYSTEM_CALL, &__v));				\
	} while (0)
# define SYSCALL_RET(_regs)	(_regs).regs[0]
+#elif defined(__loongarch__)
+# define ARCH_REGS	struct user_pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).regs[11]
+# define SYSCALL_RET(_regs)	(_regs).regs[4]
 #elif defined(__riscv) && __riscv_xlen == 64
 # define ARCH_REGS	struct user_regs_struct
 # define SYSCALL_NUM(_regs)	(_regs).a7
-- 
Gitee

From 485479b43938a114a6627a9d1611b363e8a14c16 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 3 Jun 2021 14:52:25 +0800
Subject: [PATCH 204/241] LoongArch: Add STACKPROTECTOR and STACKVALIDATOR support

Change-Id: I097ddeaf81073d1d3b2ef6d508fea743798408c3
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig                      |  2 ++
 arch/loongarch/boot/compressed/decompress.c |  7 ++++
 arch/loongarch/include/asm/stackprotector.h | 39 +++++++++++++++++++++
 arch/loongarch/kernel/asm-offsets.c         |  3 ++
 arch/loongarch/kernel/process.c             |  6 ++++
 arch/loongarch/kernel/switch.S              |  5 +++
 include/linux/compiler.h                    | 15 ++++++++
 7 files changed, 77 insertions(+)
 create mode 100644 arch/loongarch/include/asm/stackprotector.h

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 66dd8d0b15af..1075c24a5362 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -105,6 +105,8 @@ config LOONGARCH
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
+	select HAVE_STACKPROTECTOR
+	select HAVE_STACK_VALIDATION
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_TIF_NOHZ
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
diff --git a/arch/loongarch/boot/compressed/decompress.c b/arch/loongarch/boot/compressed/decompress.c
index 41d51d5acbec..b5fadf88628e 100644
--- a/arch/loongarch/boot/compressed/decompress.c
+++ b/arch/loongarch/boot/compressed/decompress.c
@@ -71,6 +71,13 @@ void error(char *x)
 #include "../../../../lib/decompress_unzstd.c"
 #endif
 
+const unsigned long __stack_chk_guard = 0x000a0dff;
+
+void __stack_chk_fail(void)
+{
+	error("stack-protector: Kernel stack is corrupted\n");
+}
+
 void decompress_kernel(unsigned long boot_heap_start, long kdump_reloc_offset)
 {
 	unsigned long zimage_start, zimage_size;
diff --git a/arch/loongarch/include/asm/stackprotector.h b/arch/loongarch/include/asm/stackprotector.h
new file mode 100644
index 000000000000..c51813a181fb
--- /dev/null
+++ b/arch/loongarch/include/asm/stackprotector.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting a predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called the stack canary,
+ * and gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard" on LoongArch. This unfortunately means that on SMP
+ * we cannot have a different canary value per task.
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H 1
+
+#include <linux/random.h>
+#include <linux/version.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+	unsigned long canary;
+
+	/* Try to get a semi random initial value.
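+	 * LINUX_VERSION_CODE is mixed in below so that the canary still
+	 * differs between kernel builds even when boot-time entropy is
+	 * scarce this early.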
*/ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 9bb72bb19ce3..197316bb9180 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -68,6 +68,9 @@ void output_task_defines(void) OFFSET(TASK_FLAGS, task_struct, flags); OFFSET(TASK_MM, task_struct, mm); OFFSET(TASK_PID, task_struct, pid); +#if defined(CONFIG_STACKPROTECTOR) + OFFSET(TASK_STACK_CANARY, task_struct, stack_canary); +#endif DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); BLANK(); } diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2984d18a28a2..2760b21ddd99 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -177,6 +177,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } +#ifdef CONFIG_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + bool in_task_stack(unsigned long stack, struct task_struct *task, struct stack_info *info) { diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S index 75d63830f3c3..8741f6e62899 100644 --- a/arch/loongarch/kernel/switch.S +++ b/arch/loongarch/kernel/switch.S @@ -29,6 +29,11 @@ SYM_FUNC_START(__switch_to) stptr.d ra, a0, THREAD_REG01 stptr.d a3, a0, THREAD_SCHED_RA stptr.d a4, a0, THREAD_SCHED_CFA +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) + la t7, __stack_chk_guard + LONG_L t8, a1, TASK_STACK_CANARY + LONG_S t8, t7, 0 +#endif move tp, a2 cpu_restore_nonscratch a1 diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 475d0a3ce059..5bfca2f2112c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -113,6 +113,20 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * The __COUNTER__ based labels are a hack to make each instance of the macros * unique, to convince GCC not to merge duplicate inline asm statements. 
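+ * NOTE: LoongArch toolchains do not handle the %c operand modifier, which
+ * is why the __loongarch__ variants below use a plain %0 for the label.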
*/ +#ifdef __loongarch__ +#define annotate_reachable() ({ \ + asm volatile("%0:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long %0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#define annotate_unreachable() ({ \ + asm volatile("%0:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long %0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ +}) +#else #define annotate_reachable() ({ \ asm volatile("%c0:\n\t" \ ".pushsection .discard.reachable\n\t" \ @@ -125,6 +139,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, ".long %c0b - .\n\t" \ ".popsection\n\t" : : "i" (__COUNTER__)); \ }) +#endif #define ASM_UNREACHABLE \ "999:\n\t" \ ".pushsection .discard.unreachable\n\t" \ -- Gitee From e50608a2199c357d17f231a0ed70a0f1e04c29e3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 13 Jun 2021 18:13:09 +0800 Subject: [PATCH 205/241] LoongArch: Add orc stack unwinder support Change-Id: I965012f32e89789451ac15a44c2c45244150e00d Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 5 +- arch/loongarch/boot/Makefile | 2 + arch/loongarch/boot/compressed/Makefile | 2 + arch/loongarch/include/asm/module.h | 6 + arch/loongarch/include/asm/orc_lookup.h | 35 ++ arch/loongarch/include/asm/orc_types.h | 57 +++ arch/loongarch/include/asm/stackframe.h | 3 + arch/loongarch/include/asm/unwind.h | 14 +- arch/loongarch/include/asm/unwind_hints.h | 47 ++ arch/loongarch/kernel/Makefile | 4 + arch/loongarch/kernel/entry.S | 5 + arch/loongarch/kernel/genex.S | 4 + arch/loongarch/kernel/module.c | 26 +- arch/loongarch/kernel/process.c | 9 +- arch/loongarch/kernel/setup.c | 2 + arch/loongarch/kernel/stacktrace.c | 70 +++ arch/loongarch/kernel/traps.c | 41 +- arch/loongarch/kernel/unwind_orc.c | 513 ++++++++++++++++++++++ arch/loongarch/kernel/vmlinux.lds.S | 3 + arch/loongarch/mm/tlb.c | 28 +- arch/loongarch/mm/tlbex.S | 2 + arch/loongarch/power/Makefile | 2 + arch/loongarch/vdso/Makefile | 1 + 23 files changed, 842 insertions(+), 39 deletions(-) create mode 100644 arch/loongarch/include/asm/orc_lookup.h create mode 100644 arch/loongarch/include/asm/orc_types.h create mode 100644 arch/loongarch/include/asm/unwind_hints.h create mode 100644 arch/loongarch/kernel/unwind_orc.c diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 0dab3174a95a..3a3f2b5712da 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -67,8 +67,6 @@ KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) -KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) -KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) else cflags-y += $(call cc-option,-mno-explicit-relocs) KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel @@ -77,6 +75,9 @@ KBUILD_AFLAGS_MODULE += -Wa,-mla-global-with-abs KBUILD_CFLAGS_MODULE += -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs endif +KBUILD_AFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) +KBUILD_CFLAGS += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) + ifeq ($(CONFIG_RELOCATABLE),y) LDFLAGS_vmlinux += --emit-relocs endif diff --git a/arch/loongarch/boot/Makefile b/arch/loongarch/boot/Makefile index b09257a75756..91ef912e2076 100644 --- 
a/arch/loongarch/boot/Makefile +++ b/arch/loongarch/boot/Makefile @@ -7,6 +7,8 @@ # Drop some uninteresting sections in the kernel. # This is only relevant for ELF kernels but doesn't hurt a.out # +OBJECT_FILES_NON_STANDARD := y + drop-sections := .comment .note .options strip-flags := $(addprefix --remove-section=,$(drop-sections)) diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index 7e78077b6723..046b5b439f79 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -3,6 +3,8 @@ # Author: Huacai Chen # Copyright (C) 2020 Loongson Technology Corporation Limited +OBJECT_FILES_NON_STANDARD := y + include $(srctree)/arch/loongarch/Kbuild.platforms # set the default size of the mallocing area for decompressing diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index fccef8461f80..d368bdfe915e 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -7,6 +7,7 @@ #include #include +#include #define RELA_STACK_DEPTH 16 @@ -20,6 +21,11 @@ struct mod_arch_specific { struct mod_section got; struct mod_section plt; struct mod_section plt_idx; +#ifdef CONFIG_UNWINDER_ORC + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; +#endif }; struct got_entry { diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h new file mode 100644 index 000000000000..f41f1e2f9b67 --- /dev/null +++ b/arch/loongarch/include/asm/orc_lookup.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ORC_LOOKUP_H +#define _ASM_ORC_LOOKUP_H + +/* + * This is a lookup table for speeding up access to the .orc_unwind table. + * Given an input address offset, the corresponding lookup table entry + * specifies a subset of the .orc_unwind table to search. + * + * Each block represents the end of the previous range and the start of the + * next range. An extra block is added to give the last range an end. + * + * The block size should be a power of 2 to avoid a costly 'div' instruction. + * + * A block size of 256 was chosen because it roughly doubles unwinder + * performance while only adding ~5% to the ORC data footprint. + */ +#define LOOKUP_BLOCK_ORDER 8 +#define LOOKUP_BLOCK_SIZE (1 << LOOKUP_BLOCK_ORDER) + +#ifndef LINKER_SCRIPT + +extern unsigned int orc_lookup[]; +extern unsigned int orc_lookup_end[]; + +#define LOOKUP_START_IP ((unsigned long)_stext) +#define LOOKUP_STOP_IP ((unsigned long)_etext) + +#endif /* LINKER_SCRIPT */ + +#endif /* _ASM_ORC_LOOKUP_H */ diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h new file mode 100644 index 000000000000..40b2800b0d6f --- /dev/null +++ b/arch/loongarch/include/asm/orc_types.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ + +#ifndef _ASM_ORC_TYPES_H +#define _ASM_ORC_TYPES_H + +#include +#include + +/* + * The ORC_REG_* registers are base registers which are used to find other + * registers on the stack. + * + * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the + * address of the previous frame: the caller's SP before it called the current + * function. 
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and FP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
+ * usually based on.
+ */
+#define ORC_REG_UNDEFINED	0
+#define ORC_REG_PREV_SP		1
+#define ORC_REG_SP		2
+#define ORC_REG_FP		3
+#define ORC_REG_MAX		15
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard. It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the
+ * unwinder how to find the previous SP and FP (and sometimes entry regs) on
+ * the stack for a given code address. Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	signed short	sp_offset;
+	signed short	fp_offset;
+	signed short	ra_offset;
+	unsigned int	sp_reg:4;
+	unsigned int	fp_reg:4;
+	unsigned int	ra_reg:4;
+	unsigned int	type:2;
+	unsigned int	end:1;
+	unsigned int	unused:1;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ORC_TYPES_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 1ed681be9c22..58a4644a4ad9 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 
 /* Make the addition of cfi info a little easier. */
 	.macro cfi_rel_offset reg offset=0 docfi=0
@@ -158,6 +159,7 @@
 	cfi_st	u0, PT_R21, \docfi
 	csrrd	u0, PERCPU_BASE_KS
 9:
+	UNWIND_HINT_REGS
 	.endm
 
 	.macro	SAVE_ALL docfi=0
@@ -215,6 +217,7 @@
 	.macro	RESTORE_SP_AND_RET docfi=0
 	cfi_ld	sp, PT_R3, \docfi
+	UNWIND_HINT sp_reg=ORC_REG_SP type=ORC_TYPE_CALL
 	ertn
 	.endm
 
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index 71b36ccf786d..14649fdd8883 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -7,6 +7,7 @@
 #ifndef _ASM_UNWIND_H
 #define _ASM_UNWIND_H
 
+#include
 #include
 #include
 
@@ -15,10 +16,13 @@ struct unwind_state {
 	struct stack_info stack_info;
 	struct task_struct *task;
 
-#ifdef CONFIG_UNWINDER_PROLOGUE
+	int graph_idx;
+#if defined(CONFIG_UNWINDER_PROLOGUE)
 	unsigned long sp, pc, ra;
 	bool enable;
 	bool first;
+#elif defined(CONFIG_UNWINDER_ORC)
+	unsigned long sp, pc, fp, ra;
 #else /* CONFIG_UNWINDER_GUESS */
 	unsigned long sp, pc;
 	bool first;
@@ -40,4 +44,12 @@ static inline bool unwind_error(struct unwind_state *state)
 {
 	return state->error;
 }
+
+#ifdef CONFIG_UNWINDER_ORC
+void unwind_init(void);
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
+#else
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
+#endif /* CONFIG_UNWINDER_ORC */
 #endif /* _ASM_UNWIND_H */
diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..51fbddff0458
--- /dev/null
+++ b/arch/loongarch/include/asm/unwind_hints.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Most of these ideas come from x86.
+ * + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#ifndef _ASM_UNWIND_HINTS_H +#define _ASM_UNWIND_HINTS_H + +#include +#include "orc_types.h" + +#ifdef __ASSEMBLY__ + +.macro UNWIND_HINT_EMPTY + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 +.endm + +.macro UNWIND_HINT_REGS base=ORC_REG_SP offset=0 + UNWIND_HINT sp_reg=\base sp_offset=\offset type=UNWIND_HINT_TYPE_REGS +.endm + +.macro UNWIND_HINT_FUNC offset=0 + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\offset type=UNWIND_HINT_TYPE_FUNC +.endm + +.macro NOT_SIBLING_CALL_HINT +876: .pushsection .discard.not_sibling_call + .long 876b - . + .popsection +.endm + +#else /* !__ASSEMBLY__ */ + +#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) + +#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) + +#define NOT_SIBLING_CALL_HINT \ + "876:\n\t" \ + ".pushsection .discard.not_sibling_call\n\t" \ + ".long 876b - .\n\t" \ + ".popsection\n\t" + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_UNWIND_HINTS_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 0f802db755c4..28cadeb891c9 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -3,6 +3,9 @@ # Makefile for the Linux/LoongArch kernel. # +OBJECT_FILES_NON_STANDARD_head.o :=y +OBJECT_FILES_NON_STANDARD_relocate_kernel.o :=y + extra-y := head.o vmlinux.lds obj-y += cpu-probe.o elf.o entry.o genex.o idle.o irq.o \ @@ -36,6 +39,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HARDWARE_WATCHPOINTS) += watch.o diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 6cda0d20a0f1..0f9d3314632e 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -13,6 +13,7 @@ #include #include #include +#include .text .cfi_sections .debug_frame @@ -57,6 +58,8 @@ SYM_CODE_START(handle_syscall) SAVE_STATIC + UNWIND_HINT_REGS + move u0, t0 li.d tp, ~_THREAD_MASK and tp, tp, sp @@ -68,6 +71,7 @@ SYM_CODE_START(handle_syscall) SYM_CODE_END(handle_syscall) SYM_CODE_START(ret_from_fork) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, sp bl syscall_exit_to_user_mode @@ -77,6 +81,7 @@ SYM_CODE_START(ret_from_fork) SYM_CODE_END(ret_from_fork) SYM_CODE_START(ret_from_kernel_thread) + UNWIND_HINT_REGS bl schedule_tail # a0 = struct task_struct *prev move a0, s1 jirl ra, s0, 0 diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index d732b5c8f793..828f3a369f9e 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -9,6 +9,7 @@ #include #include #include +#include .align 5 SYM_FUNC_START(__arch_cpu_idle) @@ -28,6 +29,7 @@ SYM_FUNC_END(__arch_cpu_idle) SYM_CODE_START(handle_vint) BACKUP_T0T1 SAVE_ALL + UNWIND_HINT_REGS la.abs t1, __arch_cpu_idle LONG_L t0, sp, PT_ERA /* 32 byte rollback region */ @@ -39,6 +41,7 @@ SYM_CODE_START(handle_vint) move a1, sp la.abs t0, do_vint jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_CODE_END(handle_vint) @@ -67,6 +70,7 @@ SYM_CODE_END(except_vec_cex) move a0, sp la.abs t0, do_\handler jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_CODE_END(handle_\exception) .endm diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 88dc90001a2e..0bc6a6a5d7d6 100644 --- 
a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -19,6 +19,8 @@ #include #include +#include + static inline bool signed_imm_check(long val, unsigned int bit) { return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1)); @@ -506,13 +508,25 @@ void *module_alloc(unsigned long size) int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - const Elf_Shdr *s, *se; - const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (!strcmp(".altinstructions", secstrs + s->sh_name)) - apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size); + char *secstrings; + const Elf_Shdr *s, *orc = NULL, *orc_ip = NULL, *alt = NULL; + + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".orc_unwind", secstrings + s->sh_name)) + orc = s; + if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) + orc_ip = s; } + if (alt) + apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size); + + if (orc && orc_ip) + unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size); + return 0; } diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2760b21ddd99..0cf89ce42749 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -266,7 +266,9 @@ unsigned long get_wchan(struct task_struct *task) * Just calculate a simpler wchan value here. * Do nothing here. */ -#else /* CONFIG_UNWINDER_PROLOGUE */ + return 0; +#endif + stack_page_end = stack_page + THREAD_SIZE; frame = thread_saved_fp(task); @@ -280,9 +282,11 @@ unsigned long get_wchan(struct task_struct *task) state.stack_info.begin = stack_page; state.stack_info.end = stack_page_end; state.stack_info.next_sp = 0; + state.pc = pc; +#ifdef CONFIG_UNWINDER_PROLOGUE state.enable = true; state.first = true; - state.pc = pc; +#endif while (in_sched_functions(pc)) { if (!unwind_next_frame(&state)) { @@ -293,7 +297,6 @@ unsigned long get_wchan(struct task_struct *task) } out: -#endif /* CONFIG_UNWINDER_GUESS */ put_task_stack(task); return pc; diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 48207a9a2a1d..a2172b4f15dc 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -31,6 +31,7 @@ #include #include #include +#include DEFINE_PER_CPU(unsigned long, kernelsp); unsigned long fw_arg0, fw_arg1, fw_arg2; @@ -368,6 +369,7 @@ void __init setup_arch(char **cmdline_p) { cpu_probe(); *cmdline_p = boot_command_line; + unwind_init(); early_init(); pagetable_init(); diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 55ae37018fff..973c39256382 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -106,6 +106,76 @@ void save_stack_trace_tsk(struct task_struct *tsk, } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE + +static int __always_inline +__save_stack_trace_reliable(struct stack_trace *trace, + struct task_struct *tsk) +{ + struct unwind_state state; + struct pt_regs dummyregs; + struct pt_regs *regs = &dummyregs; + unsigned long addr; + + if (tsk == current) { + regs->csr_era = (unsigned long)__builtin_return_address(0); + regs->regs[3] = (unsigned long)__builtin_frame_address(0); + } else { + 
regs->csr_era = thread_saved_ra(tsk); + regs->regs[3] = thread_saved_fp(tsk); + } + + for (unwind_start(&state, tsk, regs); + !unwind_done(&state) && !unwind_error(&state); + unwind_next_frame(&state)) { + + addr = unwind_get_return_address(&state); + + /* + * A NULL or invalid return address probably means there's some + * generated code which __kernel_text_address() doesn't know + * about. + */ + if (!addr) + return -EINVAL; + + if (!consume_entry(trace, addr)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + + return 0; +} + +/* + * This function returns an error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. + */ +int save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + int ret; + + /* + * If the task doesn't have a stack (e.g., a zombie), the stack is + * "reliably" empty. + */ + if (!try_get_task_stack(tsk)) + return 0; + + ret = __save_stack_trace_reliable(trace, tsk); + + put_task_stack(tsk); + + return ret; +} +#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ + static int copy_stack_frame(unsigned long fp, struct stack_frame *frame) { diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 53b3e51476c9..268f83c00b92 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -52,6 +52,32 @@ #include "access-helper.h" +void *exception_table[EXCCODE_INT_START] = { + [0 ... EXCCODE_INT_START - 1] = handle_reserved, + + [EXCCODE_TLBI] = handle_tlb_load, + [EXCCODE_TLBL] = handle_tlb_load, + [EXCCODE_TLBS] = handle_tlb_store, + [EXCCODE_TLBM] = handle_tlb_modify, + [EXCCODE_TLBNR] = handle_tlb_protect, + [EXCCODE_TLBNX] = handle_tlb_protect, + [EXCCODE_TLBPE] = handle_tlb_protect, + [EXCCODE_ADE] = handle_ade, + [EXCCODE_ALE] = handle_ale, + [EXCCODE_BCE] = handle_bce, + [EXCCODE_SYS] = handle_sys, + [EXCCODE_BP] = handle_bp, + [EXCCODE_INE] = handle_ri, + [EXCCODE_IPE] = handle_ri, + [EXCCODE_FPDIS] = handle_fpu, + [EXCCODE_LSXDIS] = handle_lsx, + [EXCCODE_LASXDIS] = handle_lasx, + [EXCCODE_FPE] = handle_fpe, + [EXCCODE_BTDIS] = handle_lbt, + [EXCCODE_WATCH] = handle_watch, +}; +EXPORT_SYMBOL_GPL(exception_table); + static void show_backtrace(struct task_struct *task, const struct pt_regs *regs, const char *loglvl, bool user) { @@ -1091,19 +1117,8 @@ void __init trap_init(void) for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++) set_handler(i * VECSIZE, handle_vint, VECSIZE); - set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE); - set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE); - set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE); - set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE); - set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE); - set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE); - set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE); - set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE); - set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE); - set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE); - set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE); - set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE); + for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); cache_error_setup(); diff --git 
a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c new file mode 100644 index 000000000000..ebed520ed935 --- /dev/null +++ b/arch/loongarch/kernel/unwind_orc.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Based on arch/x86/kernel/unwind_orc.c + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define orc_warn(fmt, ...) \ + printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__) + +#define orc_warn_current(args...) \ +({ \ + if (state->task == current) \ + orc_warn(args); \ +}) + +extern int __start_orc_unwind_ip[]; +extern int __stop_orc_unwind_ip[]; +extern struct orc_entry __start_orc_unwind[]; +extern struct orc_entry __stop_orc_unwind[]; + +static bool orc_init __ro_after_init; +static unsigned int lookup_num_blocks __ro_after_init; + +static int *cur_orc_ip_table = __start_orc_unwind_ip; +static struct orc_entry *cur_orc_table = __start_orc_unwind; + +static inline unsigned long orc_ip(const int *ip) +{ + return (unsigned long)ip + *ip; +} + +static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table, + unsigned int num_entries, unsigned long ip) +{ + int *first = ip_table; + int *last = ip_table + num_entries - 1; + int *mid = first, *found = first; + + if (!num_entries) + return NULL; + + /* + * Do a binary range search to find the rightmost duplicate of a given + * starting address. Some entries are section terminators which are + * "weak" entries for ensuring there are no gaps. They should be + * ignored when they conflict with a real entry. + */ + while (first <= last) { + mid = first + ((last - first) / 2); + + if (orc_ip(mid) <= ip) { + found = mid; + first = mid + 1; + } else + last = mid - 1; + } + + return u_table + (found - ip_table); +} + +#ifdef CONFIG_MODULES +static struct orc_entry *orc_module_find(unsigned long ip) +{ + struct module *mod; + + mod = __module_address(ip); + if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip) + return NULL; + return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, + mod->arch.num_orcs, ip); +} +#else +static struct orc_entry *orc_module_find(unsigned long ip) +{ + return NULL; +} +#endif + +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = UNWIND_HINT_TYPE_CALL, + .sp_reg = ORC_REG_FP, + .sp_offset = 16, + .fp_reg = ORC_REG_PREV_SP, + .fp_offset = -16, + .ra_reg = ORC_REG_PREV_SP, + .ra_offset = -8, + .end = 0, +}; + +static struct orc_entry *orc_find(unsigned long ip) +{ + struct orc_entry *orc = NULL; + + /* Although exception occurs, CSR_ERA is not 0. 
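+	 * A zero ip therefore never points at real code and can only come
+	 * from a corrupt unwind state, so reject it here.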
*/ + if (ip == 0) + return NULL; + + /* For non-init vmlinux addresses, use the fast lookup table: */ + if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) { + unsigned int idx, start, stop; + + idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE; + + if (unlikely((idx >= lookup_num_blocks - 1))) { + orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n", + idx, lookup_num_blocks, (void *)ip); + return NULL; + } + + start = orc_lookup[idx]; + stop = orc_lookup[idx + 1] + 1; + + if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) || + (__start_orc_unwind + stop > __stop_orc_unwind))) { + orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n", + idx, lookup_num_blocks, start, stop, (void *)ip); + return NULL; + } + + return __orc_find(__start_orc_unwind_ip + start, + __start_orc_unwind + start, stop - start, ip); + } + + /* vmlinux .init slow lookup: */ + if (init_kernel_text(ip)) + return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); + + /* Module lookup: */ + orc = orc_module_find(ip); + if (orc) + return orc; + + return NULL; +} + +#ifdef CONFIG_MODULES +static DEFINE_MUTEX(sort_mutex); + +static void orc_sort_swap(void *_a, void *_b, int size) +{ + int delta = _b - _a; + int *a = _a, *b = _b, tmp; + struct orc_entry *orc_a, *orc_b; + struct orc_entry orc_tmp; + + /* Swap the .orc_unwind_ip entries: */ + tmp = *a; + *a = *b + delta; + *b = tmp - delta; + + /* Swap the corresponding .orc_unwind entries: */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + orc_b = cur_orc_table + (b - cur_orc_ip_table); + orc_tmp = *orc_a; + *orc_a = *orc_b; + *orc_b = orc_tmp; +} + +static int orc_sort_cmp(const void *_a, const void *_b) +{ + const int *a = _a, *b = _b; + unsigned long a_val = orc_ip(a); + unsigned long b_val = orc_ip(b); + struct orc_entry *orc_a; + + if (a_val > b_val) + return 1; + if (a_val < b_val) + return -1; + + /* + * The "weak" section terminator entries need to always be on the left + * to ensure the lookup code skips them in favor of real entries. + * These terminator entries exist to handle any gaps created by + * whitelisted .o files which didn't get objtool generation. + */ + orc_a = cur_orc_table + (a - cur_orc_ip_table); + return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1; +} + +void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size, + void *_orc, size_t orc_size) +{ + int *orc_ip = _orc_ip; + struct orc_entry *orc = _orc; + unsigned int num_entries = orc_ip_size / sizeof(int); + + WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(*orc) != 0 || + num_entries != orc_size / sizeof(*orc)); + + /* + * The 'cur_orc_*' globals allow the orc_sort_swap() callback to + * associate an .orc_unwind_ip table entry with its corresponding + * .orc_unwind entry so they can both be swapped. 
+ */ + mutex_lock(&sort_mutex); + cur_orc_ip_table = orc_ip; + cur_orc_table = orc; + sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap); + mutex_unlock(&sort_mutex); + + mod->arch.orc_unwind_ip = orc_ip; + mod->arch.orc_unwind = orc; + mod->arch.num_orcs = num_entries; +} +#endif + +void __init unwind_init(void) +{ + int i; + size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind; + size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip; + size_t num_entries = orc_ip_size / sizeof(int); + struct orc_entry *orc; + + if (!num_entries || orc_ip_size % sizeof(int) != 0 || + orc_size % sizeof(struct orc_entry) != 0 || + num_entries != orc_size / sizeof(struct orc_entry)) { + orc_warn("WARNING: Bad or missing .orc_unwind table. Disabling unwinder.\n"); + return; + } + + /* + * Note, the orc_unwind and orc_unwind_ip tables were already + * sorted at build time via the 'sorttable' tool. + * It's ready for binary search straight away, no need to sort it. + */ + + /* Initialize the fast lookup table: */ + lookup_num_blocks = orc_lookup_end - orc_lookup; + for (i = 0; i < lookup_num_blocks - 1; i++) { + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, + num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i)); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + + orc_lookup[i] = orc - __start_orc_unwind; + } + + /* Initialize the ending block: */ + orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP); + if (!orc) { + orc_warn("WARNING: Corrupt .orc_unwind table. Disabling unwinder.\n"); + return; + } + orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind; + + orc_init = true; +} + +static inline bool task_on_another_cpu(struct task_struct *task) +{ +#ifdef CONFIG_SMP + return task != current && task->on_cpu; +#else + return false; +#endif +} + +void unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs) +{ + memset(state, 0, sizeof(*state)); + state->task = task; + + if (!orc_init) + goto err; + + if (task_on_another_cpu(task)) + goto err; + + state->pc = regs->csr_era; + state->ra = regs->regs[1]; + state->sp = regs->regs[3]; + state->fp = regs->regs[22]; + + if (get_stack_info(state->sp, state->task, &state->stack_info)) + goto err; + + return; + +err: + state->error = true; + state->stack_info.type = STACK_TYPE_UNKNOWN; +} + +#define INSN_LINK_OFFSET 4 + +extern void *exception_table[]; +extern asmlinkage void handle_vint(void); +extern asmlinkage void handle_reserved(void); + +static inline unsigned long bt_address(unsigned long ra) +{ + extern unsigned long eentry; + + if (__kernel_text_address(ra)) + return ra; + + if (__module_text_address(ra)) + return ra; + + if (ra >= eentry && ra <= eentry + EXCCODE_INT_END * VECSIZE) { + unsigned long type = (ra - eentry) / VECSIZE; + unsigned long offset = (ra - eentry) % VECSIZE; + unsigned long func; + switch (type) { + case 0 ... EXCCODE_INT_START-1: + func = (unsigned long)exception_table[type]; + break; + case EXCCODE_INT_START ... 
EXCCODE_INT_END: + func = (unsigned long)handle_vint; + break; + default: + func = (unsigned long)handle_reserved; + return 0; + } + + return func + offset; + } + + return ra; +} + +static inline bool on_stack(struct stack_info *info, unsigned long addr, + size_t len) +{ + unsigned long begin = info->begin; + unsigned long end = info->end; + + return (info->type != STACK_TYPE_UNKNOWN && + addr >= begin && addr < end && addr + len > begin && addr + len <= end); +} + +static bool stack_access_ok(struct unwind_state *state, unsigned long addr, + size_t len) +{ + struct stack_info *info = &state->stack_info; + + if (!on_stack(info, addr, len) && + (get_stack_info(addr, state->task, info))) + return false; + + return true; +} + +static bool is_entry_func(unsigned long addr) +{ + extern u32 kernel_entry; + extern u32 kernel_entry_end; + + return addr >= (unsigned long)&kernel_entry && + addr < (unsigned long)&kernel_entry_end; +} + +#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + struct orc_entry *orc; + struct pt_regs *regs; + unsigned long *p, pc; + + if (unwind_done(state)) + return false; + + /* Don't let modules unload while we're reading their ORC data. */ + preempt_disable(); + + if (is_entry_func(state->pc)) + goto end; + + orc = orc_find(state->pc); + if (!orc) { + orc = &orc_fp_entry; + state->error = true; + } + + switch (orc->sp_reg) { + case ORC_REG_SP: + state->sp = state->sp + orc->sp_offset; + break; + case ORC_REG_FP: + state->sp = state->fp; + break; + default: + orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc); + goto err; + } + + switch (orc->fp_reg) { + case ORC_REG_PREV_SP: + p = (unsigned long *)(state->sp + orc->fp_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + state->fp = *p; + break; + case ORC_REG_UNDEFINED: + /* Nothing. 
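+		 * The FP was not modified in this frame, so the value
+		 * inherited from the previous frame is still valid.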
*/ + break; + default: + orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc); + goto err; + } + + switch (orc->type) { + case UNWIND_HINT_TYPE_CALL: + if (orc->ra_reg == ORC_REG_PREV_SP) { + p = (unsigned long *)(state->sp + orc->ra_offset); + if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) + goto err; + + pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + *p, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); + + pc -= INSN_LINK_OFFSET; + } else if (orc->ra_reg == ORC_REG_UNDEFINED) { + if (!state->ra || state->ra == state->pc) + goto err; + + pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, + state->ra, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); + + pc -= INSN_LINK_OFFSET; + state->ra = 0; + } else { + orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc); + goto err; + } + break; + case UNWIND_HINT_TYPE_REGS: + if (state->stack_info.type == STACK_TYPE_IRQ && state->sp == info->end) + regs = (struct pt_regs *)info->next_sp; + else + regs = (struct pt_regs *)state->sp; + + if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs))) + goto err; + + if ((info->end == (unsigned long)regs + sizeof(*regs)) && + !regs->regs[3] && !regs->regs[1]) + goto end; + + if (user_mode(regs)) + goto end; + + pc = regs->csr_era; + if (!__kernel_text_address(pc)) + goto err; + + state->sp = regs->regs[3]; + state->ra = regs->regs[1]; + state->fp = regs->regs[22]; + get_stack_info(state->sp, state->task, info); + + break; + default: + orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc); + goto err; + } + + state->pc = bt_address(pc); + if (!state->pc) { + pr_err("cannot find unwind pc at %pK\n", (void *)pc); + goto err; + } + + preempt_enable(); + return true; + +err: + state->error = true; + +end: + preempt_enable(); + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + + return __kernel_text_address(state->pc) ? state->pc : 0; +} diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S index 50662b99cb98..cd973bcda25f 100644 --- a/arch/loongarch/kernel/vmlinux.lds.S +++ b/arch/loongarch/kernel/vmlinux.lds.S @@ -2,6 +2,7 @@ #include #include #include +#include #define PAGE_SIZE _PAGE_SIZE #define RO_EXCEPTION_TABLE_ALIGN 4 @@ -62,6 +63,8 @@ SECTIONS RO_DATA(4096) RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE) + ORC_UNWIND_TABLE + /* We want the small data sections together, so single-instruction offsets can access them all, and initialized data all before uninitialized, so we can shorten the on-disk segment size. 
*/ diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c index 108fe47257e5..cbecddd88bd0 100644 --- a/arch/loongarch/mm/tlb.c +++ b/arch/loongarch/mm/tlb.c @@ -15,6 +15,8 @@ #include #include +extern void *exception_table[]; + void local_flush_tlb_all(void) { invtlb_all(INVTLB_CURRENT_ALL, 0, 0); @@ -262,27 +264,25 @@ extern long exception_handlers[VECSIZE * 128 / sizeof(long)]; static void setup_tlb_handler(int cpu) { + int i; + setup_ptwalker(); local_flush_tlb_all(); + if (cpu_has_ptw) { + exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw; + exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw; + exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw; + exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw; + } + /* The tlb handlers are generated only once */ if (cpu == 0) { memcpy((void *)tlbrentry, handle_tlb_refill, 0x80); local_flush_icache_range(tlbrentry, tlbrentry + 0x80); - if (!cpu_has_ptw) { - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE); - } else { - set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE); - set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE); - set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE); - set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE); - } - set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE); - set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE); + + for (i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++) + set_handler(i * VECSIZE, exception_table[i], VECSIZE); } else { int vec_sz __maybe_unused; void *addr __maybe_unused; diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 421aacd7929c..528042120ab9 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -30,6 +30,7 @@ li.w a1, \write la.abs t0, do_page_fault jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_CODE_END(tlb_do_page_fault_\write) .endm @@ -46,6 +47,7 @@ SYM_CODE_START(handle_tlb_protect) REG_S a2, sp, PT_BVADDR la.abs t0, do_page_fault jirl ra, t0, 0 + UNWIND_HINT_REGS RESTORE_ALL_AND_RET SYM_CODE_END(handle_tlb_protect) diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile index 58151d003e40..4eb720bac06d 100644 --- a/arch/loongarch/power/Makefile +++ b/arch/loongarch/power/Makefile @@ -1,3 +1,5 @@ +OBJECT_FILES_NON_STANDARD_suspend_asm.o := y + obj-y += platform.o obj-$(CONFIG_SUSPEND) += suspend.o suspend_asm.o diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 9bf343ceeed2..9a1e032d21f0 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Objects to go into the VDSO. +OBJECT_FILES_NON_STANDARD := y # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. 
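
Usage illustration (not part of the patch): a minimal sketch of how a caller
drives the unwind API added above. The caller name dump_backtrace_sketch() is
hypothetical; the unwind_* helpers, pr_cont() and %pB are the ones already
used in this series.

static void dump_backtrace_sketch(struct task_struct *task, struct pt_regs *regs)
{
	struct unwind_state state;
	unsigned long pc;

	/* Step one frame at a time until the unwinder finishes or errors out. */
	for (unwind_start(&state, task, regs);
	     !unwind_done(&state) && !unwind_error(&state);
	     unwind_next_frame(&state)) {
		pc = unwind_get_return_address(&state);
		if (!pc)
			break;
		pr_cont(" %pB", (void *)pc);
	}
	pr_cont("\n");
}

This is the same pattern __save_stack_trace_reliable() above follows: start
from a register snapshot, walk one frame at a time, and treat a zero return
address as the end of the stack.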
-- Gitee

From 18173cca13cd8aa1a61438b9b5f2f42e01fc9530 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Mon, 12 Apr 2021 11:30:39 +0800
Subject: [PATCH 206/241] LoongArch: Add HIGHMEM and FDT support

Wire up HIGHMEM for future 32-bit configurations, and add flattened device
tree support (including a built-in DTB) for booting.

Change-Id: Ie1135446b96a846ad8028c75138357b7299ad883
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kbuild                         |   2 +
 arch/loongarch/Kconfig                        |  29 +++
 arch/loongarch/Makefile                       |   3 +-
 arch/loongarch/boot/dts/Makefile              |   4 +
 arch/loongarch/boot/dts/loongson/Makefile     |   4 +
 .../boot/dts/loongson/loongson3.dtsi          | 235 ++++++++++++++++++
 .../boot/dts/loongson/loongson3_ls7a.dts      | 159 ++++++++++++
 arch/loongarch/include/asm/fixmap.h           |  51 ++++
 arch/loongarch/include/asm/highmem.h          |  53 ++++
 arch/loongarch/include/asm/kmap_types.h       |   6 +
 arch/loongarch/include/asm/setup.h            |  10 +
 arch/loongarch/kernel/setup.c                 |  67 +++++
 arch/loongarch/loongson64/setup.c             |  13 +
 arch/loongarch/mm/pgtable-64.c                |   7 +
 14 files changed, 642 insertions(+), 1 deletion(-)
 create mode 100644 arch/loongarch/boot/dts/loongson/Makefile
 create mode 100644 arch/loongarch/boot/dts/loongson/loongson3.dtsi
 create mode 100644 arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts
 create mode 100644 arch/loongarch/include/asm/highmem.h

diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index 4fd77e6cccd0..be21050cf9df 100644
--- a/arch/loongarch/Kbuild
+++ b/arch/loongarch/Kbuild
@@ -20,3 +20,5 @@ obj-y += kernel/
 obj-y += mm/
 obj-y += net/
 obj-y += vdso/
+
+obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1075c24a5362..4006629253f6 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -146,8 +146,11 @@ config MACH_LOONGSON64
 	select SYS_SUPPORTS_HOTPLUG_CPU
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_64BIT_KERNEL
+	select SYS_SUPPORTS_HIGHMEM
 	select SYS_SUPPORTS_RELOCATABLE
 	select ZONE_DMA32
+	select USE_OF
+	select BUILTIN_DTB
 	help
 	  This enables the support of Loongson 64-bit family of machines. These
 	  machines are based on new Loongson-3 processors (Old Loongson is MIPS
@@ -200,6 +203,7 @@ config CPU_LOONGSON64
 	bool "Loongson 64-bit CPU"
 	depends on SYS_HAS_CPU_LOONGSON64
 	select CPU_SUPPORTS_64BIT_KERNEL
+	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_LSX
 	select CPU_SUPPORTS_LASX
 	select GPIOLIB
@@ -402,6 +406,22 @@ config CPU_HAS_LASX
 
 	  If unsure, say Y.
 
+#
+# - Highmem only makes sense for the 32-bit kernel.
+# - We use SYS_SUPPORTS_HIGHMEM to offer highmem only for systems where we
+#   know they might have memory configurations that could make use of highmem
+#   support.
+#
+config HIGHMEM
+	bool "High Memory Support"
+	depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM
+
+config CPU_SUPPORTS_HIGHMEM
+	bool
+
+config SYS_SUPPORTS_HIGHMEM
+	bool
+
 config CPU_SUPPORTS_LSX
 	bool
 
@@ -720,6 +740,15 @@ config ARCH_STRICT_ALIGN
 	  to run kernel only on systems with h/w unaligned access support in
 	  order to optimise for performance.
 
+config USE_OF
+	bool
+	select OF
+	select OF_EARLY_FLATTREE
+	select IRQ_DOMAIN
+
+config BUILTIN_DTB
+	bool
+
 endmenu
 
 config ARCH_ENABLE_MEMORY_HOTPLUG
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 3a3f2b5712da..5d5ac68701f6 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -8,6 +8,7 @@ archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/loongarch/boot/tools relocs
 
 KBUILD_DEFCONFIG := loongson3_defconfig
+KBUILD_DTBS := dtbs
 
 #
 # Select the object file format to substitute into the linker script.
@@ -159,7 +160,7 @@ bootz-y := vmlinuz bootz-y += vmlinuz.bin bootz-$(CONFIG_EFI_STUB)+= vmlinuz.efi -all: $(all-y) +all: $(all-y) $(KBUILD_DTBS) # boot $(boot-y): vmlinux FORCE diff --git a/arch/loongarch/boot/dts/Makefile b/arch/loongarch/boot/dts/Makefile index e69de29bb2d1..94c45818e4c9 100644 --- a/arch/loongarch/boot/dts/Makefile +++ b/arch/loongarch/boot/dts/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +subdir-y += loongson + +obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y)) diff --git a/arch/loongarch/boot/dts/loongson/Makefile b/arch/loongarch/boot/dts/loongson/Makefile new file mode 100644 index 000000000000..9def5f6ee9c6 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_CPU_LOONGSON64) += loongson3_ls7a.dtb + +obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/loongarch/boot/dts/loongson/loongson3.dtsi b/arch/loongarch/boot/dts/loongson/loongson3.dtsi new file mode 100644 index 000000000000..2f985a7ddaf2 --- /dev/null +++ b/arch/loongarch/boot/dts/loongson/loongson3.dtsi @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/ { + /* + * Loongson-3 may have as many as 4 nodes, each node has 4 cores. + * Each core has its own pcache and cores in the same node share scache. + */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x0>; + l2-cache = <&vcache0>; + next-level-cache = <&scache0>; + }; + + cpu@1 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x1>; + l2-cache = <&vcache1>; + next-level-cache = <&scache0>; + }; + + cpu@2 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x2>; + l2-cache = <&vcache2>; + next-level-cache = <&scache0>; + }; + + cpu@3 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x3>; + l2-cache = <&vcache3>; + next-level-cache = <&scache0>; + }; + + cpu@4 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x4>; + l2-cache = <&vcache4>; + next-level-cache = <&scache1>; + }; + + cpu@5 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x5>; + l2-cache = <&vcache5>; + next-level-cache = <&scache1>; + }; + + cpu@6 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x6>; + l2-cache = <&vcache6>; + next-level-cache = <&scache1>; + }; + + cpu@7 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x7>; + l2-cache = <&vcache7>; + next-level-cache = <&scache1>; + }; + + cpu@8 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x8>; + l2-cache = <&vcache8>; + next-level-cache = <&scache2>; + }; + + cpu@9 { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0x9>; + l2-cache = <&vcache9>; + next-level-cache = <&scache2>; + }; + + cpu@a { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xa>; + l2-cache = <&vcachea>; + next-level-cache = <&scache2>; + }; + + cpu@b { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xb>; + l2-cache = <&vcacheb>; + next-level-cache = <&scache2>; + }; + + cpu@c { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xc>; + l2-cache = <&vcachec>; + next-level-cache = <&scache3>; + }; + + cpu@d { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg = <0xd>; + l2-cache = <&vcached>; + next-level-cache = <&scache3>; + }; + + cpu@e { + compatible = "loongson,loongson3"; + device_type = "cpu"; + reg 
= <0xe>;
+			l2-cache = <&vcachee>;
+			next-level-cache = <&scache3>;
+		};
+
+		cpu@f {
+			compatible = "loongson,loongson3";
+			device_type = "cpu";
+			reg = <0xf>;
+			l2-cache = <&vcachef>;
+			next-level-cache = <&scache3>;
+		};
+
+		vcache0: l2-cache0 {
+			compatible = "cache";
+			next-level-cache = <&scache0>;
+		};
+
+		vcache1: l2-cache1 {
+			compatible = "cache";
+			next-level-cache = <&scache0>;
+		};
+
+		vcache2: l2-cache2 {
+			compatible = "cache";
+			next-level-cache = <&scache0>;
+		};
+
+		vcache3: l2-cache3 {
+			compatible = "cache";
+			next-level-cache = <&scache0>;
+		};
+
+		vcache4: l2-cache4 {
+			compatible = "cache";
+			next-level-cache = <&scache1>;
+		};
+
+		vcache5: l2-cache5 {
+			compatible = "cache";
+			next-level-cache = <&scache1>;
+		};
+
+		vcache6: l2-cache6 {
+			compatible = "cache";
+			next-level-cache = <&scache1>;
+		};
+
+		vcache7: l2-cache7 {
+			compatible = "cache";
+			next-level-cache = <&scache1>;
+		};
+
+		vcache8: l2-cache8 {
+			compatible = "cache";
+			next-level-cache = <&scache2>;
+		};
+
+		vcache9: l2-cache9 {
+			compatible = "cache";
+			next-level-cache = <&scache2>;
+		};
+
+		vcachea: l2-cachea {
+			compatible = "cache";
+			next-level-cache = <&scache2>;
+		};
+
+		vcacheb: l2-cacheb {
+			compatible = "cache";
+			next-level-cache = <&scache2>;
+		};
+
+		vcachec: l2-cachec {
+			compatible = "cache";
+			next-level-cache = <&scache3>;
+		};
+
+		vcached: l2-cached {
+			compatible = "cache";
+			next-level-cache = <&scache3>;
+		};
+
+		vcachee: l2-cachee {
+			compatible = "cache";
+			next-level-cache = <&scache3>;
+		};
+
+		vcachef: l2-cachef {
+			compatible = "cache";
+			next-level-cache = <&scache3>;
+		};
+
+		scache0: l3-cache0 {
+			compatible = "cache";
+		};
+
+		scache1: l3-cache1 {
+			compatible = "cache";
+		};
+
+		scache2: l3-cache2 {
+			compatible = "cache";
+		};
+
+		scache3: l3-cache3 {
+			compatible = "cache";
+		};
+	};
+};
diff --git a/arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts b/arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts
new file mode 100644
index 000000000000..f9fa2d5d2032
--- /dev/null
+++ b/arch/loongarch/boot/dts/loongson/loongson3_ls7a.dts
@@ -0,0 +1,159 @@
+/dts-v1/;
+#include "loongson3.dtsi"
+/ {
+	model = "loongson,generic";
+	compatible = "loongson,loongson3";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	memory {
+		name = "memory";
+		device_type = "memory";
+	};
+
+	cpuic: interrupt-controller {
+		compatible = "loongson,cpu-interrupt-controller";
+		interrupt-controller;
+		#interrupt-cells = <1>;
+	};
+
+	platic: interrupt-controller@1bd00040 {
+		compatible = "loongson,ls7a-interrupt-controller";
+		interrupt-controller;
+		#interrupt-cells = <1>;
+		interrupts = <3>;
+		interrupt-parent = <&cpuic>;
+	};
+
+	aliases {
+		i2c0 = &i2c0;
+		i2c1 = &i2c1;
+		i2c2 = &i2c2;
+		i2c3 = &i2c3;
+		i2c4 = &i2c4;
+		i2c5 = &i2c5;
+	};
+
+	platform {
+		compatible = "loongson,nbus", "simple-bus";
+		#address-cells = <2>;
+		#size-cells = <1>;
+		enable-lpc-irq;
+		ranges = <0x000 0x00000000 0x000 0x00000000 0x20000000
+			  0x000 0x40000000 0x000 0x40000000 0x40000000
+			  0xe00 0x00000000 0xe00 0x00000000 0x80000000>;
+
+		uart0: serial@10080000 {
+			device_type = "serial";
+			compatible = "ns16550,loongson";
+			reg = <0 0x10080000 0x100>;
+			clock-frequency = <50000000>;
+			interrupts = <72>;
+			interrupt-parent = <&platic>;
+			no-loopback-test;
+		};
+
+		gpio: gpio@100e0000 {
+			compatible = "loongson,ls7a-gpio";
+			reg = <0 0x100e0000 0xc00>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			ngpios = <57>;
+			conf_offset = <0x800>;
+			out_offset = <0x900>;
+			in_offset = <0xa00>;
+			gpio_base = <16>;
+			interrupts = <124>;
+
interrupt-parent = <&platic>;
+		};
+
+		i2c0: i2c@10090000 {
+			compatible = "loongson,ls7a-i2c";
+			reg = <0 0x10090000 0x8>;
+			interrupts = <73>;
+			interrupt-parent = <&platic>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c1: i2c@10090100 {
+			compatible = "loongson,ls7a-i2c";
+			reg = <0 0x10090100 0x8>;
+			interrupts = <73>;
+			interrupt-parent = <&platic>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c2: i2c@10090200 {
+			compatible = "loongson,ls7a-i2c";
+			reg = <0 0x10090200 0x8>;
+			interrupts = <73>;
+			interrupt-parent = <&platic>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c3: i2c@10090300 {
+			compatible = "loongson,ls7a-i2c";
+			reg = <0 0x10090300 0x8>;
+			interrupts = <73>;
+			interrupt-parent = <&platic>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c4: i2c@10090400 {
+			compatible = "loongson,ls7a-i2c";
+			reg = <0 0x10090400 0x8>;
+			interrupts = <73>;
+			interrupt-parent = <&platic>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		i2c5: i2c@10090500 {
+			compatible = "loongson,ls7a-i2c";
+			reg = <0 0x10090500 0x8>;
+			interrupts = <73>;
+			interrupt-parent = <&platic>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+		};
+
+		rtc0: rtc@100d0100 {
+			compatible = "loongson,ls7a-rtc";
+			reg = <0 0x100d0100 0x100>;
+			interrupts = <116>;
+			interrupt-parent = <&platic>;
+		};
+
+		pwm0: pwm@100a0000 {
+			compatible = "loongson,ls7a-pwm";
+			reg = <0 0x100a0000 0x10>;
+			interrupts = <88>;
+			interrupt-parent = <&platic>;
+		};
+
+		pwm1: pwm@100a0100 {
+			compatible = "loongson,ls7a-pwm";
+			reg = <0 0x100a0100 0x10>;
+			interrupts = <89>;
+			interrupt-parent = <&platic>;
+		};
+
+		pwm2: pwm@100a0200 {
+			compatible = "loongson,ls7a-pwm";
+			reg = <0 0x100a0200 0x10>;
+			interrupts = <90>;
+			interrupt-parent = <&platic>;
+		};
+
+		pwm3: pwm@100a0300 {
+			compatible = "loongson,ls7a-pwm";
+			reg = <0 0x100a0300 0x10>;
+			interrupts = <91>;
+			interrupt-parent = <&platic>;
+		};
+	};
+};
diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h
index e6f8d21809ff..76b1f550f289 100644
--- a/arch/loongarch/include/asm/fixmap.h
+++ b/arch/loongarch/include/asm/fixmap.h
@@ -13,15 +13,59 @@
 #define _ASM_FIXMAP_H
 
 #include
+#ifdef CONFIG_HIGHMEM
+#include
+#include
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of virtual memory (0xfffff000) backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * these 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages. (or larger if used with an increment
+ * higher than 1) use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
 
 #define NR_FIX_BTMAPS 64
 
+/*
+ * on UP currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */ enum fixed_addresses { FIX_HOLE, +#define FIX_N_COLOURS 8 + FIX_CMAP_BEGIN, + FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * 2), +#ifdef CONFIG_HIGHMEM + /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_BEGIN = FIX_CMAP_END + 1, + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXMAP_PAGE_IO PAGE_KERNEL_SUC @@ -31,4 +75,11 @@ extern void __set_fixmap(enum fixed_addresses idx, #include +/* + * Called from pgtable_init() + */ +extern void fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base); + + #endif diff --git a/arch/loongarch/include/asm/highmem.h b/arch/loongarch/include/asm/highmem.h new file mode 100644 index 000000000000..c555be1008d4 --- /dev/null +++ b/arch/loongarch/include/asm/highmem.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *pkmap_page_table; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. 
+ */ +#define LAST_PKMAP 1024 + +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +extern void *kmap_high(struct page *page); +extern void kunmap_high(struct page *page); + +extern void *kmap(struct page *page); +extern void kunmap(struct page *page); +extern void *kmap_atomic(struct page *page); +extern void *kmap_atomic_pfn(unsigned long pfn); + +#define flush_cache_kmaps() BUG_ON(cpu_has_dc_aliases) + +extern void kmap_init(void); + +#define kmap_prot PAGE_KERNEL + +#endif /* __KERNEL__ */ + +#endif /* _ASM_HIGHMEM_H */ diff --git a/arch/loongarch/include/asm/kmap_types.h b/arch/loongarch/include/asm/kmap_types.h index 5c2173103727..16665dc2431b 100644 --- a/arch/loongarch/include/asm/kmap_types.h +++ b/arch/loongarch/include/asm/kmap_types.h @@ -2,6 +2,12 @@ #ifndef _ASM_KMAP_TYPES_H #define _ASM_KMAP_TYPES_H +#ifdef CONFIG_DEBUG_HIGHMEM +#define __WITH_KM_FENCE +#endif + #include +#undef __WITH_KM_FENCE + #endif diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index 6a628c3bce32..7628c51616d6 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -20,4 +20,14 @@ extern void per_cpu_trap_init(int cpu); extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len); +#ifdef CONFIG_USE_OF + +struct boot_param_header; + +extern void device_tree_init(void); + +#else /* CONFIG_OF */ +static inline void device_tree_init(void) { } +#endif /* CONFIG_OF */ + #endif /* __SETUP_H */ diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index a2172b4f15dc..87d9a3ac2784 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -13,10 +13,13 @@ #include #include #include +#include #include #include #include #include +#include +#include #include #include @@ -85,6 +88,62 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add memblock_add(start, size); } +static void __init dt_bootmem_init(void) +{ + phys_addr_t ramstart, ramend; + unsigned long start, end; + int i; + + ramstart = memblock_start_of_DRAM(); + ramend = memblock_end_of_DRAM(); + + /* Reserve memory occupied by kernel. */ + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&_end) - __pa_symbol(&_text)); + + /* + * Reserve any memory between the start of RAM and PHYS_OFFSET + */ + if (ramstart > PHYS_OFFSET) + memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET); + + if (PFN_UP(ramstart) > ARCH_PFN_OFFSET) { + pr_info("Wasting %lu bytes for tracking %lu unused pages\n", + (unsigned long)((PFN_UP(ramstart) - ARCH_PFN_OFFSET) * sizeof(struct page)), + (unsigned long)(PFN_UP(ramstart) - ARCH_PFN_OFFSET)); + } + + min_low_pfn = ARCH_PFN_OFFSET; + max_pfn = PFN_DOWN(ramend); + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { + /* + * Skip highmem here so we get an accurate max_low_pfn if low + * memory stops short of high memory. + * If the region overlaps HIGHMEM_START, end is clipped so + * max_pfn excludes the highmem portion. 
+ */ + if (start >= PFN_DOWN(HIGHMEM_START)) + continue; + if (end > PFN_DOWN(HIGHMEM_START)) + end = PFN_DOWN(HIGHMEM_START); + if (end > max_low_pfn) + max_low_pfn = end; + } + + if (min_low_pfn >= max_low_pfn) + panic("Incorrect memory mapping !!!"); + + if (max_pfn > PFN_DOWN(HIGHMEM_START)) { +#ifdef CONFIG_HIGHMEM + highstart_pfn = PFN_DOWN(HIGHMEM_START); + highend_pfn = max_pfn; +#else + max_low_pfn = PFN_DOWN(HIGHMEM_START); + max_pfn = max_low_pfn; +#endif + } +} + #ifdef CONFIG_ARCH_WRITECOMBINE bool wc_enabled = true; #else @@ -237,6 +296,14 @@ static void __init arch_mem_init(char **cmdline_p) check_kernel_sections_mem(); + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + + if (initial_boot_params) + dt_bootmem_init(); + + device_tree_init(); + /* * In order to reduce the possibility of kernel panic when failed to * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate diff --git a/arch/loongarch/loongson64/setup.c b/arch/loongarch/loongson64/setup.c index 701708769bc0..a37b699837df 100644 --- a/arch/loongarch/loongson64/setup.c +++ b/arch/loongarch/loongson64/setup.c @@ -10,6 +10,8 @@ */ #include #include +#include +#include #include #ifdef CONFIG_VT @@ -39,3 +41,14 @@ static int __init register_gop_device(void) return PTR_ERR_OR_ZERO(pd); } subsys_initcall(register_gop_device); + +#define NR_CELLS 6 + +void __init device_tree_init(void) +{ + if (!initial_boot_params) + return; + + if (early_init_dt_verify(initial_boot_params)) + unflatten_and_copy_device_tree(); +} diff --git a/arch/loongarch/mm/pgtable-64.c b/arch/loongarch/mm/pgtable-64.c index ac112f2bf072..dcf7e79cd5dc 100644 --- a/arch/loongarch/mm/pgtable-64.c +++ b/arch/loongarch/mm/pgtable-64.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, void __init pagetable_init(void) { + unsigned long vaddr; + /* Initialize the entire pgd. 
 */
	pgd_init((unsigned long)swapper_pg_dir);
	pgd_init((unsigned long)invalid_pg_dir);
@@ -111,4 +114,9 @@ void __init pagetable_init(void)
 #ifndef __PAGETABLE_PMD_FOLDED
 	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
 #endif
+	/*
+	 * Fixed mappings:
+	 */
+	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+	fixrange_init(vaddr, vaddr + FIXADDR_SIZE, swapper_pg_dir);
 }
--
Gitee

From 0c9f0df8d1c4e9b845f4968675bbe621a713aa8f Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 207/241] LoongArch: Add debug mechanism support

Debug mechanisms include: gdb, ftrace, kprobe, uprobe and jump_label

Change-Id: Idb5ab9586691b27019253e674ec745581022dedc
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig                  |  16 +
 arch/loongarch/Kconfig.debug            |   6 +
 arch/loongarch/Makefile                 |   5 +
 arch/loongarch/include/asm/ftrace.h     |  47 ++
 arch/loongarch/include/asm/inst.h       |  64 +++
 arch/loongarch/include/asm/jump_label.h |  50 ++
 arch/loongarch/include/asm/kgdb.h       |  32 ++
 arch/loongarch/include/asm/kprobes.h    |  87 ++++
 arch/loongarch/include/asm/module.h     |  13 +-
 arch/loongarch/include/asm/module.lds.h |   1 +
 arch/loongarch/include/asm/stackframe.h |   4 +
 arch/loongarch/include/asm/unwind.h     |   2 +-
 arch/loongarch/include/asm/uprobes.h    |  40 ++
 arch/loongarch/kernel/Makefile          |  19 +
 arch/loongarch/kernel/alternative.c     |  21 -
 arch/loongarch/kernel/entry.S           |   4 +
 arch/loongarch/kernel/ftrace.c          |  97 ++++
 arch/loongarch/kernel/ftrace_dyn.c      | 304 ++++++++++++
 arch/loongarch/kernel/inst.c            | 320 +++++++++++++
 arch/loongarch/kernel/jump_label.c      |  23 +
 arch/loongarch/kernel/kgdb.c            | 584 ++++++++++++++++++++++++
 arch/loongarch/kernel/kprobes.c         | 424 +++++++++++++++++
 arch/loongarch/kernel/mcount.S          |  94 ++++
 arch/loongarch/kernel/mcount_dyn.S      | 164 +++++++
 arch/loongarch/kernel/module-sections.c |  12 +-
 arch/loongarch/kernel/module.c          |  27 +-
 arch/loongarch/kernel/spinlock_test.c   | 123 +++++
 arch/loongarch/kernel/traps.c           |   6 +
 arch/loongarch/kernel/unwind_prologue.c |  32 +-
 arch/loongarch/kernel/uprobes.c         | 243 ++++++++++
 arch/loongarch/mm/fault.c               |   9 +
 scripts/recordmcount.c                  |  39 ++
 32 files changed, 2879 insertions(+), 33 deletions(-)
 create mode 100644 arch/loongarch/include/asm/ftrace.h
 create mode 100644 arch/loongarch/include/asm/jump_label.h
 create mode 100644 arch/loongarch/include/asm/kgdb.h
 create mode 100644 arch/loongarch/include/asm/kprobes.h
 create mode 100644 arch/loongarch/include/asm/uprobes.h
 create mode 100644 arch/loongarch/kernel/ftrace.c
 create mode 100644 arch/loongarch/kernel/ftrace_dyn.c
 create mode 100644 arch/loongarch/kernel/inst.c
 create mode 100644 arch/loongarch/kernel/jump_label.c
 create mode 100644 arch/loongarch/kernel/kgdb.c
 create mode 100644 arch/loongarch/kernel/kprobes.c
 create mode 100644 arch/loongarch/kernel/mcount.S
 create mode 100644 arch/loongarch/kernel/mcount_dyn.S
 create mode 100644 arch/loongarch/kernel/spinlock_test.c
 create mode 100644 arch/loongarch/kernel/uprobes.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 4006629253f6..b6056382f9fd 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -41,6 +41,7 @@ config LOONGARCH
 	select ARCH_KEEP_MEMBLOCK
 	select ARCH_SUPPORTS_ACPI
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_UPROBES
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
 	select ARCH_USE_QUEUED_RWLOCKS
@@ -76,6 +77,9 @@ config LOONGARCH
 	select GENERIC_VDSO_TIME_NS
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_JUMP_LABEL_RELATIVE
+	select
HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER @@ -84,18 +88,27 @@ config LOONGARCH select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS + select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_EBPF_JIT if 64BIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN select HAVE_EXIT_THREAD select HAVE_FAST_GUP + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GENERIC_VDSO select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KPROBES + select HAVE_KPROBES_ON_FTRACE + select HAVE_KRETPROBES select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC @@ -175,6 +188,9 @@ config SCHED_OMIT_FRAME_POINTER config AS_HAS_EXPLICIT_RELOCS def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x)) +config ARCH_SUPPORTS_UPROBES + bool + config SYS_SUPPORTS_HOTPLUG_CPU bool diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug index 11f505e4aa05..94ff96bb511f 100644 --- a/arch/loongarch/Kconfig.debug +++ b/arch/loongarch/Kconfig.debug @@ -42,3 +42,9 @@ config DEBUG_ZBOOT After the compressed kernel support works, please disable this option to reduce the kernel image size and speed up the booting procedure a little. + +config SPINLOCK_TEST + tristate "Enable spinlock timing tests in debugfs" + default n + help + Add several files to the debugfs to test spinlock speed. diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 5d5ac68701f6..8c524663f93f 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -30,6 +30,11 @@ ifneq ($(SUBARCH),$(ARCH)) endif endif +ifdef CONFIG_DYNAMIC_FTRACE + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + cflags-y += $(call cc-option, -mno-check-zero-division) ifdef CONFIG_64BIT diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h new file mode 100644 index 000000000000..070155c3ccf4 --- /dev/null +++ b/arch/loongarch/include/asm/ftrace.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive for + * more details. + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */ + +#ifndef _ASM_LOONGARCH_FTRACE_H +#define _ASM_LOONGARCH_FTRACE_H + +#define FTRACE_PLT_IDX 0 +#define FTRACE_REGS_PLT_IDX 1 +#define NR_FTRACE_PLTS 2 + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +#ifndef CONFIG_DYNAMIC_FTRACE +extern void _mcount(void); +#define mcount _mcount +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; + +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_TRACER */ +#endif /* _ASM_LOONGARCH_FTRACE_H */ diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 2e4fb22fd3c8..380885072375 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -10,8 +10,12 @@ */ #ifndef _ASM_INST_H #define _ASM_INST_H +#include #include +#include +#include + #include /* HACHACHAHCAHC ... */ @@ -132,6 +136,46 @@ void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned i unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign); unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n); +static inline bool cond_beqz(struct pt_regs *regs, int rj) +{ + return regs->regs[rj] == 0; +} + +static inline bool cond_bnez(struct pt_regs *regs, int rj) +{ + return regs->regs[rj] != 0; +} + +static inline bool cond_beq(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] == regs->regs[rd]; +} + +static inline bool cond_bne(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] != regs->regs[rd]; +} + +static inline bool cond_blt(struct pt_regs *regs, int rj, int rd) +{ + return (long)regs->regs[rj] < (long)regs->regs[rd]; +} + +static inline bool cond_bge(struct pt_regs *regs, int rj, int rd) +{ + return (long)regs->regs[rj] >= (long)regs->regs[rd]; +} + +static inline bool cond_bltu(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] < regs->regs[rd]; +} + +static inline bool cond_bgeu(struct pt_regs *regs, int rj, int rd) +{ + return regs->regs[rj] >= regs->regs[rd]; +} + static inline bool is_branch_insn(union loongarch_instruction insn) { return insn.reg1i21_format.opcode >= beqz_op && @@ -144,10 +188,30 @@ static inline bool is_pc_insn(union loongarch_instruction insn) insn.reg1i20_format.opcode <= pcaddu18i_op; } +unsigned long bs_dest_16(unsigned long now, unsigned int si); +unsigned long bs_dest_21(unsigned long now, unsigned int h, unsigned int l); +unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l); + +int simu_branch(struct pt_regs *regs, union loongarch_instruction insn); +int simu_pc(struct pt_regs *regs, union loongarch_instruction insn); + +int larch_insn_read(void *addr, u32 *insnp); +int larch_insn_write(void *addr, u32 insn); +int larch_insn_patch_text(void *addr, u32 insn); + +u32 larch_insn_gen_nop(void); +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest); +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest); + +u32 larch_insn_gen_addu16id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm); u32 larch_insn_gen_lu52id(enum 
loongarch_gpr rd, enum loongarch_gpr rj, int imm); u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest); +u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, + enum loongarch_gpr rk); +u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj); + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/asm/jump_label.h b/arch/loongarch/include/asm/jump_label.h new file mode 100644 index 000000000000..3cea299a5ef5 --- /dev/null +++ b/arch/loongarch/include/asm/jump_label.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Loongson Technology Corporation Limited + * + * Based on arch/arm64/include/asm/jump_label.h + */ +#ifndef __ASM_JUMP_LABEL_H +#define __ASM_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include + +#define JUMP_LABEL_NOP_SIZE 4 + +#define JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + ".align 3 \n\t" \ + ".long 1b - ., %l[l_yes] - . \n\t" \ + ".quad %0 - . \n\t" \ + ".popsection \n\t" + +static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) +{ + asm_volatile_goto( + "1: nop \n\t" + JUMP_TABLE_ENTRY + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; + +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) +{ + asm_volatile_goto( + "1: b %l[l_yes] \n\t" + JUMP_TABLE_ENTRY + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; + +l_yes: + return true; +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h new file mode 100644 index 000000000000..91b1e30db000 --- /dev/null +++ b/arch/loongarch/include/asm/kgdb.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KGDB_H_ +#define __ASM_KGDB_H_ + +#ifdef __KERNEL__ + +#ifdef CONFIG_32BIT +#define KGDB_GDB_REG_SIZE 32 +#define GDB_SIZEOF_REG sizeof(u32) +#else /* CONFIG_CPU_32BIT */ +#define KGDB_GDB_REG_SIZE 64 +#define GDB_SIZEOF_REG sizeof(u64) +#endif + +#define BUFMAX 2048 +#define DBG_ALL_REG_NUM 76 +#define DBG_MAX_REG_NUM 35 +#define NUMREGBYTES (DBG_MAX_REG_NUM * sizeof(GDB_SIZEOF_REG)) +#define NUMCRITREGBYTES (12 * sizeof(GDB_SIZEOF_REG)) +#define BREAK_INSTR_SIZE 4 +#define CACHE_FLUSH_IS_SAFE 0 + +extern void arch_kgdb_breakpoint(void); +extern void *saved_vectors[32]; +extern void handle_exception(struct pt_regs *regs); +extern void breakinst(void); +extern int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig); + +#endif /* __KERNEL__ */ + +#endif /* __ASM_KGDB_H_ */ diff --git a/arch/loongarch/include/asm/kprobes.h b/arch/loongarch/include/asm/kprobes.h new file mode 100644 index 000000000000..bac015197c7a --- /dev/null +++ b/arch/loongarch/include/asm/kprobes.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Kernel Probes (KProbes) + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H + +#include + +#ifdef CONFIG_KPROBES +#include +#include + +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct kprobe; +struct pt_regs; + +typedef union loongarch_instruction kprobe_opcode_t; + +#define MAX_INSN_SIZE 2 + +#define flush_insn_slot(p) \ +do { \ + if (p->addr) \ + flush_icache_range((unsigned long)p->addr, \ + (unsigned long)p->addr + \ + (MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \ +} while (0) + + +#define kretprobe_blacklist_size 0 + +void arch_remove_kprobe(struct kprobe *p); +int kprobe_fault_handler(struct pt_regs *regs, int trapnr); + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the original instruction */ + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; + unsigned long old_SR; + unsigned long saved_SR; + unsigned long saved_era; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long kprobe_old_SR; + unsigned long kprobe_saved_SR; + unsigned long kprobe_saved_era; + /* Per-thread fields, used while emulating branches */ + unsigned long flags; + unsigned long target_era; + struct prev_kprobe prev_kprobe; +}; + +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_KPROBES_H */ diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h index d368bdfe915e..d9e91add666b 100644 --- a/arch/loongarch/include/asm/module.h +++ b/arch/loongarch/include/asm/module.h @@ -26,6 +26,9 @@ struct mod_arch_specific { int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + + /* for CONFIG_DYNAMIC_FTRACE */ + struct plt_entry *ftrace_trampolines; }; struct got_entry { @@ -33,7 +36,7 @@ struct got_entry { }; struct plt_entry { - u32 inst_lu12iw; + u32 inst_addu16id; u32 inst_lu32id; u32 inst_lu52id; u32 inst_jirl; @@ -53,14 +56,14 @@ static inline struct got_entry emit_got_entry(Elf_Addr val) static inline struct plt_entry emit_plt_entry(unsigned long val) { - u32 lu12iw, lu32id, lu52id, jirl; + u32 addu16id, lu32id, lu52id, jirl; - lu12iw = (0x0000000a << 25 | (((val >> 12) & 0xfffff) << 5) | REG_T1); + addu16id = larch_insn_gen_addu16id(REG_T1, REG_ZERO, ADDR_IMM(val, ADDU16ID)); lu32id = larch_insn_gen_lu32id(REG_T1, ADDR_IMM(val, LU32ID)); lu52id = larch_insn_gen_lu52id(REG_T1, REG_T1, ADDR_IMM(val, LU52ID)); - jirl = larch_insn_gen_jirl(0, REG_T1, 0, (val & 0xfff)); + jirl = larch_insn_gen_jirl(0, REG_T1, 0, (val & 0xffff)); - return (struct plt_entry) { lu12iw, lu32id, lu52id, jirl }; + return (struct plt_entry) { addu16id, lu32id, lu52id, jirl }; } static inline struct plt_idx_entry emit_plt_idx_entry(unsigned long val) diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h index 0739a26c46c0..88554f92e010 100644 --- a/arch/loongarch/include/asm/module.lds.h +++ b/arch/loongarch/include/asm/module.lds.h @@ -5,4 +5,5 @@ SECTIONS { .got 0 : { BYTE(0) } .plt 0 : { BYTE(0) } .plt.idx 0 : { BYTE(0) } + .ftrace_trampoline 0 : { BYTE(0) } } diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h index 58a4644a4ad9..c4895cc9003d 100644 --- 
a/arch/loongarch/include/asm/stackframe.h +++ b/arch/loongarch/include/asm/stackframe.h @@ -159,6 +159,10 @@ cfi_st u0, PT_R21, \docfi csrrd u0, PERCPU_BASE_KS 9: +#ifdef CONFIG_KGDB + li.w t0, CSR_CRMD_WE + csrxchg t0, t0, LOONGARCH_CSR_CRMD +#endif UNWIND_HINT_REGS .endm diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index 14649fdd8883..fcbf6b53490d 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -27,7 +27,7 @@ struct unwind_state { unsigned long sp, pc; bool first; #endif - bool error; + bool error, is_ftrace; }; void unwind_start(struct unwind_state *state, struct task_struct *task, diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h new file mode 100644 index 000000000000..3760f9033e5b --- /dev/null +++ b/arch/loongarch/include/asm/uprobes.h @@ -0,0 +1,40 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#ifndef __ASM_UPROBES_H +#define __ASM_UPROBES_H + +#include +#include + +#include +#include + +/* + * We want this to be defined as union loongarch_instruction but that makes the + * generic code blow up. + */ +typedef u32 uprobe_opcode_t; + +#define MAX_UINSN_BYTES 8 +#define UPROBE_XOL_SLOT_BYTES 128 /* Max. cache line size */ + +#define UPROBE_BRK_UPROBE 0x002a000c /* break 12 */ +#define UPROBE_BRK_UPROBE_XOL 0x002a000d /* break 13 */ + +#define UPROBE_SWBP_INSN UPROBE_BRK_UPROBE +#define UPROBE_SWBP_INSN_SIZE 4 + +struct arch_uprobe { + unsigned long resume_era; + u32 insn[2]; + u32 ixol[2]; +}; + +struct arch_uprobe_task { + unsigned long saved_trap_nr; +}; + +#endif /* __ASM_UPROBES_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 28cadeb891c9..a931eb43971c 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -22,6 +22,19 @@ CFLAGS_module.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall.o += $(call cc-option,-Wno-override-init,) CFLAGS_perf_event.o += $(call cc-option,-Wno-override-init,) +ifdef CONFIG_FUNCTION_TRACER + ifndef CONFIG_DYNAMIC_FTRACE + obj-y += mcount.o ftrace.o + CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + else + obj-y += mcount_dyn.o ftrace_dyn.o + CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE) + endif + CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE) + CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) +endif + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o @@ -44,4 +57,10 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o obj-$(CONFIG_HARDWARE_WATCHPOINTS) += watch.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_SPINLOCK_TEST) += spinlock_test.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c index c9b0c877f870..c6ed3b40372b 100644 --- a/arch/loongarch/kernel/alternative.c +++ b/arch/loongarch/kernel/alternative.c @@ -43,27 +43,6 @@ do { \ } \ } while (0) -#define __SIGNEX(X, SIDX) ((X) >= (1 << SIDX) ? 
~((1 << SIDX) - 1) | (X) : (X)) -#define SIGNEX16(X) __SIGNEX(((unsigned long)(X)), 15) -#define SIGNEX20(X) __SIGNEX(((unsigned long)(X)), 19) -#define SIGNEX21(X) __SIGNEX(((unsigned long)(X)), 20) -#define SIGNEX26(X) __SIGNEX(((unsigned long)(X)), 25) - -static inline unsigned long bs_dest_16(unsigned long now, unsigned int si) -{ - return now + (SIGNEX16(si) << 2); -} - -static inline unsigned long bs_dest_21(unsigned long now, unsigned int h, unsigned int l) -{ - return now + (SIGNEX21(h << 16 | l) << 2); -} - -static inline unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l) -{ - return now + (SIGNEX26(h << 16 | l) << 2); -} - /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void __init_or_module add_nops(union loongarch_instruction *insn, int count) { diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 0f9d3314632e..93d65afe9efa 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -58,6 +58,10 @@ SYM_CODE_START(handle_syscall) SAVE_STATIC +#ifdef CONFIG_KGDB + li.w t1, CSR_CRMD_WE + csrxchg t1, t1, LOONGARCH_CSR_CRMD +#endif UNWIND_HINT_REGS move u0, t0 diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c new file mode 100644 index 000000000000..ce125881033d --- /dev/null +++ b/arch/loongarch/kernel/ftrace.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Code for replacing ftrace calls with jumps. + * + * Author: Huacai Chen + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + * Thanks goes to Steven Rostedt for writing the original x86 version. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* + * As `call _mcount` follows LoongArch psABI, ra-saved operation and + * stack operation can be found before this insn. 
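+ *
+ * A typical instrumented prologue therefore looks like the sketch below
+ * (the frame size and the RA save offset vary from function to function):
+ *
+ *	addi.d	$sp, $sp, -32	# matched by is_stack_open_ins()
+ *	st.d	$ra, $sp, 24	# matched by is_ra_save_ins()
+ *	...
+ *	bl	_mcount
+ *
+ * ftrace_get_parent_ra_addr() below scans backwards over at most 32
+ * instructions to recover the RA save offset from such a prologue.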
+ */
+static inline bool is_ra_save_ins(union loongarch_instruction *ip)
+{
+	/* st.d $ra, $sp, offset */
+	return ip->reg2i12_format.opcode == std_op &&
+	       ip->reg2i12_format.rj == 3 &&
+	       ip->reg2i12_format.rd == 1;
+}
+
+static inline bool is_stack_open_ins(union loongarch_instruction *ip)
+{
+	/* addi.d $sp, $sp, -imm */
+	return ip->reg2i12_format.opcode == addid_op &&
+	       ip->reg2i12_format.rj == 3 &&
+	       ip->reg2i12_format.rd == 3 &&
+	       ip->reg2i12_format.simmediate < 0;
+}
+
+static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off)
+{
+	union loongarch_instruction *insn;
+	int limit = 32;
+
+	insn = (union loongarch_instruction *)insn_addr;
+
+	do {
+		insn--;
+		limit--;
+
+		if (is_ra_save_ins(insn))
+			*ra_off = insn->reg2i12_format.simmediate;
+
+	} while (!is_stack_open_ins(insn) && limit);
+
+	if (!limit)
+		return -EINVAL;
+
+	return 0;
+}
+
+void prepare_ftrace_return(unsigned long self_addr,
+			   unsigned long callsite_sp, unsigned long old)
+{
+	int ra_off;
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		return;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	if (ftrace_get_parent_ra_addr(self_addr, &ra_off))
+		goto out;
+
+	if (!function_graph_enter(old, self_addr, 0, NULL))
+		*(unsigned long *)(callsite_sp + ra_off) = return_hooker;
+
+	return;
+
+out:
+	ftrace_graph_stop();
+	WARN_ON(1);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
new file mode 100644
index 000000000000..4dadfa28f295
--- /dev/null
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Based on arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2013 Linaro Limited
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
+			      bool validate)
+{
+	u32 replaced;
+
+	if (validate) {
+		if (larch_insn_read((void *)pc, &replaced))
+			return -EFAULT;
+
+		if (replaced != old)
+			return -EINVAL;
+	}
+
+	if (larch_insn_patch_text((void *)pc, new))
+		return -EPERM;
+
+	return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long pc;
+	u32 new;
+
+	pc = (unsigned long)&ftrace_call;
+	new = larch_insn_gen_bl(pc, (unsigned long)func);
+
+	return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * The compiler has inserted 2 NOPs before the regular function prologue.
+ * T series registers are available and safe because of LoongArch psABI.
+ *
+ * At runtime, replace nop with bl to enable ftrace call and replace bl with
+ * nop to disable ftrace call. The bl requires us to save the original RA
+ * value, so here it saves RA at t0. The details are:
+ *
+ * | Compiled   | Disabled         | Enabled          |
+ * +------------+------------------+------------------+
+ * | nop        | move t0, ra      | move t0, ra      |
+ * | nop        | nop              | bl ftrace_caller |
+ * | func_body  | func_body        | func_body        |
+ *
+ * The RA value will be recovered by ftrace_regs_entry, and restored into RA
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the move t0, ra is not harmful.
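+ *
+ * ftrace_init_nop() below performs the one-time "nop -> move t0, ra"
+ * rewrite of the first slot at boot (or module load); afterwards only
+ * the second slot is toggled, by ftrace_make_call()/ftrace_make_nop().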
+ */
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	unsigned long pc;
+	u32 old, new;
+
+	pc = rec->ip;
+	old = larch_insn_gen_nop();
+	new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+static inline int __get_mod(struct module **mod, unsigned long addr)
+{
+	preempt_disable();
+	*mod = __module_text_address(addr);
+	preempt_enable();
+
+	if (WARN_ON(!(*mod)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+	struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+	if (addr == FTRACE_ADDR)
+		return &plt[FTRACE_PLT_IDX];
+	if (addr == FTRACE_REGS_ADDR &&
+	    IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return &plt[FTRACE_REGS_PLT_IDX];
+
+	return NULL;
+}
+
+static unsigned long get_plt_addr(struct module *mod, unsigned long addr)
+{
+	struct plt_entry *plt;
+
+	plt = get_ftrace_plt(mod, addr);
+	if (!plt) {
+		pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+		return -EINVAL;
+	}
+
+	return (unsigned long)plt;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long pc;
+	long offset;
+	u32 old, new;
+
+	pc = rec->ip + LOONGARCH_INSN_SIZE;
+	offset = (long)pc - (long)addr;
+
+	if (offset < -SZ_128M || offset >= SZ_128M) {
+		int ret;
+		struct module *mod;
+
+		ret = __get_mod(&mod, pc);
+		if (ret)
+			return ret;
+
+		addr = get_plt_addr(mod, addr);
+	}
+
+	old = larch_insn_gen_nop();
+	new = larch_insn_gen_bl(pc, addr);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	unsigned long pc;
+	long offset;
+	u32 old, new;
+
+	pc = rec->ip + LOONGARCH_INSN_SIZE;
+	offset = (long)pc - (long)addr;
+
+	if (offset < -SZ_128M || offset >= SZ_128M) {
+		int ret;
+		struct module *mod;
+
+		ret = __get_mod(&mod, pc);
+		if (ret)
+			return ret;
+
+		addr = get_plt_addr(mod, addr);
+	}
+
+	new = larch_insn_gen_nop();
+	old = larch_insn_gen_bl(pc, addr);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	unsigned long pc;
+	long offset;
+	u32 old, new;
+
+	pc = rec->ip + LOONGARCH_INSN_SIZE;
+	offset = (long)pc - (long)addr;
+
+	if (offset < -SZ_128M || offset >= SZ_128M) {
+		int ret;
+		struct module *mod;
+
+		ret = __get_mod(&mod, pc);
+		if (ret)
+			return ret;
+
+		addr = get_plt_addr(mod, addr);
+
+		old_addr = get_plt_addr(mod, old_addr);
+	}
+
+	old = larch_insn_gen_bl(pc, old_addr);
+	new = larch_insn_gen_bl(pc, addr);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+void arch_ftrace_update_code(int command)
+{
+	command |= FTRACE_MAY_SLEEP;
+	ftrace_modify_all_code(command);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+extern void ftrace_graph_call(void);
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent)
+{
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	unsigned long old;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+
+	if (!function_graph_enter(old, self_addr, 0, parent))
+		*parent = return_hooker;
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	unsigned long pc, func;
+	u32 branch, nop;
+
+	pc = (unsigned long)&ftrace_graph_call;
+	func = (unsigned
long)&ftrace_graph_caller;
+
+	branch = larch_insn_gen_b(pc, func);
+	nop = larch_insn_gen_nop();
+
+	if (enable)
+		return ftrace_modify_code(pc, nop, branch, true);
+	else
+		return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+/* Ftrace callback handler for kprobes -- called under preempt disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		return;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		unsigned long orig_ip = regs->csr_era;
+
+		regs->csr_era = ip;
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs)) {
+			/*
+			 * Emulate singlestep (and also recover regs->csr_era)
+			 * as if there is a nop.
+			 */
+			regs->csr_era = (unsigned long)p->addr + LOONGARCH_INSN_SIZE;
+			if (unlikely(p->post_handler)) {
+				kcb->kprobe_status = KPROBE_HIT_SSDONE;
+				p->post_handler(p, regs, 0);
+			}
+			regs->csr_era = orig_ip;
+		}
+		/*
+		 * If pre_handler returns !0, it changes regs->csr_era. We
+		 * have to skip emulating the post_handler in that case.
+		 */
+		__this_cpu_write(current_kprobe, NULL);
+	}
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	return 0;
+}
+#endif /* CONFIG_KPROBES_ON_FTRACE */
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
new file mode 100644
index 000000000000..60fb04c8d6c4
--- /dev/null
+++ b/arch/loongarch/kernel/inst.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#include
+#include
+
+#include
+#include
+#include
+
+#define __SIGNEX(X, SIDX) ((X) >= (1 << SIDX) ?
~((1 << SIDX) - 1) | (X) : (X)) +#define SIGNEX16(X) __SIGNEX(((unsigned long)(X)), 15) +#define SIGNEX20(X) __SIGNEX(((unsigned long)(X)), 19) +#define SIGNEX21(X) __SIGNEX(((unsigned long)(X)), 20) +#define SIGNEX26(X) __SIGNEX(((unsigned long)(X)), 25) + +unsigned long bs_dest_16(unsigned long now, unsigned int si) +{ + return now + (SIGNEX16(si) << 2); +} + +unsigned long bs_dest_21(unsigned long now, unsigned int h, unsigned int l) +{ + return now + (SIGNEX21(h << 16 | l) << 2); +} + +unsigned long bs_dest_26(unsigned long now, unsigned int h, unsigned int l) +{ + return now + (SIGNEX26(h << 16 | l) << 2); +} + +int simu_branch(struct pt_regs *regs, union loongarch_instruction insn) +{ + unsigned int si, si_l, si_h, rd, rj; + unsigned long era = regs->csr_era; + + if (era & 3) + return -EFAULT; + + si_l = insn.reg0i26_format.simmediate_l; + si_h = insn.reg0i26_format.simmediate_h; + switch (insn.reg0i26_format.opcode) { + case b_op: + regs->csr_era = bs_dest_26(era, si_h, si_l); + return 0; + case bl_op: + regs->csr_era = bs_dest_26(era, si_h, si_l); + regs->regs[1] = era + LOONGARCH_INSN_SIZE; + return 0; + } + + si_l = insn.reg1i21_format.simmediate_l; + si_h = insn.reg1i21_format.simmediate_h; + rj = insn.reg1i21_format.rj; + switch (insn.reg1i21_format.opcode) { + case beqz_op: + if (cond_beqz(regs, rj)) + regs->csr_era = bs_dest_21(era, si_h, si_l); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + return 0; + case bnez_op: + if (cond_bnez(regs, rj)) + regs->csr_era = bs_dest_21(era, si_h, si_l); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + return 0; + } + + si = insn.reg2i16_format.simmediate; + rj = insn.reg2i16_format.rj; + rd = insn.reg2i16_format.rd; + switch (insn.reg2i16_format.opcode) { + case beq_op: + if (cond_beq(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bne_op: + if (cond_bne(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case blt_op: + if (cond_blt(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bge_op: + if (cond_bge(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bltu_op: + if (cond_bltu(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case bgeu_op: + if (cond_bgeu(regs, rj, rd)) + regs->csr_era = bs_dest_16(era, si); + else + regs->csr_era += LOONGARCH_INSN_SIZE; + break; + case jirl_op: + /* Use bs_dest_16() to calc the dest. 
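+		 * jirl is register-indirect: its destination is
+		 * GR[rj] + (sext(si16) << 2), so the helper is reused here
+		 * with rj as the base address instead of the ERA.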
*/ + regs->csr_era = bs_dest_16(regs->regs[rj], si); + regs->regs[rd] = era + LOONGARCH_INSN_SIZE; + break; + default: + return -EINVAL; + } + + return 0; +} + +int simu_pc(struct pt_regs *regs, union loongarch_instruction insn) +{ + unsigned long era = regs->csr_era; + unsigned int rd = insn.reg1i20_format.rd; + unsigned int si = insn.reg1i20_format.simmediate; + + if (era & 3) + return -EFAULT; + + switch (insn.reg1i20_format.opcode) { + case pcaddi_op: + regs->regs[rd] = era + (SIGNEX20(si) << 2); + break; + case pcalau12i_op: + regs->regs[rd] = era + (SIGNEX20(si) << 12); + regs->regs[rd] &= ~((1 << 12) - 1); + break; + case pcaddu12i_op: + regs->regs[rd] = era + (SIGNEX20(si) << 12); + break; + case pcaddu18i_op: + regs->regs[rd] = era + (SIGNEX20(si) << 18); + break; + default: + return -EINVAL; + } + + regs->csr_era += LOONGARCH_INSN_SIZE; + + return 0; +} + +static DEFINE_RAW_SPINLOCK(patch_lock); + +int larch_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE); + if (!ret) + *insnp = val; + + return ret; +} + +int larch_insn_write(void *addr, u32 insn) +{ + int ret; + unsigned long flags = 0; + + raw_spin_lock_irqsave(&patch_lock, flags); + ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE); + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + +int larch_insn_patch_text(void *addr, u32 insn) +{ + int ret; + u32 *tp = addr; + + if ((unsigned long)tp & 3) + return -EINVAL; + + ret = larch_insn_write(tp, insn); + if (!ret) + flush_icache_range((unsigned long)tp, + (unsigned long)tp + LOONGARCH_INSN_SIZE); + + return ret; +} + +u32 larch_insn_gen_nop(void) +{ + return INSN_NOP; +} + +u32 larch_insn_gen_b(unsigned long pc, unsigned long dest) +{ + unsigned int simmediate_l, simmediate_h; + union loongarch_instruction insn; + long offset = dest - pc; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) { + pr_warn("The generated b instruction is out of range.\n"); + return INSN_BREAK; + } + + offset >>= 2; + + simmediate_l = offset & 0xffff; + offset >>= 16; + simmediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = b_op; + insn.reg0i26_format.simmediate_l = simmediate_l; + insn.reg0i26_format.simmediate_h = simmediate_h; + + return insn.word; +} + +u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest) +{ + unsigned int simmediate_l, simmediate_h; + union loongarch_instruction insn; + long offset = dest - pc; + + if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) { + pr_warn("The generated bl instruction is out of range.\n"); + return INSN_BREAK; + } + + offset >>= 2; + + simmediate_l = offset & 0xffff; + offset >>= 16; + simmediate_h = offset & 0x3ff; + + insn.reg0i26_format.opcode = bl_op; + insn.reg0i26_format.simmediate_l = simmediate_l; + insn.reg0i26_format.simmediate_h = simmediate_h; + + return insn.word; +} + +u32 larch_insn_gen_addu16id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + union loongarch_instruction insn; + + insn.reg2i16_format.opcode = addu16id_op; + insn.reg2i16_format.rd = rd; + insn.reg2i16_format.rj = rj; + insn.reg2i16_format.simmediate = imm; + + return insn.word; +} + +u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm) +{ + union loongarch_instruction insn; + + insn.reg1i20_format.opcode = lu32id_op; + insn.reg1i20_format.rd = rd; + insn.reg1i20_format.simmediate = imm; + + return insn.word; +} + +u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm) +{ + union 
loongarch_instruction insn;
+
+	insn.reg2i12_format.opcode = lu52id_op;
+	insn.reg2i12_format.rd = rd;
+	insn.reg2i12_format.rj = rj;
+	insn.reg2i12_format.simmediate = imm;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj,
+			unsigned long pc, unsigned long dest)
+{
+	union loongarch_instruction insn;
+	long offset = dest - pc;
+
+	if ((offset & 3) || offset < -SZ_128K || offset >= SZ_128K) {
+		pr_warn("The generated jirl instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	insn.reg2i16_format.opcode = jirl_op;
+	insn.reg2i16_format.rd = rd;
+	insn.reg2i16_format.rj = rj;
+	insn.reg2i16_format.simmediate = offset >> 2;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj,
+		      enum loongarch_gpr rk)
+{
+	union loongarch_instruction insn;
+
+	insn.reg3_format.opcode = or_op;
+	insn.reg3_format.rd = rd;
+	insn.reg3_format.rj = rj;
+	insn.reg3_format.rk = rk;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
+{
+	return larch_insn_gen_or(rd, rj, 0);
+}
diff --git a/arch/loongarch/kernel/jump_label.c b/arch/loongarch/kernel/jump_label.c
new file mode 100644
index 000000000000..53c2b70aebdb
--- /dev/null
+++ b/arch/loongarch/kernel/jump_label.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ *
+ * Based on arch/arm64/kernel/jump_label.c
+ */
+#include
+#include
+#include
+#include
+
+void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type)
+{
+	u32 insn;
+	void *addr = (void *)jump_entry_code(entry);
+
+	if (type == JUMP_LABEL_JMP)
+		insn = larch_insn_gen_b(jump_entry_code(entry), jump_entry_target(entry));
+	else
+		insn = larch_insn_gen_nop();
+
+	larch_insn_patch_text(addr, insn);
+}
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644
index 000000000000..f5f6fe178450
--- /dev/null
+++ b/arch/loongarch/kernel/kgdb.c
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Huacai Chen
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+
+#include
+#include	/* for linux pt_regs struct */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static int kgdb_watch_dcount;
+static int kgdb_watch_icount;
+int kgdb_watch_activated;
+
+int param_set_dcount(const char *val, const struct kernel_param *kp)
+{
+	int dbcn, d, ret;
+
+	ret = kstrtoint(val, 0, &d);
+	if (ret < 0)
+		return ret;
+
+	dbcn = csr_read32(LOONGARCH_CSR_MWPC) & 0x3f;
+	if (d > dbcn)
+		return -EINVAL;
+
+	boot_cpu_data.watch_dreg_count = dbcn - d;
+	*(int *)kp->arg = d;
+	return 0;
+}
+
+int param_set_icount(const char *val, const struct kernel_param *kp)
+{
+	int ibcn, d, ret;
+
+	ret = kstrtoint(val, 0, &d);
+	if (ret < 0)
+		return ret;
+
+	ibcn = csr_read32(LOONGARCH_CSR_FWPC) & 0x3f;
+	if (d > ibcn)
+		return -EINVAL;
+
+	boot_cpu_data.watch_ireg_count = ibcn - d;
+	*(int *)kp->arg = d;
+	return 0;
+}
+
+const struct kernel_param_ops param_ops_dcount = {
+	.set = param_set_dcount,
+	.get = param_get_int,
+};
+
+const struct kernel_param_ops param_ops_icount = {
+	.set = param_set_icount,
+	.get = param_get_int,
+};
+
+module_param_cb(kgdb_watch_dcount, &param_ops_dcount, &kgdb_watch_dcount, 0644);
+module_param_cb(kgdb_watch_icount, &param_ops_icount, &kgdb_watch_icount, 0644);
+
+static struct hard_trap_info {
+	unsigned char tt;	/* Trap type code for
LoongArch */ + unsigned char signo; /* Signal that we map this trap into */ +} hard_trap_info[] = { + { 1, SIGBUS }, + { 2, SIGBUS }, + { 3, SIGBUS }, + { 4, SIGBUS }, + { 5, SIGBUS }, + { 6, SIGBUS }, + { 7, SIGBUS }, + { 8, SIGBUS }, + { 9, SIGBUS }, + { 10, SIGBUS }, + { 12, SIGTRAP }, /* break */ + { 13, SIGBUS }, + { 14, SIGBUS }, + { 15, SIGFPE }, + { 16, SIGFPE }, + { 17, SIGFPE }, + { 18, SIGFPE }, + { 0, 0} /* Must be last */ +}; + +struct dbg_reg_def_t dbg_reg_def[DBG_ALL_REG_NUM] = { + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) }, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) }, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) }, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) }, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) }, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) }, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) }, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) }, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) }, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) }, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) }, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) }, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) }, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) }, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) }, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) }, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) }, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) }, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) }, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) }, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) }, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) }, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) }, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) }, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) }, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) }, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) }, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) }, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) }, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) }, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) }, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) }, + { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) }, + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) }, + { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) }, + { "f0", GDB_SIZEOF_REG, 0 }, + { "f1", GDB_SIZEOF_REG, 1 }, + { "f2", GDB_SIZEOF_REG, 2 }, + { "f3", GDB_SIZEOF_REG, 3 }, + { "f4", GDB_SIZEOF_REG, 4 }, + { "f5", GDB_SIZEOF_REG, 5 }, + { "f6", GDB_SIZEOF_REG, 6 }, + { "f7", GDB_SIZEOF_REG, 7 }, + { "f8", GDB_SIZEOF_REG, 8 }, + { "f9", GDB_SIZEOF_REG, 9 }, + { "f10", GDB_SIZEOF_REG, 10 }, + { "f11", GDB_SIZEOF_REG, 11 }, + { "f12", GDB_SIZEOF_REG, 12 }, + { "f13", GDB_SIZEOF_REG, 13 }, + { "f14", GDB_SIZEOF_REG, 14 }, + { "f15", GDB_SIZEOF_REG, 15 }, + { "f16", GDB_SIZEOF_REG, 16 }, + { "f17", GDB_SIZEOF_REG, 17 }, + { "f18", GDB_SIZEOF_REG, 18 }, + { "f19", GDB_SIZEOF_REG, 19 }, + { "f20", GDB_SIZEOF_REG, 20 }, + { "f21", GDB_SIZEOF_REG, 21 }, + { "f22", GDB_SIZEOF_REG, 22 }, + { "f23", GDB_SIZEOF_REG, 23 }, + { "f24", 
GDB_SIZEOF_REG, 24 },
+	{ "f25", GDB_SIZEOF_REG, 25 },
+	{ "f26", GDB_SIZEOF_REG, 26 },
+	{ "f27", GDB_SIZEOF_REG, 27 },
+	{ "f28", GDB_SIZEOF_REG, 28 },
+	{ "f29", GDB_SIZEOF_REG, 29 },
+	{ "f30", GDB_SIZEOF_REG, 30 },
+	{ "f31", GDB_SIZEOF_REG, 31 },
+	{ "fcc0", 1, 0 },
+	{ "fcc1", 1, 1 },
+	{ "fcc2", 1, 2 },
+	{ "fcc3", 1, 3 },
+	{ "fcc4", 1, 4 },
+	{ "fcc5", 1, 5 },
+	{ "fcc6", 1, 6 },
+	{ "fcc7", 1, 7 },
+	{ "fcsr", GDB_SIZEOF_REG, 0 },
+};
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	int fp_reg;
+
+	if (regno < 0 || regno >= DBG_ALL_REG_NUM)
+		return -EINVAL;
+
+	if (dbg_reg_def[regno].offset != -1 && regno < 35) {
+		memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+		       dbg_reg_def[regno].size);
+	} else if (current && dbg_reg_def[regno].offset != -1 && regno < 78) {
+		/* FP registers 35 -> 76 */
+		if (!(regs->csr_euen & CSR_EUEN_FPEN))
+			return 0;
+		if (regno == 75) {
+			/* Process the fcsr/fsr (register 75) */
+			memcpy((void *)&current->thread.fpu.fcsr, mem,
+			       dbg_reg_def[regno].size);
+		} else if (regno >= 67 && regno < 75) {
+			/* Process the fcc */
+			fp_reg = dbg_reg_def[regno].offset;
+			memcpy((char *)&current->thread.fpu.fcc + fp_reg, mem,
+			       dbg_reg_def[regno].size);
+		} else {
+			fp_reg = dbg_reg_def[regno].offset;
+			memcpy((void *)&current->thread.fpu.fpr[fp_reg], mem,
+			       dbg_reg_def[regno].size);
+		}
+
+		restore_fp(current);
+	}
+
+	return 0;
+}
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+	int fp_reg;
+
+	if (regno >= DBG_ALL_REG_NUM || regno < 0)
+		return NULL;
+
+	if (dbg_reg_def[regno].offset != -1 && regno < 35) {
+		/* Registers 0 -> 34 live in struct pt_regs */
+		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+		       dbg_reg_def[regno].size);
+	} else if (current && dbg_reg_def[regno].offset != -1 && regno < 78) {
+		/* FP registers 35 -> 76 */
+		if (!(regs->csr_euen & CSR_EUEN_FPEN))
+			goto out;
+		save_fp(current);
+		if (regno == 75) {
+			/* Process the fcsr/fsr (register 75) */
+			memcpy(mem, (void *)&current->thread.fpu.fcsr,
+			       dbg_reg_def[regno].size);
+		} else if (regno >= 67 && regno < 75) {
+			/* Process the fcc */
+			fp_reg = dbg_reg_def[regno].offset;
+			memcpy(mem, (char *)&current->thread.fpu.fcc + fp_reg,
+			       dbg_reg_def[regno].size);
+		} else {
+			fp_reg = dbg_reg_def[regno].offset;
+			memcpy(mem, (void *)&current->thread.fpu.fpr[fp_reg],
+			       dbg_reg_def[regno].size);
+		}
+	}
+
+out:
+	return dbg_reg_def[regno].name;
+}
+
+void arch_kgdb_breakpoint(void)
+{
+	__asm__ __volatile__(
+		".globl breakinst\n\t"
+		"nop\n"
+		"breakinst:\tbreak 0\n\t");
+
+	annotate_reachable();
+}
+
+static int compute_signal(int tt)
+{
+	struct hard_trap_info *ht;
+
+	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+		if (ht->tt == tt)
+			return ht->signo;
+
+	return SIGTRAP;		/* default for things we don't know about */
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that the process is sleeping and so
+ * we may not be able to get all the info.
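+ *
+ * Only callee-saved state survives __switch_to(), so the code below
+ * reports RA, SP and the s0-s8 registers from struct thread_struct,
+ * zeroes the scratch registers, and uses the saved RA as the thread's
+ * resume PC.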
+ */ +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) +{ + int reg; +#if (KGDB_GDB_REG_SIZE == 32) + u32 *ptr = (u32 *)gdb_regs; +#else + u64 *ptr = (u64 *)gdb_regs; +#endif + + *(ptr++) = 0; + *(ptr++) = p->thread.reg01; + *(ptr++) = (long)p; + *(ptr++) = p->thread.reg03; + for (reg = 4; reg < 23; reg++) + *(ptr++) = 0; + + /* S0 - S8 */ + *(ptr++) = p->thread.reg23; + *(ptr++) = p->thread.reg24; + *(ptr++) = p->thread.reg25; + *(ptr++) = p->thread.reg26; + *(ptr++) = p->thread.reg27; + *(ptr++) = p->thread.reg28; + *(ptr++) = p->thread.reg29; + *(ptr++) = p->thread.reg30; + *(ptr++) = p->thread.reg31; + *(ptr++) = 0; + + /* + * PC use return address (RA), i.e. the moment after return from __switch_to() + */ + *(ptr++) = p->thread.reg01; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->csr_era = pc; +} + +/* + * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, + * then try to fall into the debugger + */ +static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + int trap = read_csr_excode(); + +#ifdef CONFIG_KPROBES + /* + * Return immediately if the kprobes fault notifier has set + * DIE_PAGE_FAULT. + */ + if (cmd == DIE_PAGE_FAULT) + return NOTIFY_DONE; +#endif /* CONFIG_KPROBES */ + + /* Userspace events, ignore. */ + if (user_mode(regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_active) != -1) + kgdb_nmicallback(smp_processor_id(), regs); + + if (kgdb_handle_exception(trap, compute_signal(trap), cmd, regs)) + return NOTIFY_DONE; + + if (atomic_read(&kgdb_setting_breakpoint)) + if ((regs->csr_era == (unsigned long)breakinst)) + regs->csr_era += 4; + + /* In SMP mode, __flush_cache_all does IPI */ + local_irq_enable(); + flush_cache_all(); + + return NOTIFY_STOP; +} + +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP +int kgdb_ll_trap(int cmd, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig, + + }; + + if (!kgdb_io_module_registered) + return NOTIFY_DONE; + + return kgdb_loongarch_notify(NULL, cmd, &args); +} +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_loongarch_notify, +}; + +/* + * Handle the 'c' command + */ +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + char *ptr; + unsigned long address; + + regs->csr_prmd |= CSR_PRMD_PWE; + + switch (remcom_in_buffer[0]) { + case 'c': + /* handle the optional parameter */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &address)) + regs->csr_era = address; + + return 0; + } + + return -1; +} + +static struct hw_breakpoint { + unsigned enabled; + unsigned long addr; + int len; + int type; + struct perf_event * __percpu *pev; +} dbreakinfo[NUM_WATCH_REGS], ibreakinfo[NUM_WATCH_REGS]; + +static int +kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) +{ + int i; + struct hw_breakpoint *breakinfo = (bptype == BP_HARDWARE_BREAKPOINT) ? + ibreakinfo : dbreakinfo; + int count = (bptype == BP_HARDWARE_BREAKPOINT) ? 
kgdb_watch_icount :
+		    kgdb_watch_dcount;
+
+	for (i = 0; i < count; i++)
+		if (!breakinfo[i].enabled)
+			break;
+	if (i == count)
+		return -1;
+
+	breakinfo[i].type = bptype;
+	breakinfo[i].len = len;
+	breakinfo[i].addr = addr;
+	breakinfo[i].enabled |= 1;
+
+	return 0;
+}
+
+static int
+kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+	int i;
+	struct hw_breakpoint *breakinfo = (bptype == BP_HARDWARE_BREAKPOINT) ?
+					  ibreakinfo : dbreakinfo;
+	int count = (bptype == BP_HARDWARE_BREAKPOINT) ? kgdb_watch_icount :
+		    kgdb_watch_dcount;
+
+	for (i = 0; i < count; i++)
+		if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+			break;
+	if (i == count)
+		return -1;
+
+	breakinfo[i].enabled &= ~1;
+
+	return 0;
+}
+
+static void kgdb_disable_hw_debug(struct pt_regs *regs)
+{
+	csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+	regs->csr_prmd &= ~CSR_PRMD_PWE;
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+	int i, j, mask;
+
+	for (mask = 0, i = 0, j = boot_cpu_data.watch_ireg_count;
+	     i < kgdb_watch_icount; i++, j++) {
+		if (!(ibreakinfo[i].enabled & 2))
+			continue;
+		ibreakinfo[i].enabled = 0;
+		watch_csrwr(0, LOONGARCH_CSR_IB0ADDR + 8 * j);
+		watch_csrwr(0, LOONGARCH_CSR_IB0MASK + 8 * j);
+		watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8 * j);
+		watch_csrwr(0, LOONGARCH_CSR_IB0CTL + 8 * j);
+		mask |= 1 << j;
+	}
+	watch_csrwr(mask, LOONGARCH_CSR_FWPS);
+
+	for (mask = 0, i = 0, j = boot_cpu_data.watch_dreg_count;
+	     i < kgdb_watch_dcount; i++, j++) {
+		if (!(dbreakinfo[i].enabled & 2))
+			continue;
+		dbreakinfo[i].enabled = 0;
+		watch_csrwr(0, LOONGARCH_CSR_DB0ADDR + 8 * j);
+		watch_csrwr(0, LOONGARCH_CSR_DB0MASK + 8 * j);
+		watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8 * j);
+		watch_csrwr(0, LOONGARCH_CSR_DB0CTL + 8 * j);
+		mask |= 1 << j;
+	}
+	watch_csrwr(mask, LOONGARCH_CSR_MWPS);
+
+	csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+
+	kgdb_watch_activated = 0;
+}
+
+static void kgdb_correct_hw_break(void)
+{
+	int i, j, dbc, activated = 0;
+
+	for (i = 0, j = boot_cpu_data.watch_ireg_count; i < kgdb_watch_icount; i++, j++) {
+		if ((ibreakinfo[i].enabled & 3) == 2) {
+			watch_csrwr(0, LOONGARCH_CSR_IB0CTL + 8*j);
+			ibreakinfo[i].enabled = 0;
+			continue;
+		} else if (!ibreakinfo[i].enabled)
+			continue;
+		ibreakinfo[i].enabled |= 2;
+		watch_csrwr(ibreakinfo[i].addr, LOONGARCH_CSR_IB0ADDR + 8*j);
+		watch_csrwr(0, LOONGARCH_CSR_IB0MASK + 8*j);
+		watch_csrwr(0, LOONGARCH_CSR_IB0ASID + 8*j);
+		watch_csrwr(0x1e, LOONGARCH_CSR_IB0CTL + 8*j);
+		watch_csrwr(0x10000, LOONGARCH_CSR_FWPS);
+		activated = 1;
+	}
+
+	for (i = 0, j = boot_cpu_data.watch_dreg_count; i < kgdb_watch_dcount; i++, j++) {
+		if ((dbreakinfo[i].enabled & 3) == 2) {
+			watch_csrwr(0, LOONGARCH_CSR_DB0CTL + 8*j);
+			dbreakinfo[i].enabled = 0;
+			continue;
+		} else if (!dbreakinfo[i].enabled)
+			continue;
+		dbreakinfo[i].enabled |= 2;
+		dbc = 0x1e;
+		switch (dbreakinfo[i].len) {
+		case 8:
+			break;
+		case 4:
+			dbc |= (1<<10);
+			break;
+		case 2:
+			dbc |= (2<<10);
+			break;
+		case 1:
+			dbc |= (3<<10);
+			break;
+		default:
+			break;
+		}
+
+		if (dbreakinfo[i].type == BP_WRITE_WATCHPOINT) {
+			dbc |= 1<<9;
+		} else if (dbreakinfo[i].type == BP_READ_WATCHPOINT) {
+			dbc |= 1<<8;
+		} else {
+			dbc |= 3<<8;
+		}
+
+		watch_csrwr(dbreakinfo[i].addr, LOONGARCH_CSR_DB0ADDR + 8*j);
+		watch_csrwr(0, LOONGARCH_CSR_DB0MASK + 8*j);
+		watch_csrwr(0, LOONGARCH_CSR_DB0ASID + 8*j);
+		watch_csrwr(dbc, LOONGARCH_CSR_DB0CTL + 8*j);
+		activated = 1;
+	}
+
+	csr_xchg32(activated ?
CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+	kgdb_watch_activated = activated;
+}
+
+const struct kgdb_arch arch_kgdb_ops = {
+	.flags = KGDB_HW_BREAKPOINT,
+	.set_hw_breakpoint = kgdb_set_hw_break,
+	.remove_hw_breakpoint = kgdb_remove_hw_break,
+	.disable_hw_break = kgdb_disable_hw_debug,
+	.remove_all_hw_break = kgdb_remove_all_hw_break,
+	.correct_hw_break = kgdb_correct_hw_break,
+	.gdb_bpt_instr = { 0x00, 0x00, break_op >> 1, 0x00 },
+};
+
+int kgdb_arch_init(void)
+{
+	int ibcn, dbcn;
+
+	register_die_notifier(&kgdb_notifier);
+	dbcn = csr_read32(LOONGARCH_CSR_MWPC) & 0x3f;
+	ibcn = csr_read32(LOONGARCH_CSR_FWPC) & 0x3f;
+	boot_cpu_data.watch_dreg_count = dbcn - kgdb_watch_dcount;
+	boot_cpu_data.watch_ireg_count = ibcn - kgdb_watch_icount;
+	return 0;
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitialization.
+ *
+ * This function will handle the uninitialization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+	unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c
new file mode 100644
index 000000000000..bb4ed5f5df69
--- /dev/null
+++ b/arch/loongarch/kernel/kprobes.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel Probes (KProbes)
+ *
+ * arch/loongarch/kernel/kprobes.c
+ *
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+static const union loongarch_instruction breakpoint_insn = {
+	.reg0i15_format = {
+		.opcode = break_op,
+		.simmediate = BRK_KPROBE_BP,
+	}
+};
+
+static const union loongarch_instruction breakpoint2_insn = {
+	.reg0i15_format = {
+		.opcode = break_op,
+		.simmediate = BRK_KPROBE_SSTEPBP,
+	}
+};
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+#define SS_FAIL		-1
+#define SS_NONE		0
+#define SS_BRANCH	1
+#define SS_PC		2
+
+/*
+ * insn_has_ll_or_sc() checks whether an instruction is an ll or sc one;
+ * putting a breakpoint on top of an atomic ll/sc pair is a bad idea, so
+ * we refuse kprobe insertion for such instructions; we cannot do much
+ * about a breakpoint in the middle of an ll/sc pair, it is up to the
+ * user to avoid those places.
+ */
+static int __kprobes insn_has_ll_or_sc(union loongarch_instruction insn)
+{
+	int ret = 0;
+
+	switch (insn.reg2i14_format.opcode) {
+	case llw_op:
+	case lld_op:
+	case scw_op:
+	case scd_op:
+		ret = 1;
+		break;
+	}
+	return ret;
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	union loongarch_instruction insn;
+	int ret = 0;
+
+	insn = p->addr[0];
+
+	if (insn_has_ll_or_sc(insn)) {
+		pr_notice("Kprobes for ll and sc instructions are not supported\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (insn.reg1i21_format.opcode == bceqz_op) {
+		pr_notice("Kprobes for bceqz and bcnez instructions are not supported\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* insn: must be on special executable page on loongarch. */
+	p->ainsn.insn = get_insn_slot();
+	if (!p->ainsn.insn) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * In the kprobe->ainsn.insn[] array we store the original
+	 * instruction at index zero and a break trap instruction at
+	 * index one.
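+	 * The slot is thus laid out as:
+	 *
+	 *	insn[0]: copy of the probed instruction
+	 *	insn[1]: break BRK_KPROBE_SSTEPBP
+	 *
+	 * so single-stepping the copy falls through into the break, which
+	 * traps back into post_kprobe_handler() via DIE_SSTEPBP.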
+ */ + memcpy(&p->ainsn.insn[0], p->addr, sizeof(kprobe_opcode_t)); + + p->ainsn.insn[1] = breakpoint2_insn; + p->opcode = *p->addr; + +out: + return ret; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = breakpoint_insn; + flush_insn_slot(p); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_insn_slot(p); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.old_SR = kcb->kprobe_old_SR; + kcb->prev_kprobe.saved_SR = kcb->kprobe_saved_SR; + kcb->prev_kprobe.saved_era = kcb->kprobe_saved_era; +} + +static void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_old_SR = kcb->prev_kprobe.old_SR; + kcb->kprobe_saved_SR = kcb->prev_kprobe.saved_SR; + kcb->kprobe_saved_era = kcb->prev_kprobe.saved_era; +} + +static void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); + kcb->kprobe_saved_SR = kcb->kprobe_old_SR = (regs->csr_prmd & CSR_PRMD_PIE); + kcb->kprobe_saved_era = regs->csr_era; +} + +static int prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ + if (is_branch_insn(p->opcode)) { + if (!simu_branch(regs, p->opcode)) + return SS_BRANCH; + } else if (is_pc_insn(p->opcode)) { + if (!simu_pc(regs, p->opcode)) + return SS_PC; + } else { + regs->csr_era = (unsigned long)&p->ainsn.insn[0]; + return SS_NONE; + } + + pr_notice("Kprobes: Error in simulate insn\n"); + regs->csr_era = (unsigned long)&p->ainsn.insn[0]; + return SS_FAIL; +} + +static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + int ss; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + /* single step inline if the instruction is an break */ + if (p->ainsn.insn->word == breakpoint_insn.word) { + regs->csr_prmd &= ~CSR_PRMD_PIE; + regs->csr_prmd |= kcb->kprobe_saved_SR; + preempt_enable_no_resched(); + } else { + regs->csr_prmd &= ~CSR_PRMD_PIE; + ss = prepare_singlestep(p, regs); + if (ss == SS_NONE) { + kcb->kprobe_status = KPROBE_HIT_SS; + } else if (ss == SS_BRANCH || ss == SS_PC) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (p->post_handler) + p->post_handler(p, regs, 0); + reset_current_kprobe(); + preempt_enable_no_resched(); + } else { + if (p->fault_handler) + p->fault_handler(p, regs, 0); + reset_current_kprobe(); + preempt_enable_no_resched(); + } + } +} + +static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_err("Unrecoverable kprobe detected. 
\n"); + BUG(); + break; + default: + /* impossible cases */ + WARN_ON(1); + return 0; + } + + return 1; +} + +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + kprobe_opcode_t *addr; + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *) regs->csr_era; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + p = get_kprobe(addr); + if (p) { + if (kprobe_running()) { + if (reenter_kprobe(p, regs, kcb)) + return 1; + } else { + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) { + /* handler has already set things up, so skip ss setup */ + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } else { + setup_singlestep(p, regs, kcb, 0); + return 1; + } + } + } else { + if (addr->word != breakpoint_insn.word) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + preempt_enable_no_resched(); + return 1; + } + } + + preempt_enable_no_resched(); + + return 0; +} + +static inline int post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + regs->csr_era = kcb->kprobe_saved_era + LOONGARCH_INSN_SIZE; + regs->csr_prmd |= kcb->kprobe_saved_SR; + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + return 1; +} + +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + if (kcb->kprobe_status & KPROBE_HIT_SS) { + regs->csr_era = kcb->kprobe_saved_era + LOONGARCH_INSN_SIZE; + regs->csr_prmd |= kcb->kprobe_old_SR; + + reset_current_kprobe(); + preempt_enable_no_resched(); + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. + */ +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch (val) { + case DIE_BREAK: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEPBP: + if (post_kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + + case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + + if (kprobe_running() + && kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); + break; + default: + break; + } + return ret; +} + +/* + * Function return probe trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe causes the + * handlers to fire + */ +static void __used kretprobe_trampoline_holder(void) +{ + asm volatile( + /* Keep the assembler from reordering and placing JR here. 
*/ + "nop\n\t" + ".global kretprobe_trampoline\n" + "kretprobe_trampoline:\n\t" + "nop\n\t" + : : : "memory"); +} + +void kretprobe_trampoline(void); + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *) regs->regs[1]; + ri->fp = NULL; + + /* Replace the return addr with trampoline addr */ + regs->regs[1] = (unsigned long)kretprobe_trampoline; +} + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int __kprobes trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + instruction_pointer(regs) = __kretprobe_trampoline_handler(regs, + kretprobe_trampoline, NULL); + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline) + return 1; + + return 0; +} + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *)kretprobe_trampoline, + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S new file mode 100644 index 000000000000..bbdbe9f91637 --- /dev/null +++ b/arch/loongarch/kernel/mcount.S @@ -0,0 +1,94 @@ +/* + * LoongArch specific _mcount support + * + * Author: Huacai Chen + * Copyright (C) 2020-2021 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + + .text + +#define MCOUNT_STACK_SIZE (2 * SZREG) +#define MCOUNT_S0_OFFSET (0) +#define MCOUNT_RA_OFFSET (SZREG) + + .macro MCOUNT_SAVE_REGS + PTR_ADDI sp, sp, -MCOUNT_STACK_SIZE + PTR_S s0, sp, MCOUNT_S0_OFFSET + PTR_S ra, sp, MCOUNT_RA_OFFSET + move s0, a0 + .endm + + .macro MCOUNT_RESTORE_REGS + move a0, s0 + PTR_L ra, sp, MCOUNT_RA_OFFSET + PTR_L s0, sp, MCOUNT_S0_OFFSET + PTR_ADDI sp, sp, MCOUNT_STACK_SIZE + .endm + + +SYM_FUNC_START(_mcount) + la t1, ftrace_stub + la t2, ftrace_trace_function /* Prepare t2 for (1) */ + PTR_L t2, t2, 0 + beq t1, t2, fgraph_trace + + MCOUNT_SAVE_REGS + + move a0, ra /* arg0: self return address */ + move a1, s0 /* arg1: parent's return address */ + jirl ra, t2, 0 /* (1) call *ftrace_trace_function */ + + MCOUNT_RESTORE_REGS + +fgraph_trace: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + la t1, ftrace_stub + la t3, ftrace_graph_return + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller + la t1, ftrace_graph_entry_stub + la t3, ftrace_graph_entry + PTR_L t3, t3, 0 + bne t1, t3, ftrace_graph_caller +#endif + + .globl ftrace_stub +ftrace_stub: + jirl zero, ra, 0 +SYM_FUNC_END(_mcount) +EXPORT_SYMBOL(_mcount) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_FUNC_START(ftrace_graph_caller) + MCOUNT_SAVE_REGS + + PTR_ADDI a0, ra, -4 /* arg0: Callsite self return addr */ + PTR_ADDI a1, sp, MCOUNT_STACK_SIZE /* arg1: Callsite sp */ + move a2, s0 /* arg2: Callsite parent ra */ + bl prepare_ftrace_return + + MCOUNT_RESTORE_REGS + jirl zero, ra, 0 +SYM_FUNC_END(ftrace_graph_caller) + +SYM_FUNC_START(return_to_handler) + PTR_ADDI sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + bl ftrace_return_to_handler + + /* restore the real parent address: a0 -> ra */ + move ra, a0 + + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDI sp, sp, 2 * SZREG + jirl zero, ra, 0 +SYM_FUNC_END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git 
a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
new file mode 100644
index 000000000000..5571c7327b90
--- /dev/null
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Jinyang He
+ * Author: Huacai Chen
+ * Copyright (C) 2020-2021 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+ .text
+/*
+ * Due to -fpatchable-function-entry=2, the compiler inserts 2 NOPs before the
+ * regular C function prologue. When the PC arrives here, the last 2
+ * instructions are as follows:
+ * move t0, ra
+ * bl callsite (for modules, callsite is a trampoline)
+ *
+ * A module trampoline is as follows:
+ * addu16i.d t1, zero, callsite[31:16]
+ * lu32i.d t1, callsite[51:32]
+ * lu52i.d t1, t1, callsite[63:52]
+ * jirl zero, t1, callsite[15:0] >> 2
+ *
+ * See arch/loongarch/kernel/ftrace_dyn.c for details. Note that the T series
+ * regs are available and safe because each C function follows the LoongArch
+ * psABI well.
+ */
+
+ .macro ftrace_regs_entry allregs=0
+ PTR_ADDI sp, sp, -PT_SIZE
+ /* Save trace function ra at PT_ERA */
+ PTR_S ra, sp, PT_ERA
+ /* Save parent ra at PT_R1(RA) */
+ PTR_S t0, sp, PT_R1
+ PTR_S a0, sp, PT_R4
+ PTR_S a1, sp, PT_R5
+ PTR_S a2, sp, PT_R6
+ PTR_S a3, sp, PT_R7
+ PTR_S a4, sp, PT_R8
+ PTR_S a5, sp, PT_R9
+ PTR_S a6, sp, PT_R10
+ PTR_S a7, sp, PT_R11
+ PTR_S fp, sp, PT_R22
+
+ .if \allregs
+ PTR_S t0, sp, PT_R12
+ PTR_S t1, sp, PT_R13
+ PTR_S t2, sp, PT_R14
+ PTR_S t3, sp, PT_R15
+ PTR_S t4, sp, PT_R16
+ PTR_S t5, sp, PT_R17
+ PTR_S t6, sp, PT_R18
+ PTR_S t7, sp, PT_R19
+ PTR_S t8, sp, PT_R20
+ PTR_S s0, sp, PT_R23
+ PTR_S s1, sp, PT_R24
+ PTR_S s2, sp, PT_R25
+ PTR_S s3, sp, PT_R26
+ PTR_S s4, sp, PT_R27
+ PTR_S s5, sp, PT_R28
+ PTR_S s6, sp, PT_R29
+ PTR_S s7, sp, PT_R30
+ PTR_S s8, sp, PT_R31
+ PTR_S tp, sp, PT_R2
+ /* Clear it for later use as a flag sometimes. */
+ PTR_S zero, sp, PT_R0
+ PTR_S $r21, sp, PT_R21
+ .endif
+
+ PTR_ADDI t8, sp, PT_SIZE
+ PTR_S t8, sp, PT_R3
+
+ UNWIND_HINT_REGS
+ .endm
+
+SYM_CODE_START(ftrace_caller)
+ UNWIND_HINT sp_reg=ORC_REG_SP type=ORC_TYPE_CALL
+ ftrace_regs_entry allregs=0
+ b ftrace_common
+SYM_CODE_END(ftrace_caller)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+SYM_CODE_START(ftrace_regs_caller)
+ UNWIND_HINT sp_reg=ORC_REG_SP type=ORC_TYPE_CALL
+ ftrace_regs_entry allregs=1
+ b ftrace_common
+SYM_CODE_END(ftrace_regs_caller)
+#endif
+
+SYM_CODE_START(ftrace_common)
+ UNWIND_HINT_REGS
+ PTR_ADDI a0, ra, -8 /* arg0: ip */
+ move a1, t0 /* arg1: parent_ip */
+ la.pcrel t1, function_trace_op
+ PTR_L a2, t1, 0 /* arg2: op */
+ move a3, sp /* arg3: regs */
+ .globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .globl ftrace_graph_call
+ftrace_graph_call:
+ nop /* b ftrace_graph_caller */
+#endif
+/*
+ * As we don't use S series regs in this assembly code and all calls
+ * are to C functions which will save S series regs by themselves, there is
+ * no need to restore S series regs. The T series is available and safe
+ * at the callsite, so there is no need to restore the T series regs.
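+ *
+ * Editorial sketch of what ftrace_regs_entry left on the stack and what
+ * therefore must be reloaded below (taken from the macro above):
+ *
+ *	PT_R4..PT_R11	a0..a7, the probed function's arguments
+ *	PT_R22		fp
+ *	PT_R1		t0 on entry, i.e. the parent return address
+ *	PT_ERA		ra on entry, i.e. the address right after the
+ *			patched bl (ip is recomputed as ra - 8 above)
+ *
+ * Only these argument/link slots are live across the hook.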
+ */ +ftrace_common_return: + PTR_L a0, sp, PT_R4 + PTR_L a1, sp, PT_R5 + PTR_L a2, sp, PT_R6 + PTR_L a3, sp, PT_R7 + PTR_L a4, sp, PT_R8 + PTR_L a5, sp, PT_R9 + PTR_L a6, sp, PT_R10 + PTR_L a7, sp, PT_R11 + PTR_L fp, sp, PT_R22 + PTR_L ra, sp, PT_R1 + PTR_L t0, sp, PT_ERA + PTR_ADDI sp, sp, PT_SIZE + UNWIND_HINT sp_reg=ORC_REG_SP type=ORC_TYPE_CALL + jirl zero, t0, 0 +SYM_CODE_END(ftrace_common) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +SYM_CODE_START(ftrace_graph_caller) + UNWIND_HINT_REGS + PTR_L a0, sp, PT_ERA + PTR_ADDI a0, a0, -8 /* arg0: self_addr */ + PTR_ADDI a1, sp, PT_R1 /* arg1: parent */ + bl prepare_ftrace_return + b ftrace_common_return +SYM_CODE_END(ftrace_graph_caller) + +SYM_CODE_START(return_to_handler) + UNWIND_HINT sp_reg=ORC_REG_SP type=ORC_TYPE_CALL + /* save return value regs */ + PTR_ADDI sp, sp, -2 * SZREG + PTR_S a0, sp, 0 + PTR_S a1, sp, SZREG + + move a0, zero /* Has no check FP now. */ + bl ftrace_return_to_handler + move ra, a0 /* parent ra */ + + /* restore return value regs */ + PTR_L a0, sp, 0 + PTR_L a1, sp, SZREG + PTR_ADDI sp, sp, 2 * SZREG + + jirl zero, ra, 0 +SYM_CODE_END(return_to_handler) +#endif + +SYM_FUNC_START(ftrace_stub) + jirl zero, ra, 0 +SYM_FUNC_END(ftrace_stub) diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c index 9af39d43ba9e..e8e5bfed9982 100644 --- a/arch/loongarch/kernel/module-sections.c +++ b/arch/loongarch/kernel/module-sections.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -104,7 +105,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { unsigned int i, num_plts = 0, num_gots = 0; - Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec; + Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec, *tramp = NULL; /* * Find the empty .plt sections. 
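(Editorial aside: the .ftrace_trampoline section reserved just below is what
lets a module's patched call sites reach ftrace across the +/-128MB range of a
bl instruction. A hedged sketch of how a resolver might pick the right PLT
slot; get_ftrace_plt is a hypothetical helper name, and the real lookup lives
in ftrace_dyn.c, which is not part of this excerpt:)

	static unsigned long get_ftrace_plt(struct module *mod, bool with_regs)
	{
		struct plt_entry *plt = mod->arch.ftrace_trampolines;

		/* Slots are filled by module_init_ftrace_plt() below. */
		return (unsigned long)&plt[with_regs ? FTRACE_REGS_PLT_IDX
						     : FTRACE_PLT_IDX];
	}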
@@ -116,6 +117,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt.shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx")) mod->arch.plt_idx.shndx = i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".ftrace_trampoline")) + tramp = sechdrs + i; } if (!mod->arch.got.shndx) { @@ -171,5 +174,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.plt_idx.num_entries = 0; mod->arch.plt_idx.max_entries = num_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c index 0bc6a6a5d7d6..5ae87872c37e 100644 --- a/arch/loongarch/kernel/module.c +++ b/arch/loongarch/kernel/module.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -18,7 +19,7 @@ #include #include #include - +#include #include static inline bool signed_imm_check(long val, unsigned int bit) @@ -505,11 +506,29 @@ void *module_alloc(unsigned long size) GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } +static int module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, struct module *mod) +{ +#ifdef CONFIG_DYNAMIC_FTRACE + struct plt_entry *ftrace_plts; + + ftrace_plts = (void *)sechdrs->sh_addr; + + ftrace_plts[FTRACE_PLT_IDX] = emit_plt_entry(FTRACE_ADDR); + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + ftrace_plts[FTRACE_REGS_PLT_IDX] = emit_plt_entry(FTRACE_REGS_ADDR); + + mod->arch.ftrace_trampolines = ftrace_plts; +#endif + return 0; +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { char *secstrings; - const Elf_Shdr *s, *orc = NULL, *orc_ip = NULL, *alt = NULL; + const Elf_Shdr *s, *orc = NULL, *orc_ip = NULL, *alt = NULL, *ftrace = NULL; secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; @@ -520,6 +539,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc = s; if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) orc_ip = s; + if (!strcmp(".ftrace_trampoline", secstrings + s->sh_name)) + ftrace = s; } if (alt) @@ -528,5 +549,5 @@ int module_finalize(const Elf_Ehdr *hdr, if (orc && orc_ip) unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size); - return 0; + return module_init_ftrace_plt(hdr, ftrace, mod); } diff --git a/arch/loongarch/kernel/spinlock_test.c b/arch/loongarch/kernel/spinlock_test.c new file mode 100644 index 000000000000..ea8462e8cb0b --- /dev/null +++ b/arch/loongarch/kernel/spinlock_test.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +static int ss_get(void) +{ + int cont; + int loops; + u64 timeval; + ktime_t start, finish; + DEFINE_RAW_SPINLOCK(ss_spin); + + cont = 1; + loops = 1000000; + + start = ktime_get(); + + while (cont) { + raw_spin_lock(&ss_spin); + loops--; + if (loops == 0) + cont = 0; + raw_spin_unlock(&ss_spin); + } + + finish = ktime_get(); + + timeval = ktime_us_delta(finish, start); + printk("Single: %llu\n",timeval); + + return 0; +} + +struct spin_multi_state { + raw_spinlock_t lock; + atomic_t start_wait; + atomic_t enter_wait; + atomic_t exit_wait; + int loops; +}; + +struct spin_multi_per_thread { + struct spin_multi_state *state; + ktime_t start; +}; + +static int multi_other(void *data) +{ + int loops; + int cont; + struct 
spin_multi_per_thread *pt = data;
+ struct spin_multi_state *s = pt->state;
+
+ loops = s->loops;
+ cont = 1;
+
+ atomic_dec(&s->enter_wait);
+
+ while (atomic_read(&s->enter_wait))
+ ; /* spin */
+
+ pt->start = ktime_get();
+
+ atomic_dec(&s->start_wait);
+
+ while (atomic_read(&s->start_wait))
+ ; /* spin */
+
+ while (cont) {
+ raw_spin_lock(&s->lock);
+ loops--;
+ if (loops == 0)
+ cont = 0;
+ raw_spin_unlock(&s->lock);
+ }
+
+ atomic_dec(&s->exit_wait);
+ while (atomic_read(&s->exit_wait))
+ ; /* spin */
+ return 0;
+}
+
+static int multi_get(void)
+{
+ u64 timeval;
+ ktime_t finish;
+ struct spin_multi_state ms;
+ struct spin_multi_per_thread t1, t2;
+
+ ms.lock = __RAW_SPIN_LOCK_UNLOCKED("multi_get");
+ ms.loops = 1000000;
+
+ atomic_set(&ms.start_wait, 2);
+ atomic_set(&ms.enter_wait, 2);
+ atomic_set(&ms.exit_wait, 2);
+ t1.state = &ms;
+ t2.state = &ms;
+
+ kthread_run(multi_other, &t2, "multi_get");
+
+ multi_other(&t1);
+
+ finish = ktime_get();
+
+ timeval = ktime_us_delta(finish, t1.start);
+ printk("Multiple: %llu\n", timeval);
+
+ return 0;
+}
+
+static int __init spinlock_test(void)
+{
+ ss_get();
+ multi_get();
+
+ return 0;
+}
+module_init(spinlock_test);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 268f83c00b92..013f0f90f5fb 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -714,6 +714,12 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 bcode = (opcode & 0x7fff);
+#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
+ if (kgdb_ll_trap(DIE_TRAP, str, regs, code, current->thread.trap_nr,
+ SIGTRAP) == NOTIFY_STOP)
+ return;
+#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
+
 /*
 * notify the kprobe handlers, if instruction is likely to
 * pertain to them.
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 8fcaa83c5600..73ba7cc0f2a6 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -2,6 +2,7 @@
 /*
 * Copyright (C) 2020 Loongson Technology Co., Ltd.
 */
+#include
 #include
 #include
@@ -46,16 +47,41 @@ static inline bool is_branch_ins(union loongarch_instruction *ip)
 return is_branch_insn(*ip);
 }
+static inline void unwind_state_fixup(struct unwind_state *state)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+ static unsigned long ftrace_case = (unsigned long)ftrace_call + 4;
+
+ if (state->pc == ftrace_case)
+ state->is_ftrace = true;
+#endif
+}
+
 static bool unwind_by_prologue(struct unwind_state *state)
 {
 struct stack_info *info = &state->stack_info;
 union loongarch_instruction *ip, *ip_end;
 unsigned long frame_size = 0, frame_ra = -1;
 unsigned long size, offset, pc = state->pc;
+ struct pt_regs *regs;
 if (state->sp >= info->end || state->sp < info->begin)
 return false;
+ if (state->is_ftrace) {
+ /*
+ * When we meet ftrace_regs_entry, reset the first flag as if we
+ * were just starting to trace; prologue analysis will stop soon
+ * because the PC is at the function entry.
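+ *
+ * (Editorial note: unwind_state_fixup() above arms is_ftrace when the
+ * unwound PC equals ftrace_call + 4, the return address of the patched
+ * "bl" inside ftrace_common. At that point sp points at the pt_regs
+ * block saved by ftrace_regs_entry, roughly
+ *
+ *	sp -> [ pt_regs ]  csr_era = probed function's PC
+ *	                   regs[1] = its caller (ra)
+ *	                   regs[3] = the original sp
+ *
+ * which is exactly what the code just below consumes.)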
+ */
+ regs = (struct pt_regs *)state->sp;
+ state->pc = regs->csr_era;
+ state->ra = regs->regs[1];
+ state->sp = regs->regs[3];
+ state->first = true;
+ state->is_ftrace = false;
+ return true;
+ }
+
 if (!kallsyms_lookup_size_offset(pc, &size, &offset))
 return false;
@@ -101,7 +127,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
 state->pc = *(unsigned long *)(state->sp + frame_ra);
 state->sp = state->sp + frame_size;
- return !!__kernel_text_address(state->pc);
+ goto out;
 first:
 state->first = false;
@@ -110,7 +136,9 @@ static bool unwind_by_prologue(struct unwind_state *state)
 state->pc = state->ra;
- return !!__kernel_text_address(state->ra);
+out:
+ unwind_state_fixup(state);
+ return !!__kernel_text_address(state->pc);
 }
 static bool unwind_by_guess(struct unwind_state *state)
diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c
new file mode 100644
index 000000000000..c176f17e19c6
--- /dev/null
+++ b/arch/loongarch/kernel/uprobes.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/**
+ * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+ * @mm: the probed address space.
+ * @aup: the probepoint information.
+ * @addr: virtual address at which to install the probepoint
+ * Return 0 on success or a -ve number on error.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *aup,
+ struct mm_struct *mm, unsigned long addr)
+{
+ union loongarch_instruction insn;
+
+ if (addr & 0x03)
+ return -EINVAL;
+
+ insn.word = aup->insn[0];
+
+ if (insn.reg1i21_format.opcode == bceqz_op) {
+ pr_notice("Uprobes for bceqz and bcnez instructions are not "
+ "supported\n");
+ return -EINVAL;
+ }
+
+ if (is_branch_insn(insn) || is_pc_insn(insn)) {
+ aup->ixol[0] = larch_insn_gen_nop();
+ } else {
+ aup->ixol[0] = aup->insn[0];
+ }
+
+ aup->ixol[1] = UPROBE_BRK_UPROBE_XOL;
+
+ return 0;
+}
+
+/**
+ * is_trap_insn - check if the instruction is a trap variant
+ * @insn: instruction to be checked.
+ * Returns true if @insn is a trap variant.
+ *
+ * This definition overrides the weak definition in kernel/events/uprobes.c
+ * and is needed for the case where an architecture has multiple trap
+ * instructions (like PowerPC or MIPS). We treat BREAK just like the more
+ * modern conditional trap instructions.
+ */
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+ union loongarch_instruction inst;
+
+ inst.word = *insn;
+
+ return (inst.reg0i15_format.opcode == break_op);
+}
+
+#define UPROBE_TRAP_NR ULONG_MAX
+static int ss_none;
+
+/*
+ * arch_uprobe_pre_xol - prepare to execute out of line.
+ * @auprobe: the probepoint information.
+ * @regs: reflects the saved user state of current task.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs)
+{
+ union loongarch_instruction insn;
+ struct uprobe_task *utask = current->utask;
+
+ insn.word = aup->insn[0];
+
+ /*
+ * Emulate the instruction here if it is a branch or pc-relative insn.
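+ *
+ * (Editorial sketch of the XOL slots set up in
+ * arch_uprobe_analyze_insn() above:
+ *
+ *	ixol[0] = original insn, or a NOP when it will be emulated;
+ *	ixol[1] = UPROBE_BRK_UPROBE_XOL;	<- traps back to uprobes
+ *
+ * A non-emulated instruction runs out of line in the XOL area and the
+ * following break returns control for arch_uprobe_post_xol().)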
+ */ + if (is_branch_insn(insn)) { + if (!simu_branch(regs, insn)) + return -EFAULT; + aup->resume_era = regs->csr_era; + } else if (is_pc_insn(insn)) { + if (!simu_pc(regs, insn)) + return -EFAULT; + aup->resume_era = regs->csr_era; + } else { + ss_none = 1; + } + utask->autask.saved_trap_nr = current->thread.trap_nr; + current->thread.trap_nr = UPROBE_TRAP_NR; + regs->csr_era = current->utask->xol_vaddr; + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.trap_nr = utask->autask.saved_trap_nr; + if (ss_none) + regs->csr_era = current->utask->vaddr + 4; + else + regs->csr_era = aup->resume_era; + + return 0; +} + +/* + * If xol insn itself traps and generates a signal(Say, + * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped + * instruction jumps back to its own address. It is assumed that anything + * like do_page_fault/do_trap/etc sets thread.trap_nr != -1. + * + * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, + * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to + * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol(). + */ +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + if (tsk->thread.trap_nr != UPROBE_TRAP_NR) + return true; + + return false; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + /* regs == NULL is a kernel bug */ + if (WARN_ON(!regs)) + return NOTIFY_DONE; + + /* We are only interested in userspace traps */ + if (!user_mode(regs)) + return NOTIFY_DONE; + + switch (val) { + case DIE_UPROBE: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_UPROBE_XOL: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + + return 0; +} + +/* + * This function gets called when XOL instruction either gets trapped or + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. + */ +void arch_uprobe_abort_xol(struct arch_uprobe *aup, + struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + instruction_pointer_set(regs, utask->vaddr); +} + +unsigned long arch_uretprobe_hijack_return_addr( + unsigned long trampoline_vaddr, struct pt_regs *regs) +{ + unsigned long ra; + + ra = regs->regs[1]; + + /* Replace the return address with the trampoline address */ + regs->regs[1] = trampoline_vaddr; + + return ra; +} + +/** + * set_swbp - store breakpoint at a given address. + * @auprobe: arch specific probepoint information. + * @mm: the probed process address space. + * @vaddr: the virtual address to insert the opcode. + * + * For mm @mm, store the breakpoint instruction at @vaddr. + * Return 0 (success) or a negative errno. + * + * This version overrides the weak version in kernel/events/uprobes.c. 
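+ *
+ * (Editorial note: the body below appears identical to the generic weak
+ * implementation; a sketch of its effect, with UPROBE_SWBP_INSN being
+ * the arch-defined "break" encoding:
+ *
+ *	uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
+ *
+ * which replaces the probed instruction in @mm at @vaddr via a
+ * copy-on-write page.)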
+ */ +int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long vaddr) +{ + return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + unsigned long kaddr, kstart; + + /* Initialize the slot */ + kaddr = (unsigned long)kmap_atomic(page); + kstart = kaddr + (vaddr & ~PAGE_MASK); + memcpy((void *)kstart, src, len); + flush_icache_range(kstart, kstart + len); + kunmap_atomic((void *)kaddr); +} + +/** + * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs + * @regs: Reflects the saved state of the task after it has hit a breakpoint + * instruction. + * Return the address of the breakpoint instruction. + * + * This overrides the weak version in kernel/events/uprobes.c. + */ +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +/* + * See if the instruction can be emulated. + * Returns true if instruction was emulated, false otherwise. + * + * For now we always emulate so this function just returns 0. + */ +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + return 0; +} diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index f7485d93eb88..e53d7f24801e 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -135,6 +135,15 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, struct vm_area_struct *vma = NULL; vm_fault_t fault; +#ifdef CONFIG_KPROBES + /* + * This is to notify the fault handler of the kprobes. + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, + current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) + return; +#endif + /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index ec692af8ce9e..90f1d9768c16 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -38,6 +38,14 @@ #define R_AARCH64_ABS64 257 #endif +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 +#define R_LARCH_32 1 +#define R_LARCH_64 2 +#define R_LARCH_MARK_LA 20 +#define R_LARCH_SOP_PUSH_PLT_PCREL 29 +#endif + #define R_ARM_PC24 1 #define R_ARM_THM_CALL 10 #define R_ARM_CALL 28 @@ -445,6 +453,28 @@ static int arm64_is_fake_mcount(Elf64_Rel const *rp) return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } +static int LARCH32_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + +static int LARCH64_is_fake_mcount(Elf64_Rel const *rp) +{ + switch (ELF64_R_TYPE(w(rp->r_info))) { + case R_LARCH_MARK_LA: + case R_LARCH_SOP_PUSH_PLT_PCREL: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. 
* http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -562,6 +592,7 @@ static int do_file(char const *const fname) break; case EM_IA_64: reltype = R_IA64_IMM64; break; case EM_MIPS: /* reltype: e_class */ break; + case EM_LOONGARCH: /* reltype: e_class */ break; case EM_PPC: reltype = R_PPC_ADDR32; break; case EM_PPC64: reltype = R_PPC64_ADDR64; break; case EM_S390: /* reltype: e_class */ break; @@ -593,6 +624,10 @@ static int do_file(char const *const fname) reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } + if (w2(ehdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_32; + is_fake_mcount32 = LARCH32_is_fake_mcount; + } if (do32(ehdr, fname, reltype) < 0) goto out; break; @@ -614,6 +649,10 @@ static int do_file(char const *const fname) Elf64_r_info = MIPS64_r_info; is_fake_mcount64 = MIPS64_is_fake_mcount; } + if (w2(ghdr->e_machine) == EM_LOONGARCH) { + reltype = R_LARCH_64; + is_fake_mcount64 = LARCH64_is_fake_mcount; + } if (do64(ghdr, fname, reltype) < 0) goto out; break; -- Gitee From 7a02c9b0adb05d9a91e1c0a488818aa0e30ee9ec Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 208/241] LoongArch: Add kernel livepatching support Change-Id: If69c96a106b57f92be6901f10208b695d5f87dc0 Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 3 +++ arch/loongarch/include/asm/livepatch.h | 11 +++++++++++ arch/loongarch/include/asm/thread_info.h | 1 + kernel/livepatch/core.c | 6 +++++- 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 arch/loongarch/include/asm/livepatch.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index b6056382f9fd..3f4cce944e0e 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -109,6 +109,7 @@ config LOONGARCH select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES + select HAVE_LIVEPATCH select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC @@ -765,6 +766,8 @@ config USE_OF config BUILTIN_DTB bool +source "kernel/livepatch/Kconfig" + endmenu config ARCH_ENABLE_MEMORY_HOTPLUG diff --git a/arch/loongarch/include/asm/livepatch.h b/arch/loongarch/include/asm/livepatch.h new file mode 100644 index 000000000000..c75cc6c9e25a --- /dev/null +++ b/arch/loongarch/include/asm/livepatch.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LIVEPATCH_H +#define _ASM_LIVEPATCH_H + +#include + +static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->csr_era = ip; +} +#endif /* _ASM_LIVEPATCH_H */ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index d12ff4747bbf..9955141b8324 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -87,6 +87,7 @@ register unsigned long current_stack_pointer __asm__("$r3"); #define TIF_SINGLESTEP 20 /* Single Step */ #define TIF_LSX_CTX_LIVE 21 /* LSX context must be preserved */ #define TIF_LASX_CTX_LIVE 22 /* LASX context must be preserved */ +#define TIF_PATCH_PENDING 23 /* pending live patching update */ #define _TIF_SIGPENDING (1<sh_addr; /* For each rela in this klp relocation section */ for (i = 0; i < relasec->sh_size / sizeof(Elf_Rela); i++) { + if (last && relas[i].r_offset == last->r_offset) + continue; + + last = &relas[i]; sym = (Elf_Sym *)sechdrs[symndx].sh_addr + ELF_R_SYM(relas[i].r_info); if (sym->st_shndx != 
SHN_LIVEPATCH) { pr_err("symbol %s is not marked as a livepatch symbol\n", -- Gitee From c4b3f7bad3332a4d704d3e9ddf93dee6c79fe9fe Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 209/241] LoongArch: Add kernel address sanitizer support Change-Id: I8e5521541d848ba98a28785a15bab995f41eef0d Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- arch/loongarch/Kconfig | 6 + arch/loongarch/boot/compressed/Makefile | 4 + arch/loongarch/boot/compressed/string.c | 6 + arch/loongarch/include/asm/kasan.h | 120 +++++++++++++++ arch/loongarch/include/asm/pgtable-64.h | 7 + arch/loongarch/include/asm/string.h | 20 +++ arch/loongarch/kernel/Makefile | 7 + arch/loongarch/kernel/head.S | 4 + arch/loongarch/kernel/setup.c | 4 + arch/loongarch/lib/memcpy.S | 5 +- arch/loongarch/lib/memmove.S | 15 +- arch/loongarch/lib/memset.S | 5 +- arch/loongarch/mm/Makefile | 3 + arch/loongarch/mm/kasan_init.c | 195 ++++++++++++++++++++++++ arch/loongarch/vdso/Makefile | 2 + include/linux/kasan.h | 2 + lib/Kconfig.kasan | 2 + mm/kasan/generic.c | 6 + mm/kasan/init.c | 14 +- mm/kasan/kasan.h | 6 + 20 files changed, 421 insertions(+), 12 deletions(-) create mode 100644 arch/loongarch/include/asm/kasan.h create mode 100644 arch/loongarch/mm/kasan_init.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 3f4cce944e0e..d06cb2d54dbd 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -79,6 +79,7 @@ config LOONGARCH select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE + select HAVE_ARCH_KASAN if 64BIT select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP @@ -824,6 +825,11 @@ config ZONE_DMA config ZONE_DMA32 bool +config KASAN_SHADOW_OFFSET + hex + default 0x0 + depends on KASAN + menu "Power management options" config ARCH_SUSPEND_POSSIBLE diff --git a/arch/loongarch/boot/compressed/Makefile b/arch/loongarch/boot/compressed/Makefile index 046b5b439f79..60259def78ec 100644 --- a/arch/loongarch/boot/compressed/Makefile +++ b/arch/loongarch/boot/compressed/Makefile @@ -10,6 +10,10 @@ include $(srctree)/arch/loongarch/Kbuild.platforms # set the default size of the mallocing area for decompressing BOOT_HEAP_SIZE := 0x400000 +ifdef CONFIG_KASAN +KASAN_SANITIZE := n +endif + # Disable Function Tracer KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE), $(KBUILD_CFLAGS)) diff --git a/arch/loongarch/boot/compressed/string.c b/arch/loongarch/boot/compressed/string.c index 3efff97c5293..50bf9c2f841c 100644 --- a/arch/loongarch/boot/compressed/string.c +++ b/arch/loongarch/boot/compressed/string.c @@ -112,3 +112,9 @@ char * __weak strrchr(const char *s, int c) } while (*s++); return (char *)last; } + +#ifdef CONFIG_KASAN +extern void *__memset(void *s, int c, size_t n) __alias(memset); +extern void *__memcpy(void *dest, const void *src, size_t n) __alias(memcpy); +extern void *__memmove(void *dest, const void *src, size_t n) __alias(memmove); +#endif diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h new file mode 100644 index 000000000000..d987ba4fe5cb --- /dev/null +++ b/arch/loongarch/include/asm/kasan.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +#define __HAVE_ARCH_SHADOW_MAP + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#define XRANGE_SHIFT (48) + +/* 
Valid address length */ +#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3) +/* Used for taking out the valid address */ +#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0) +/* One segment whole address space size */ +#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1) + +/* 64-bit segment value. */ +#define XKPRANGE_UC_SEG (0x8000) +#define XKPRANGE_CC_SEG (0x9000) +#define XKVRANGE_VC_SEG (0xffff) + +/* Cached */ +#define XKPRANGE_CC_START CACHE_BASE +#define XKPRANGE_CC_SIZE XRANGE_SIZE +#define XKPRANGE_CC_KASAN_OFFSET (0) +#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE) + +/* UnCached */ +#define XKPRANGE_UC_START UNCACHE_BASE +#define XKPRANGE_UC_SIZE XRANGE_SIZE +#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END +#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE) + +/* VMALLOC (Cached or UnCached) */ +#define XKVRANGE_VC_START VMALLOC_START +#define XKVRANGE_VC_SIZE round_up(VMEMMAP_END - MODULES_VADDR + 1, PGDIR_SIZE) +#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END +#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT) +#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE) + +/* Kasan shadow memory start right after vmalloc. */ +#define KASAN_SHADOW_START round_up(VMEMMAP_END, PGDIR_SIZE) +#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET) +#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE) + +#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET) +#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET) +#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET) + +extern bool kasan_early_stage; +extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + if (kasan_early_stage) { + return (void *)(kasan_early_shadow_page); + } else { + unsigned long maddr = (unsigned long)addr; + unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; + unsigned long offset = 0; + + maddr &= XRANGE_SHADOW_MASK; + switch (xrange) { + case XKPRANGE_CC_SEG: + offset = XKPRANGE_CC_SHADOW_OFFSET; + break; + case XKPRANGE_UC_SEG: + offset = XKPRANGE_UC_SHADOW_OFFSET; + break; + case XKVRANGE_VC_SEG: + offset = XKVRANGE_VC_SHADOW_OFFSET; + break; + default: + WARN_ON(1); + return NULL; + } + + return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); + } +} + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + unsigned long addr = (unsigned long)shadow_addr; + + if (unlikely(addr > KASAN_SHADOW_END) || + unlikely(addr < KASAN_SHADOW_START)) { + WARN_ON(1); + return NULL; + } + + if (addr >= XKVRANGE_VC_SHADOW_OFFSET) + return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); + else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); + else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) + return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); + else { + WARN_ON(1); + return NULL; + } +} + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#endif +#endif diff --git 
a/arch/loongarch/include/asm/pgtable-64.h b/arch/loongarch/include/asm/pgtable-64.h index 33ec860c368d..0ede81e1147b 100644 --- a/arch/loongarch/include/asm/pgtable-64.h +++ b/arch/loongarch/include/asm/pgtable-64.h @@ -71,9 +71,16 @@ #define MODULES_END (MODULES_VADDR + SZ_256M) #define VMALLOC_START MODULES_END + +#ifndef CONFIG_KASAN #define VMALLOC_END \ (vm_map_base + \ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE) +#else +#define VMALLOC_END \ + (vm_map_base + \ + min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE) +#endif #define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK)) #define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1) diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h index 812389888e35..fa4c8e896f02 100644 --- a/arch/loongarch/include/asm/string.h +++ b/arch/loongarch/include/asm/string.h @@ -11,11 +11,31 @@ #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); +extern void *__memset(void *__s, int __c, size_t __count); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *__to, __const__ void *__from, size_t __n); +extern void *__memcpy(void *__to, __const__ void *__from, size_t __n); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *__dest, __const__ void *__src, size_t __n); +extern void *__memmove(void *__dest, __const__ void *__src, size_t __n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memset(s, c, n) __memset(s, c, n) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. 
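 * (Editorial sketch, not part of the patch: with the defines above, a
 * file compiled without instrumentation turns
 *
 *	memset(p, 0, n);	->	__memset(p, 0, n);
 *
 * so KASAN never checks its accesses, while instrumented files keep
 * calling the interceptable memset()/memcpy()/memmove() names.)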
*/ +#endif + +#endif #endif /* _ASM_STRING_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index a931eb43971c..7f17ccf4fc13 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -35,6 +35,13 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_efi.o := n +KASAN_SANITIZE_cpu-probe.o := n +KASAN_SANITIZE_traps.o := n +KASAN_SANITIZE_smp.o := n +KASAN_SANITIZE_vdso.o := n +KASAN_SANITIZE_watch.o := n + obj-$(CONFIG_MODULES) += module.o module-sections.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S index 46be1c3ec1a2..e0742de8ae41 100644 --- a/arch/loongarch/kernel/head.S +++ b/arch/loongarch/kernel/head.S @@ -68,6 +68,10 @@ SYM_CODE_START(kernel_entry) # kernel entry point PTR_ADD sp, sp, tp set_saved_sp sp, t0, t1 +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif + #ifdef CONFIG_RELOCATABLE #ifdef CONFIG_CRASH_DUMP beqz a3, 1f diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 87d9a3ac2784..6cf43d79ba8d 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -451,4 +451,8 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); paging_init(); + +#if defined(CONFIG_KASAN) + kasan_init(); +#endif } diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index 9f9ea3014af0..86c1959e0397 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -10,15 +10,18 @@ #include #include -SYM_FUNC_START(memcpy) +SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __memcpy_generic", \ "b __memcpy_fast", CPU_FEATURE_UAL SYM_FUNC_END(memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL(__memcpy) /* * void *__memcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index af2f7ef15142..40b23b4f6070 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -10,21 +10,24 @@ #include #include -SYM_FUNC_START(memmove) - blt a0, a1, memcpy /* dst < src, memcpy */ - blt a1, a0, rmemcpy /* src < dst, rmemcpy */ - jr ra /* dst == src, return */ +SYM_FUNC_START_WEAK(memmove) +SYM_FUNC_START_ALIAS(__memmove) + blt a0, a1, __memcpy /* dst < src, memcpy */ + blt a1, a0, __rmemcpy /* src < dst, rmemcpy */ + jr ra /* dst == src, return */ SYM_FUNC_END(memmove) +SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL(__memmove) -SYM_FUNC_START(rmemcpy) +SYM_FUNC_START(__rmemcpy) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __rmemcpy_generic", \ "b __rmemcpy_fast", CPU_FEATURE_UAL -SYM_FUNC_END(rmemcpy) +SYM_FUNC_END(__rmemcpy) /* * void *__rmemcpy_generic(void *dst, const void *src, size_t n) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index 6d70f3c91ec7..db8426647bf8 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -16,15 +16,18 @@ bstrins.d \r0, \r0, 63, 32 .endm -SYM_FUNC_START(memset) +SYM_FUNC_START_WEAK(memset) +SYM_FUNC_START_ALIAS(__memset) /* * Some CPUs support hardware unaligned access */ ALTERNATIVE "b __memset_generic", \ "b __memset_fast", CPU_FEATURE_UAL SYM_FUNC_END(memset) +SYM_FUNC_END_ALIAS(__memset) EXPORT_SYMBOL(memset) +EXPORT_SYMBOL(__memset) /* * void *__memset_generic(void *s, int c, size_t n) diff --git a/arch/loongarch/mm/Makefile 
b/arch/loongarch/mm/Makefile index a3a137709650..fa6710785a54 100644 --- a/arch/loongarch/mm/Makefile +++ b/arch/loongarch/mm/Makefile @@ -8,3 +8,6 @@ obj-y += init.o cache.o tlb.o tlbex.o extable.o \ obj-$(CONFIG_64BIT) += pgtable-64.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_KASAN) += kasan_init.o + +KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c new file mode 100644 index 000000000000..d05f2a7c9d39 --- /dev/null +++ b/arch/loongarch/mm/kasan_init.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + */ +#define pr_fmt(fmt) "kasan: " fmt +#include +#include +#include + +#include +#include + +static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); + +#define __pgd_none(early, pgd) (early ? (pgd_val(pgd) == 0) : \ +(__pa(pgd_val(pgd)) == (unsigned long)__pa(kasan_early_shadow_pmd))) + +#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \ +(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte))) + +#define __pte_none(early, pte) (early ? pte_none(pte) : \ +((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page))) + +bool kasan_early_stage = true; + +/* + * Alloc memory for shadow memory page table. + */ +static phys_addr_t __init kasan_alloc_zeroed_page(int node) +{ + void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, node); + return __pa(p); +} + +static pte_t *kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, + bool early) +{ + if (__pmd_none(early, READ_ONCE(*pmdp))) { + phys_addr_t pte_phys = early ? + __pa_symbol(kasan_early_shadow_pte) + : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pte_phys), kasan_early_shadow_pte, + sizeof(kasan_early_shadow_pte)); + + pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys)); + } + + return pte_offset_kernel(pmdp, addr); +} + +static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval) +{ + WRITE_ONCE(*pgdp, pgdval); +} + +static pmd_t *kasan_pmd_offset(pgd_t *pgdp, unsigned long addr, int node, + bool early) +{ + if (__pgd_none(early, READ_ONCE(*pgdp))) { + phys_addr_t pmd_phys = early ? + __pa_symbol(kasan_early_shadow_pmd) + : kasan_alloc_zeroed_page(node); + if (!early) + memcpy(__va(pmd_phys), kasan_early_shadow_pmd, + sizeof(kasan_early_shadow_pmd)); + kasan_set_pgd(pgdp, __pgd((unsigned long)__va(pmd_phys))); + } + + return (pmd_t *)((pmd_t *)pgd_val(*pgdp) + pmd_index(addr)); +} + +static void kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, int node, bool early) +{ + unsigned long next; + pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early); + + do { + phys_addr_t page_phys = early ? 
+ __pa_symbol(kasan_early_shadow_page)
+ : kasan_alloc_zeroed_page(node);
+ next = addr + PAGE_SIZE;
+ set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+ } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void kasan_pmd_populate(pgd_t *pgdp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pmd_t *pmdp = kasan_pmd_offset(pgdp, addr, node, early);
+
+ do {
+ next = pmd_addr_end(addr, end);
+ kasan_pte_populate(pmdp, addr, next, node, early);
+ } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+ int node, bool early)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_pmd_populate(pgdp, addr, next, node, early);
+ } while (pgdp++, addr = next, addr != end);
+
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+ int node)
+{
+ kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+static void __init clear_pgds(unsigned long start,
+ unsigned long end)
+{
+ for (; start < end; start += PGDIR_SIZE)
+ kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+ u64 i;
+ phys_addr_t pa_start, pa_end;
+
+ /*
+ * The PGD entries were populated as invalid_pmd_table or
+ * invalid_pud_table in pagetable_init(), depending on how many levels
+ * of page table you are using, but we have to clear the pgd entries of
+ * the kasan shadow memory first, as those pgd values are non-zero.
+ * Otherwise the pgd_none check would be false and the populate
+ * afterwards would not create any new pgd at all.
+ */
+ memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+ csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+ local_flush_tlb_all();
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ /* Maps everything to a single page of zeroes */
+ kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+ kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+ kasan_mem_to_shadow((void *)VMEMMAP_END));
+
+ kasan_early_stage = false;
+
+ /* Populate the linear mapping */
+ for_each_mem_range(i, &pa_start, &pa_end) {
+ void *start = (void *)phys_to_virt(pa_start);
+ void *end = (void *)phys_to_virt(pa_end);
+
+ if (start >= end)
+ break;
+
+ kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+ (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+ }
+
+ /* Populate modules mapping */
+ kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+ (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+ /*
+ * KASAN may reuse the contents of kasan_early_shadow_pte directly, so
+ * we should make sure that it maps the zero page read-only.
+ */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_early_shadow_pte[i],
+ pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)),
+ PAGE_KERNEL_RO));
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+ local_flush_tlb_all();
+
+ /* At this point kasan is fully initialized.
Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized.\n"); +} diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index 9a1e032d21f0..6376f09fb2f3 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -2,6 +2,8 @@ # Objects to go into the VDSO. OBJECT_FILES_NON_STANDARD := y +KASAN_SANITIZE := n + # Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before # the inclusion of generic Makefile. ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT diff --git a/include/linux/kasan.h b/include/linux/kasan.h index c0b976dd138b..7b8f6a72551f 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -29,11 +29,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } +#endif /* Enable reporting bugs after kasan_disable_current() */ extern void kasan_enable_current(void); diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 542a9c18398e..6e5363f91aed 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -118,6 +118,8 @@ endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST + depends on KASAN_GENERIC || KASAN_SW_TAGS + default y if (CC_IS_GCC && !LOONGARCH) help The LLVM stack address sanitizer has a know problem that causes excessive stack usage in a lot of functions, see diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 2efc48444e77..2c57c9bb00c0 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -175,10 +175,16 @@ static __always_inline bool check_memory_region_inline(unsigned long addr, if (unlikely(addr + size < addr)) return !kasan_report(addr, size, write, ret_ip); +#ifndef __HAVE_ARCH_SHADOW_MAP if (unlikely((void *)addr < kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { return !kasan_report(addr, size, write, ret_ip); } +#else + if (unlikely(kasan_mem_to_shadow((void *)addr) == NULL)) { + return !kasan_report(addr, size, write, ret_ip); + } +#endif if (likely(!memory_is_poisoned(addr, size))) return true; diff --git a/mm/kasan/init.c b/mm/kasan/init.c index b8c6ec172bb2..dcfce7431a42 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -170,8 +170,11 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, if (!p) return -ENOMEM; } else { - pud_populate(&init_mm, pud, - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); +#ifdef CONFIG_LOONGARCH + pmd_init((unsigned long)p, (unsigned long)invalid_pte_table); +#endif + pud_populate(&init_mm, pud, p); } } zero_pmd_populate(pud, addr, next); @@ -211,8 +214,11 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, if (!p) return -ENOMEM; } else { - p4d_populate(&init_mm, p4d, - early_alloc(PAGE_SIZE, NUMA_NO_NODE)); + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); +#ifdef CONFIG_LOONGARCH + pud_init((unsigned long)p, (unsigned long)invalid_pmd_table); +#endif + p4d_populate(&init_mm, p4d, p); } } zero_pud_populate(p4d, addr, next); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index ac499456740f..a75b1658f17a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -139,15 +139,21 @@ struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, struct kasan_free_meta *get_free_info(struct 
kmem_cache *cache, const void *object); +#ifndef __HAVE_ARCH_SHADOW_MAP static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT); } +#endif static inline bool addr_has_shadow(const void *addr) { +#ifdef __HAVE_ARCH_SHADOW_MAP + return (kasan_mem_to_shadow((void *)addr) != NULL); +#else return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); +#endif } void kasan_poison_shadow(const void *address, size_t size, u8 value); -- Gitee From d42854e62a20382758eaf2f136ec6e5f66a90221 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 9 Sep 2023 18:33:01 +0800 Subject: [PATCH 210/241] kasan: Cleanup the __HAVE_ARCH_SHADOW_MAP usage As Linus suggested, __HAVE_ARCH_XYZ is "stupid" and "having historical uses of it doesn't make it good". So migrate __HAVE_ARCH_SHADOW_MAP to separate macros named after the respective functions. Change-Id: I6b68ed4863d05b884d2bfea56df85c14a420c92f Suggested-by: Linus Torvalds Reviewed-by: WANG Xuerui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kasan.h | 10 ++++++++-- include/linux/kasan.h | 2 +- mm/kasan/kasan.h | 10 ++++------ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index d987ba4fe5cb..9c3bf7775904 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -10,8 +10,6 @@ #include #include -#define __HAVE_ARCH_SHADOW_MAP - #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) @@ -62,6 +60,7 @@ extern bool kasan_early_stage; extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; +#define kasan_mem_to_shadow kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { if (kasan_early_stage) { @@ -91,6 +90,7 @@ static inline void *kasan_mem_to_shadow(const void *addr) } } +#define kasan_shadow_to_mem kasan_shadow_to_mem static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { unsigned long addr = (unsigned long)shadow_addr; @@ -113,6 +113,12 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) } } +#define addr_has_metadata addr_has_metadata +static inline bool addr_has_metadata(const void *addr) +{ + return (kasan_mem_to_shadow((void *)addr) != NULL); +} + void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 7b8f6a72551f..09471d24b847 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -29,7 +29,7 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; int kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end); -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_mem_to_shadow static inline void *kasan_mem_to_shadow(const void *addr) { return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index a75b1658f17a..43befe117386 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -139,7 +139,7 @@ struct kasan_alloc_meta *get_alloc_info(struct kmem_cache *cache, struct kasan_free_meta *get_free_info(struct kmem_cache *cache, const void *object); -#ifndef __HAVE_ARCH_SHADOW_MAP +#ifndef kasan_shadow_to_mem static inline const void *kasan_shadow_to_mem(const void *shadow_addr) { return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) @@ -147,14 +147,12 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) } #endif -static 
inline bool addr_has_shadow(const void *addr) +#ifndef addr_has_metadata +static inline bool addr_has_metadata(const void *addr) { -#ifdef __HAVE_ARCH_SHADOW_MAP - return (kasan_mem_to_shadow((void *)addr) != NULL); -#else return (addr >= kasan_shadow_to_mem((void *)KASAN_SHADOW_START)); -#endif } +#endif void kasan_poison_shadow(const void *address, size_t size, u8 value); -- Gitee From 7bf2b9b9076d6cfb467a2a6e8943159c7dbe906e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 9 Sep 2023 19:42:10 +0800 Subject: [PATCH 211/241] LoongArch: Don't inline kasan_mem_to_shadow()/kasan_shadow_to_mem() As Linus suggested, kasan_mem_to_shadow()/kasan_shadow_to_mem() are not performance-critical and too big to inline. This is simply wrong so just define them out-of-line. If they really need to be inlined in future, such as the objtool / SMAP issue for X86, we should mark them __always_inline. Change-Id: I2400d6ed91f6834f3f64d696172b9c455abc320b Suggested-by: Linus Torvalds Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kasan.h | 51 ++---------------------------- arch/loongarch/mm/kasan_init.c | 51 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 49 deletions(-) diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h index 9c3bf7775904..e0d430ce9775 100644 --- a/arch/loongarch/include/asm/kasan.h +++ b/arch/loongarch/include/asm/kasan.h @@ -61,57 +61,10 @@ extern bool kasan_early_stage; extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; #define kasan_mem_to_shadow kasan_mem_to_shadow -static inline void *kasan_mem_to_shadow(const void *addr) -{ - if (kasan_early_stage) { - return (void *)(kasan_early_shadow_page); - } else { - unsigned long maddr = (unsigned long)addr; - unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff; - unsigned long offset = 0; - - maddr &= XRANGE_SHADOW_MASK; - switch (xrange) { - case XKPRANGE_CC_SEG: - offset = XKPRANGE_CC_SHADOW_OFFSET; - break; - case XKPRANGE_UC_SEG: - offset = XKPRANGE_UC_SHADOW_OFFSET; - break; - case XKVRANGE_VC_SEG: - offset = XKVRANGE_VC_SHADOW_OFFSET; - break; - default: - WARN_ON(1); - return NULL; - } - - return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset); - } -} +void *kasan_mem_to_shadow(const void *addr); #define kasan_shadow_to_mem kasan_shadow_to_mem -static inline const void *kasan_shadow_to_mem(const void *shadow_addr) -{ - unsigned long addr = (unsigned long)shadow_addr; - - if (unlikely(addr > KASAN_SHADOW_END) || - unlikely(addr < KASAN_SHADOW_START)) { - WARN_ON(1); - return NULL; - } - - if (addr >= XKVRANGE_VC_SHADOW_OFFSET) - return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START); - else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) - return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START); - else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) - return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START); - else { - WARN_ON(1); - return NULL; - } -} +const void *kasan_shadow_to_mem(const void *shadow_addr); #define addr_has_metadata addr_has_metadata static inline bool addr_has_metadata(const void *addr) diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index d05f2a7c9d39..6690b6ce7fe2 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -23,6 +23,57 @@ static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); bool kasan_early_stage = true; +void 
*kasan_mem_to_shadow(const void *addr)
+{
+	if (kasan_early_stage) {
+		return (void *)(kasan_early_shadow_page);
+	} else {
+		unsigned long maddr = (unsigned long)addr;
+		unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+		unsigned long offset = 0;
+
+		maddr &= XRANGE_SHADOW_MASK;
+		switch (xrange) {
+		case XKPRANGE_CC_SEG:
+			offset = XKPRANGE_CC_SHADOW_OFFSET;
+			break;
+		case XKPRANGE_UC_SEG:
+			offset = XKPRANGE_UC_SHADOW_OFFSET;
+			break;
+		case XKVRANGE_VC_SEG:
+			offset = XKVRANGE_VC_SHADOW_OFFSET;
+			break;
+		default:
+			WARN_ON(1);
+			return NULL;
+		}
+
+		return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+	}
+}
+
+const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+	unsigned long addr = (unsigned long)shadow_addr;
+
+	if (unlikely(addr > KASAN_SHADOW_END) ||
+	    unlikely(addr < KASAN_SHADOW_START)) {
+		WARN_ON(1);
+		return NULL;
+	}
+
+	if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+		return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+	else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+		return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+	else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+		return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+	else {
+		WARN_ON(1);
+		return NULL;
+	}
+}
+
 /*
  * Alloc memory for shadow memory page table.
  */
-- 
Gitee

From ae27bf8dd3241e116758713eca73c902245cddf4 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Sun, 28 Jan 2024 22:43:38 +0800
Subject: [PATCH 212/241] LoongArch: Fix earlycon parameter if KASAN enabled

The earlycon parameter is based on fixmap, and fixmap addresses are not
supposed to be shadowed by KASAN. So return the kasan_early_shadow_page
in kasan_mem_to_shadow() if the input address is above FIXADDR_START.
Otherwise earlycon cannot work after kasan_init().

Change-Id: I2e814db3886a2bc20b97a708d0cfaa03dc7b1a9c
Signed-off-by: Huacai Chen
---
 arch/loongarch/mm/kasan_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index 6690b6ce7fe2..b3eaceac1489 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -32,6 +32,9 @@ void *kasan_mem_to_shadow(const void *addr)
 		unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
 		unsigned long offset = 0;
 
+		if (maddr >= FIXADDR_START)
+			return (void *)(kasan_early_shadow_page);
+
 		maddr &= XRANGE_SHADOW_MASK;
 		switch (xrange) {
 		case XKPRANGE_CC_SEG:
-- 
Gitee

From 6524348a8616a93b2dc73678845a0a7fb73a3f5d Mon Sep 17 00:00:00 2001
From: Jinyang He
Date: Thu, 12 Jan 2023 08:36:25 +0800
Subject: [PATCH 213/241] LoongArch: Adjust PC value when unwind next frame in unwinder

When state->first is not set, the PC is a return address in the
previous frame. We need to adjust its value in case it overflows into
the next symbol.
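To make the boundary problem concrete, here is a minimal userspace
sketch (made-up addresses, not kernel code): when the call is the last
instruction of its caller, the saved return address equals
caller_start + caller_size, which is already the first byte of the
next symbol, so a lookup on the raw value resolves to the wrong
function; subtracting one instruction (LOONGARCH_INSN_SIZE, 4 bytes)
moves it back inside the caller:

	#include <stdio.h>

	int main(void)
	{
		unsigned long caller_start = 0x9000000000210000UL; /* made-up */
		unsigned long caller_end = caller_start + 0x100;   /* start + size */
		unsigned long ra = caller_end; /* call was the last instruction */

		/* Raw value: not inside [caller_start, caller_end), prints 0 */
		printf("raw ra in caller? %d\n",
		       ra >= caller_start && ra < caller_end);
		/* Adjusted by one 4-byte instruction: back inside, prints 1 */
		printf("adjusted ra in caller? %d\n",
		       ra - 4 >= caller_start && ra - 4 < caller_end);
		return 0;
	}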
Change-Id: Ib15fcf3cc6675534cf387af7884090e8b53820d0
Signed-off-by: Jinyang He
Signed-off-by: Huacai Chen
---
 arch/loongarch/kernel/unwind_prologue.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 73ba7cc0f2a6..5f8487b645ad 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -62,7 +62,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
 	struct stack_info *info = &state->stack_info;
 	union loongarch_instruction *ip, *ip_end;
 	unsigned long frame_size = 0, frame_ra = -1;
-	unsigned long size, offset, pc = state->pc;
+	unsigned long size, offset, pc;
 	struct pt_regs *regs;
 
 	if (state->sp >= info->end || state->sp < info->begin)
@@ -82,6 +82,11 @@ static bool unwind_by_prologue(struct unwind_state *state)
 		return true;
 	}
 
+	/*
+	 * When first is not set, the PC is a return address in the previous frame.
+	 * We need to adjust its value in case overflow to the next symbol.
+	 */
+	pc = state->pc - (state->first ? 0 : LOONGARCH_INSN_SIZE);
 	if (!kallsyms_lookup_size_offset(pc, &size, &offset))
 		return false;
 
-- 
Gitee

From c8db6a0b97c1fd861ff1551d835b3c3fc12a8fe3 Mon Sep 17 00:00:00 2001
From: Jinyang He
Date: Thu, 12 Jan 2023 08:36:24 +0800
Subject: [PATCH 214/241] LoongArch: Use correct sp value to get graph addr in stack unwinders

The stack frame looks like this when function_graph is enabled:

	---------          <- function sp_on_entry
	|       |
	| FAKE_RA          <- sp_on_entry - sizeof(pt_regs) + PT_R1
	|       |
	---------          <- sp_on_entry - sizeof(pt_regs)

So if we want to get the address of FAKE_RA we should get sp_on_entry
first. In the unwinder_prologue case we can use state->sp as
sp_on_entry, because we calculate each CFA and the address where $ra
is saved. But in the unwinder_guess case we cannot, because we do not
calculate the CFA at all. Although LoongArch has no fixed frame
layout, the $ra is saved at CFA - 8 in most cases, so we can still
guess. And as we now store the PC in the unwind state, we no longer
need to dereference state->sp either.
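The offset arithmetic can be sanity-checked with a small userspace
sketch (a stand-in pt_regs is assumed here; the real layout lives in
arch/loongarch/include/asm/ptrace.h, where PT_R1 is the offset of
regs[1]): the FAKE_RA slot is sp_on_entry - sizeof(pt_regs) + PT_R1,
which is the same slot as cfa - GRAPH_FAKE_OFFSET once cfa equals
sp_on_entry, and that is what the unwind_graph_addr() helper added
below computes:

	#include <stddef.h>
	#include <stdio.h>

	/* Stand-in for the kernel's struct pt_regs; only the layout matters. */
	struct pt_regs { unsigned long regs[32]; unsigned long csr_era; };

	#define GRAPH_FAKE_OFFSET \
		(sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))

	int main(void)
	{
		unsigned long sp_on_entry = 0x9000000012340000UL; /* made-up */
		unsigned long fake_ra_slot = sp_on_entry - sizeof(struct pt_regs)
					     + offsetof(struct pt_regs, regs[1]);

		/* Prints 1: both expressions name the same stack slot. */
		printf("%d\n", fake_ra_slot == sp_on_entry - GRAPH_FAKE_OFFSET);
		return 0;
	}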
Change-Id: I4c54366db7f2c898529f2e5efd636c51ad39c0bf Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ptrace.h | 1 + arch/loongarch/include/asm/unwind.h | 10 ++++++++++ arch/loongarch/kernel/unwind_guess.c | 10 ++++------ arch/loongarch/kernel/unwind_orc.c | 10 ++-------- arch/loongarch/kernel/unwind_prologue.c | 19 +++++++------------ 5 files changed, 24 insertions(+), 26 deletions(-) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index 50991a55d77e..095479abef46 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -6,6 +6,7 @@ #define _ASM_PTRACE_H #include +#include #include #include diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h index fcbf6b53490d..9833dece49f9 100644 --- a/arch/loongarch/include/asm/unwind.h +++ b/arch/loongarch/include/asm/unwind.h @@ -8,6 +8,7 @@ #define _ASM_UNWIND_H #include +#include #include #include @@ -52,4 +53,13 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, vo static inline void unwind_init(void) {} static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif /* CONFIG_UNWINDER_ORC */ + +#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) + +static inline unsigned long unwind_graph_addr(struct unwind_state *state, + unsigned long pc, unsigned long cfa) +{ + return ftrace_graph_ret_addr(state->task, &state->graph_idx, + pc, (unsigned long *)(cfa - GRAPH_FAKE_OFFSET)); +} #endif /* _ASM_UNWIND_H */ diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c index a3c47a9e2d8c..7d113e57e7fa 100644 --- a/arch/loongarch/kernel/unwind_guess.c +++ b/arch/loongarch/kernel/unwind_guess.c @@ -12,10 +12,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state) if (unwind_done(state)) return 0; - if (state->first) - return state->pc; - - return *(unsigned long *)(state->sp); + return state->pc; } EXPORT_SYMBOL_GPL(unwind_get_return_address); @@ -35,8 +32,8 @@ bool unwind_next_frame(struct unwind_state *state) state->sp < info->end; state->sp += sizeof(unsigned long)) { addr = *(unsigned long *)(state->sp); - - if (__kernel_text_address(addr)) + state->pc = unwind_graph_addr(state, addr, state->sp + 8); + if (__kernel_text_address(state->pc)) return true; } @@ -59,6 +56,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, state->pc = regs->csr_era; state->first = true; + state->pc = unwind_graph_addr(state, state->pc, state->sp); get_stack_info(state->sp, state->task, &state->stack_info); if (!unwind_done(state) && !__kernel_text_address(state->pc)) diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c index ebed520ed935..5febcd8c2da4 100644 --- a/arch/loongarch/kernel/unwind_orc.c +++ b/arch/loongarch/kernel/unwind_orc.c @@ -378,8 +378,6 @@ static bool is_entry_func(unsigned long addr) addr < (unsigned long)&kernel_entry_end; } -#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1])) - bool unwind_next_frame(struct unwind_state *state) { struct stack_info *info = &state->stack_info; @@ -437,17 +435,13 @@ bool unwind_next_frame(struct unwind_state *state) if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long))) goto err; - pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, - *p, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); - + 
pc = unwind_graph_addr(state, *p, state->sp); pc -= INSN_LINK_OFFSET; } else if (orc->ra_reg == ORC_REG_UNDEFINED) { if (!state->ra || state->ra == state->pc) goto err; - pc = ftrace_graph_ret_addr(state->task, &state->graph_idx, - state->ra, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET)); - + pc = unwind_graph_addr(state, state->ra, state->sp); pc -= INSN_LINK_OFFSET; state->ra = 0; } else { diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 5f8487b645ad..2aa8337ace3c 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -14,14 +14,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state) if (unwind_done(state)) return 0; - if (state->enable) { - return state->pc; - } else { - if (state->first) - return state->pc; - - return *(unsigned long *)(state->sp); - } + return state->pc; } EXPORT_SYMBOL_GPL(unwind_get_return_address); @@ -156,7 +149,8 @@ static bool unwind_by_guess(struct unwind_state *state) state->sp += sizeof(unsigned long)) { addr = *(unsigned long *)(state->sp); - if (__kernel_text_address(addr)) + state->pc = unwind_graph_addr(state, addr, state->sp + 8); + if (__kernel_text_address(state->pc)) return true; } @@ -174,9 +168,10 @@ bool unwind_next_frame(struct unwind_state *state) do { if (state->enable) { - if (unwind_by_prologue(state)) + if (unwind_by_prologue(state)) { + state->pc = unwind_graph_addr(state, state->pc, state->sp); return true; - + } if (info->type == STACK_TYPE_IRQ && info->end == state->sp) { regs = (struct pt_regs *)info->next_sp; @@ -224,7 +219,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, state->sp = regs->regs[3]; state->ra = regs->regs[1]; state->first = true; - + state->pc = unwind_graph_addr(state, state->pc, state->sp); get_stack_info(state->sp, state->task, &state->stack_info); if (!unwind_done(state) && !__kernel_text_address(state->pc)) -- Gitee From 69e3bf45988b75e8f7d84088c438a4c8838f3295 Mon Sep 17 00:00:00 2001 From: lvjianmin Date: Thu, 29 Oct 2020 16:29:11 +0800 Subject: [PATCH 215/241] LoongArch: Add platform device drivers Change-Id: Id8b120985191809d05c05062e3807dc9ab377260 Signed-off-by: Huacai Chen --- drivers/platform/Kconfig | 2 + drivers/platform/Makefile | 1 + drivers/platform/loongarch/Kconfig | 38 ++ drivers/platform/loongarch/Makefile | 2 + drivers/platform/loongarch/cpu_hwmon.c | 196 ++++++ drivers/platform/loongarch/loongson-laptop.c | 631 +++++++++++++++++++ 6 files changed, 870 insertions(+) create mode 100644 drivers/platform/loongarch/Kconfig create mode 100644 drivers/platform/loongarch/Makefile create mode 100644 drivers/platform/loongarch/cpu_hwmon.c create mode 100644 drivers/platform/loongarch/loongson-laptop.c diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig index 971426bb4302..5920903485aa 100644 --- a/drivers/platform/Kconfig +++ b/drivers/platform/Kconfig @@ -6,6 +6,8 @@ if MIPS source "drivers/platform/mips/Kconfig" endif +source "drivers/platform/loongarch/Kconfig" + source "drivers/platform/goldfish/Kconfig" source "drivers/platform/chrome/Kconfig" diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile index 6fda58c021ca..c36d65ac4c4d 100644 --- a/drivers/platform/Makefile +++ b/drivers/platform/Makefile @@ -4,6 +4,7 @@ # obj-$(CONFIG_X86) += x86/ +obj-$(CONFIG_LOONGARCH) += loongarch/ obj-$(CONFIG_MELLANOX_PLATFORM) += mellanox/ obj-$(CONFIG_MIPS) += mips/ obj-$(CONFIG_OLPC_EC) += olpc/ diff --git a/drivers/platform/loongarch/Kconfig 
b/drivers/platform/loongarch/Kconfig new file mode 100644 index 000000000000..3964c147c2e9 --- /dev/null +++ b/drivers/platform/loongarch/Kconfig @@ -0,0 +1,38 @@ +# +# LoongArch Platform Specific Drivers +# + +menuconfig LOONGARCH_PLATFORM_DEVICES + bool "LoongArch Platform Specific Device Drivers" + default y + depends on LOONGARCH + help + Say Y here to get to see options for device drivers of various + LoongArch platforms, including vendor-specific laptop/desktop + extension and hardware monitor drivers. This option itself does + not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if LOONGARCH_PLATFORM_DEVICES + +config CPU_HWMON + bool "Loongson CPU HWMon Driver" + depends on MACH_LOONGSON64 + select HWMON + default y + help + Loongson-3A/3B/3C CPU Hwmon (temperature sensor) driver. + +config LOONGSON_LAPTOP + tristate "Generic Loongson-3A Laptop Driver" + depends on ACPI + depends on BACKLIGHT_CLASS_DEVICE + depends on INPUT + depends on MACH_LOONGSON64 + select INPUT_SPARSEKMAP + default y + help + ACPI-based Loongson-3 family laptops generic driver. + +endif # LOONGARCH_PLATFORM_DEVICES diff --git a/drivers/platform/loongarch/Makefile b/drivers/platform/loongarch/Makefile new file mode 100644 index 000000000000..8038291bae3a --- /dev/null +++ b/drivers/platform/loongarch/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CPU_HWMON) += cpu_hwmon.o +obj-$(CONFIG_LOONGSON_LAPTOP) += loongson-laptop.o diff --git a/drivers/platform/loongarch/cpu_hwmon.c b/drivers/platform/loongarch/cpu_hwmon.c new file mode 100644 index 000000000000..3b736a6c0ac7 --- /dev/null +++ b/drivers/platform/loongarch/cpu_hwmon.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Loongson Technology Co., Ltd. 
+ */ +#include +#include +#include +#include +#include +#include + +#include + +static int nr_packages; +static struct device *cpu_hwmon_dev; + +static int loongson3_cpu_temp(int cpu) +{ + u32 reg; + + reg = iocsr_read32(LOONGARCH_IOCSR_CPUTEMP) & 0xff; + + return (int)((s8)reg) * 1000; +} + +static ssize_t cpu_temp_label(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int id = (to_sensor_dev_attr(attr))->index - 1; + return sprintf(buf, "CPU %d Temperature\n", id); +} + +static ssize_t get_cpu_temp(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int id = (to_sensor_dev_attr(attr))->index - 1; + int value = loongson3_cpu_temp(id); + return sprintf(buf, "%d\n", value); +} + +static SENSOR_DEVICE_ATTR(temp1_input, 0444, get_cpu_temp, NULL, 1); +static SENSOR_DEVICE_ATTR(temp1_label, 0444, cpu_temp_label, NULL, 1); +static SENSOR_DEVICE_ATTR(temp2_input, 0444, get_cpu_temp, NULL, 2); +static SENSOR_DEVICE_ATTR(temp2_label, 0444, cpu_temp_label, NULL, 2); +static SENSOR_DEVICE_ATTR(temp3_input, 0444, get_cpu_temp, NULL, 3); +static SENSOR_DEVICE_ATTR(temp3_label, 0444, cpu_temp_label, NULL, 3); +static SENSOR_DEVICE_ATTR(temp4_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp4_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp5_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp5_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp6_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp6_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp7_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp7_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp8_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp8_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp9_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp9_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp10_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp10_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp11_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp11_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp12_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp12_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp13_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp13_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp14_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp14_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp15_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp15_label, 0444, cpu_temp_label, NULL, 4); +static SENSOR_DEVICE_ATTR(temp16_input, 0444, get_cpu_temp, NULL, 4); +static SENSOR_DEVICE_ATTR(temp16_label, 0444, cpu_temp_label, NULL, 4); + +static struct attribute *cpu_hwmon_attributes[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_label.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_label.dev_attr.attr, + &sensor_dev_attr_temp4_input.dev_attr.attr, + &sensor_dev_attr_temp4_label.dev_attr.attr, + &sensor_dev_attr_temp5_input.dev_attr.attr, + &sensor_dev_attr_temp5_label.dev_attr.attr, + 
&sensor_dev_attr_temp6_input.dev_attr.attr,
+	&sensor_dev_attr_temp6_label.dev_attr.attr,
+	&sensor_dev_attr_temp7_input.dev_attr.attr,
+	&sensor_dev_attr_temp7_label.dev_attr.attr,
+	&sensor_dev_attr_temp8_input.dev_attr.attr,
+	&sensor_dev_attr_temp8_label.dev_attr.attr,
+	&sensor_dev_attr_temp9_input.dev_attr.attr,
+	&sensor_dev_attr_temp9_label.dev_attr.attr,
+	&sensor_dev_attr_temp10_input.dev_attr.attr,
+	&sensor_dev_attr_temp10_label.dev_attr.attr,
+	&sensor_dev_attr_temp11_input.dev_attr.attr,
+	&sensor_dev_attr_temp11_label.dev_attr.attr,
+	&sensor_dev_attr_temp12_input.dev_attr.attr,
+	&sensor_dev_attr_temp12_label.dev_attr.attr,
+	&sensor_dev_attr_temp13_input.dev_attr.attr,
+	&sensor_dev_attr_temp13_label.dev_attr.attr,
+	&sensor_dev_attr_temp14_input.dev_attr.attr,
+	&sensor_dev_attr_temp14_label.dev_attr.attr,
+	&sensor_dev_attr_temp15_input.dev_attr.attr,
+	&sensor_dev_attr_temp15_label.dev_attr.attr,
+	&sensor_dev_attr_temp16_input.dev_attr.attr,
+	&sensor_dev_attr_temp16_label.dev_attr.attr,
+	NULL
+};
+
+static umode_t cpu_hwmon_is_visible(struct kobject *kobj,
+				    struct attribute *attr, int i)
+{
+	int id = i / 2;
+
+	if (id < nr_packages)
+		return attr->mode;
+	return 0;
+}
+
+static struct attribute_group cpu_hwmon_group = {
+	.attrs = cpu_hwmon_attributes,
+	.is_visible = cpu_hwmon_is_visible,
+};
+
+static const struct attribute_group *cpu_hwmon_groups[] = {
+	&cpu_hwmon_group,
+	NULL
+};
+
+static int cpu_initial_threshold = 72000;
+static int cpu_thermal_threshold = 96000;
+module_param(cpu_thermal_threshold, int, 0644);
+MODULE_PARM_DESC(cpu_thermal_threshold, "cpu thermal threshold (96000 (default))");
+
+static struct delayed_work thermal_work;
+
+static void do_thermal_timer(struct work_struct *work)
+{
+	int i, value, temp_max = 0;
+
+	for (i=0; i<nr_packages; i++) {
+		value = loongson3_cpu_temp(i);
+		if (value > temp_max)
+			temp_max = value;
+	}
+
+	if (temp_max <= cpu_thermal_threshold)
+		schedule_delayed_work(&thermal_work, msecs_to_jiffies(5000));
+	else
+		orderly_poweroff(true);
+}
+
+static int __init loongson_hwmon_init(void)
+{
+	int i, value, temp_max = 0;
+
+	pr_info("Loongson Hwmon Enter...\n");
+
+	nr_packages = loongson_sysconf.nr_cpus /
+		loongson_sysconf.cores_per_package;
+
+	cpu_hwmon_dev = hwmon_device_register_with_groups(NULL, "cpu_hwmon",
+							  NULL, cpu_hwmon_groups);
+	if (IS_ERR(cpu_hwmon_dev)) {
+		pr_err("Hwmon register fail with %ld!\n", PTR_ERR(cpu_hwmon_dev));
+		return PTR_ERR(cpu_hwmon_dev);
+	}
+
+	for (i = 0; i < nr_packages; i++) {
+		value = loongson3_cpu_temp(i);
+		if (value > temp_max)
+			temp_max = value;
+	}
+
+	pr_info("Initial CPU temperature is %d (highest).\n", temp_max);
+	if (temp_max > cpu_initial_threshold)
+		cpu_thermal_threshold += temp_max - cpu_initial_threshold;
+
+	INIT_DEFERRABLE_WORK(&thermal_work, do_thermal_timer);
+	schedule_delayed_work(&thermal_work, msecs_to_jiffies(20000));
+
+	return 0;
+}
+
+static void __exit loongson_hwmon_exit(void)
+{
+	cancel_delayed_work_sync(&thermal_work);
+	hwmon_device_unregister(cpu_hwmon_dev);
+}
+
+module_init(loongson_hwmon_init);
+module_exit(loongson_hwmon_exit);
+
+MODULE_AUTHOR("Yu Xiang ");
+MODULE_AUTHOR("Huacai Chen ");
+MODULE_DESCRIPTION("Loongson CPU Hwmon driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/loongarch/loongson-laptop.c b/drivers/platform/loongarch/loongson-laptop.c
new file mode 100644
index 000000000000..e8981c436651
--- /dev/null
+++ b/drivers/platform/loongarch/loongson-laptop.c
@@ -0,0 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic Loongson processor based LAPTOP/ALL-IN-ONE driver
+ *
+
* Jianmin Lv + * Huacai Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* 1. Driver-wide structs and misc. variables */ + +/* ACPI HIDs */ +#define LOONGSON_ACPI_EC_HID "PNP0C09" +#define LOONGSON_ACPI_HKEY_HID "LOON0000" + +#define ACPI_LAPTOP_NAME "loongson-laptop" +#define ACPI_LAPTOP_ACPI_EVENT_PREFIX "loongson" + +#define MAX_ACPI_ARGS 3 +#define GENERIC_HOTKEY_MAP_MAX 64 + +#define GENERIC_EVENT_TYPE_OFF 12 +#define GENERIC_EVENT_TYPE_MASK 0xF000 +#define GENERIC_EVENT_CODE_MASK 0x0FFF + +struct generic_sub_driver { + u32 type; + char *name; + acpi_handle *handle; + struct acpi_device *device; + struct platform_driver *driver; + int (*init)(struct generic_sub_driver *sub_driver); + void (*notify)(struct generic_sub_driver *sub_driver, u32 event); + u8 acpi_notify_installed; +}; + +static u32 input_device_registered; +static struct input_dev *generic_inputdev; + +static acpi_handle hotkey_handle; +static struct key_entry hotkey_keycode_map[GENERIC_HOTKEY_MAP_MAX]; + +int loongson_laptop_turn_on_backlight(void); +int loongson_laptop_turn_off_backlight(void); +static int loongson_laptop_backlight_update(struct backlight_device *bd); + +/* 2. ACPI Helpers and device model */ + +static int acpi_evalf(acpi_handle handle, int *res, char *method, char *fmt, ...) +{ + char res_type; + char *fmt0 = fmt; + va_list ap; + int success, quiet; + acpi_status status; + struct acpi_object_list params; + struct acpi_buffer result, *resultp; + union acpi_object in_objs[MAX_ACPI_ARGS], out_obj; + + if (!*fmt) { + pr_err("acpi_evalf() called with empty format\n"); + return 0; + } + + if (*fmt == 'q') { + quiet = 1; + fmt++; + } else + quiet = 0; + + res_type = *(fmt++); + + params.count = 0; + params.pointer = &in_objs[0]; + + va_start(ap, fmt); + while (*fmt) { + char c = *(fmt++); + switch (c) { + case 'd': /* int */ + in_objs[params.count].integer.value = va_arg(ap, int); + in_objs[params.count++].type = ACPI_TYPE_INTEGER; + break; + /* add more types as needed */ + default: + pr_err("acpi_evalf() called with invalid format character '%c'\n", c); + va_end(ap); + return 0; + } + } + va_end(ap); + + if (res_type != 'v') { + result.length = sizeof(out_obj); + result.pointer = &out_obj; + resultp = &result; + } else + resultp = NULL; + + status = acpi_evaluate_object(handle, method, ¶ms, resultp); + + switch (res_type) { + case 'd': /* int */ + success = (status == AE_OK && out_obj.type == ACPI_TYPE_INTEGER); + if (success && res) + *res = out_obj.integer.value; + break; + case 'v': /* void */ + success = status == AE_OK; + break; + /* add more types as needed */ + default: + pr_err("acpi_evalf() called with invalid format character '%c'\n", res_type); + return 0; + } + + if (!success && !quiet) + pr_err("acpi_evalf(%s, %s, ...) 
failed: %s\n", + method, fmt0, acpi_format_exception(status)); + + return success; +} + +static int hotkey_status_get(int *status) +{ + if (!acpi_evalf(hotkey_handle, status, "GSWS", "d")) + return -EIO; + + return 0; +} + +static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data) +{ + struct generic_sub_driver *sub_driver = data; + + if (!sub_driver || !sub_driver->notify) + return; + sub_driver->notify(sub_driver, event); +} + +static int __init setup_acpi_notify(struct generic_sub_driver *sub_driver) +{ + int rc; + acpi_status status; + + if (!*sub_driver->handle) + return 0; + + rc = acpi_bus_get_device(*sub_driver->handle, &sub_driver->device); + if (rc < 0) { + pr_err("acpi_bus_get_device(%s) failed: %d\n", sub_driver->name, rc); + return -ENODEV; + } + + sub_driver->device->driver_data = sub_driver; + sprintf(acpi_device_class(sub_driver->device), "%s/%s", + ACPI_LAPTOP_ACPI_EVENT_PREFIX, sub_driver->name); + + status = acpi_install_notify_handler(*sub_driver->handle, + sub_driver->type, dispatch_acpi_notify, sub_driver); + if (ACPI_FAILURE(status)) { + if (status == AE_ALREADY_EXISTS) { + pr_notice("Another device driver is already " + "handling %s events\n", sub_driver->name); + } else { + pr_err("acpi_install_notify_handler(%s) failed: %s\n", + sub_driver->name, acpi_format_exception(status)); + } + return -ENODEV; + } + sub_driver->acpi_notify_installed = 1; + + return 0; +} + +#ifdef CONFIG_PM +static int loongson_hotkey_suspend(struct device *dev) +{ + return 0; +} + +static int loongson_hotkey_resume(struct device *dev) +{ + int status = 0; + struct key_entry ke; + struct backlight_device *bd; + + /* + * Only if the firmware supports SW_LID event model, we can handle the + * event. This is for the consideration of development board without EC. + */ + if (test_bit(SW_LID, generic_inputdev->swbit)) { + if (hotkey_status_get(&status) < 0) + return -EIO; + /* + * The input device sw element records the last lid status. + * When the system is awakened by other wake-up sources, + * the lid event will also be reported. The judgment of + * adding SW_LID bit which in sw element can avoid this + * case. + * + * Input system will drop lid event when current lid event + * value and last lid status in the same. So laptop driver + * doesn't report repeated events. + * + * Lid status is generally 0, but hardware exception is + * considered. So add lid status confirmation. + */ + if (test_bit(SW_LID, generic_inputdev->sw) && !(status & (1 << SW_LID))) { + ke.type = KE_SW; + ke.sw.value = (u8)status; + ke.sw.code = SW_LID; + sparse_keymap_report_entry(generic_inputdev, &ke, 1, true); + } + } + + bd = backlight_device_get_by_type(BACKLIGHT_PLATFORM); + if (bd) { + loongson_laptop_backlight_update(bd) ? 
+ pr_warn("Loongson_backlight: resume brightness failed") : + pr_info("Loongson_backlight: resume brightness %d\n", bd->props.brightness); + } + + return 0; +} + +static SIMPLE_DEV_PM_OPS(loongson_hotkey_pm, + loongson_hotkey_suspend, loongson_hotkey_resume); +#endif + +static int loongson_hotkey_probe(struct platform_device *pdev) +{ + hotkey_handle = ACPI_HANDLE(&pdev->dev); + + if (!hotkey_handle) + return -ENODEV; + + return 0; +} + +static const struct acpi_device_id loongson_device_ids[] = { + {LOONGSON_ACPI_HKEY_HID, 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, loongson_device_ids); + +static struct platform_driver loongson_hotkey_driver = { + .probe = loongson_hotkey_probe, + .driver = { + .name = "loongson-hotkey", + .owner = THIS_MODULE, + .pm = pm_ptr(&loongson_hotkey_pm), + .acpi_match_table = loongson_device_ids, + }, +}; + +static int hotkey_map(void) +{ + u32 index; + acpi_status status; + struct acpi_buffer buf; + union acpi_object *pack; + + buf.length = ACPI_ALLOCATE_BUFFER; + status = acpi_evaluate_object_typed(hotkey_handle, "KMAP", NULL, &buf, ACPI_TYPE_PACKAGE); + if (status != AE_OK) { + pr_err("ACPI exception: %s\n", acpi_format_exception(status)); + return -1; + } + pack = buf.pointer; + for (index = 0; index < pack->package.count; index++) { + union acpi_object *element, *sub_pack; + + sub_pack = &pack->package.elements[index]; + + element = &sub_pack->package.elements[0]; + hotkey_keycode_map[index].type = element->integer.value; + element = &sub_pack->package.elements[1]; + hotkey_keycode_map[index].code = element->integer.value; + element = &sub_pack->package.elements[2]; + hotkey_keycode_map[index].keycode = element->integer.value; + } + + return 0; +} + +static int hotkey_backlight_set(bool enable) +{ + if (!acpi_evalf(hotkey_handle, NULL, "VCBL", "vd", enable ? 
1 : 0)) + return -EIO; + + return 0; +} + +static int ec_get_brightness(void) +{ + int status = 0; + + if (!hotkey_handle) + return -ENXIO; + + if (!acpi_evalf(hotkey_handle, &status, "ECBG", "d")) + return -EIO; + + return status; +} + +static int ec_set_brightness(int level) +{ + + int ret = 0; + + if (!hotkey_handle) + return -ENXIO; + + if (!acpi_evalf(hotkey_handle, NULL, "ECBS", "vd", level)) + ret = -EIO; + + return ret; +} + +static int ec_backlight_level(u8 level) +{ + int status = 0; + + if (!hotkey_handle) + return -ENXIO; + + if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d")) + return -EIO; + + if ((status < 0) || (level > status)) + return status; + + if (!acpi_evalf(hotkey_handle, &status, "ECSL", "d")) + return -EIO; + + if ((status < 0) || (level < status)) + return status; + + return level; +} + +static int loongson_laptop_backlight_update(struct backlight_device *bd) +{ + int lvl = ec_backlight_level(bd->props.brightness); + if (lvl < 0) + return -EIO; + if (ec_set_brightness(lvl)) + return -EIO; + + return 0; +} + +static int loongson_laptop_get_brightness(struct backlight_device *bd) +{ + int level; + + level = ec_get_brightness(); + if (level < 0) + return -EIO; + + return level; +} + +static const struct backlight_ops backlight_laptop_ops = { + .update_status = loongson_laptop_backlight_update, + .get_brightness = loongson_laptop_get_brightness, +}; + +static int laptop_backlight_register(void) +{ + int status = 0; + struct backlight_properties props; + + memset(&props, 0, sizeof(props)); + + if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d")) + return -EIO; + + props.brightness = 1; + props.max_brightness = status; + props.type = BACKLIGHT_PLATFORM; + + backlight_device_register("loongson_laptop", + NULL, NULL, &backlight_laptop_ops, &props); + + return 0; +} + +int loongson_laptop_turn_on_backlight(void) +{ + int status; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list args = { 1, &arg0 }; + + arg0.integer.value = 1; + status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); + if (ACPI_FAILURE(status)) { + pr_info("Loongson lvds error:0x%x\n", status); + return -ENODEV; + } + return 0; +} + +int loongson_laptop_turn_off_backlight(void) +{ + int status; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list args = { 1, &arg0 }; + + arg0.integer.value = 0; + status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); + if (ACPI_FAILURE(status)) { + pr_info("Loongson lvds error:0x%x\n", status); + return -ENODEV; + } + return 0; +} + +static int __init event_init(struct generic_sub_driver *sub_driver) +{ + int ret; + + ret = hotkey_map(); + if (ret < 0) { + pr_err("Failed to parse keymap from DSDT\n"); + return ret; + } + + ret = sparse_keymap_setup(generic_inputdev, hotkey_keycode_map, NULL); + if (ret < 0) { + pr_err("Failed to setup input device keymap\n"); + input_free_device(generic_inputdev); + + return ret; + } + + /* + * This hotkey driver handle backlight event when + * acpi_video_get_backlight_type() gets acpi_backlight_vendor + */ + if (acpi_video_get_backlight_type() == acpi_backlight_vendor) + hotkey_backlight_set(true); + else + hotkey_backlight_set(false); + + pr_info("ACPI: enabling firmware HKEY event interface...\n"); + + return ret; +} + +static void event_notify(struct generic_sub_driver *sub_driver, u32 event) +{ + int type, scan_code; + struct key_entry *ke = NULL; + + scan_code = event & GENERIC_EVENT_CODE_MASK; + type = (event & GENERIC_EVENT_TYPE_MASK) >> GENERIC_EVENT_TYPE_OFF; + ke = 
sparse_keymap_entry_from_scancode(generic_inputdev, scan_code); + if (ke) { + if (type == KE_SW) { + int status = 0; + + if (hotkey_status_get(&status) < 0) + return; + ke->sw.value = !!(status & (1 << ke->sw.code)); + } + sparse_keymap_report_entry(generic_inputdev, ke, 1, true); + } +} + +/* 3. Infrastructure */ + +static void generic_subdriver_exit(struct generic_sub_driver *sub_driver); + +static int __init generic_subdriver_init(struct generic_sub_driver *sub_driver) +{ + int ret; + + if (!sub_driver || !sub_driver->driver) + return -EINVAL; + + ret = platform_driver_register(sub_driver->driver); + if (ret) + return -EINVAL; + + if (sub_driver->init) + sub_driver->init(sub_driver); + + if (sub_driver->notify) { + ret = setup_acpi_notify(sub_driver); + if (ret == -ENODEV) { + ret = 0; + goto err_out; + } + if (ret < 0) + goto err_out; + } + + return 0; + +err_out: + generic_subdriver_exit(sub_driver); + return (ret < 0) ? ret : 0; +} + +static void generic_subdriver_exit(struct generic_sub_driver *sub_driver) +{ + + if (sub_driver->acpi_notify_installed) { + acpi_remove_notify_handler(*sub_driver->handle, + sub_driver->type, dispatch_acpi_notify); + sub_driver->acpi_notify_installed = 0; + } + platform_driver_unregister(sub_driver->driver); +} + +static struct generic_sub_driver generic_sub_drivers[] __refdata = { + { + .name = "hotkey", + .init = event_init, + .notify = event_notify, + .handle = &hotkey_handle, + .type = ACPI_DEVICE_NOTIFY, + .driver = &loongson_hotkey_driver, + }, +}; + +static int __init generic_acpi_laptop_init(void) +{ + bool ec_found; + int i, ret, status; + + if (acpi_disabled) + return -ENODEV; + + /* The EC device is required */ + ec_found = acpi_dev_found(LOONGSON_ACPI_EC_HID); + if (!ec_found) + return -ENODEV; + + /* Enable SCI for EC */ + acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1); + + generic_inputdev = input_allocate_device(); + if (!generic_inputdev) { + pr_err("Unable to allocate input device\n"); + return -ENOMEM; + } + + /* Prepare input device, but don't register */ + generic_inputdev->name = + "Loongson Generic Laptop/All-in-One Extra Buttons"; + generic_inputdev->phys = ACPI_LAPTOP_NAME "/input0"; + generic_inputdev->id.bustype = BUS_HOST; + generic_inputdev->dev.parent = NULL; + + /* Init subdrivers */ + for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++) { + ret = generic_subdriver_init(&generic_sub_drivers[i]); + if (ret < 0) { + input_free_device(generic_inputdev); + while (--i >= 0) + generic_subdriver_exit(&generic_sub_drivers[i]); + return ret; + } + } + + ret = input_register_device(generic_inputdev); + if (ret < 0) { + input_free_device(generic_inputdev); + while (--i >= 0) + generic_subdriver_exit(&generic_sub_drivers[i]); + pr_err("Unable to register input device\n"); + return ret; + } + + input_device_registered = 1; + + if (acpi_evalf(hotkey_handle, &status, "ECBG", "d")) { + pr_info("Loongson Laptop used, init brightness is 0x%x\n", status); + ret = laptop_backlight_register(); + if (ret < 0) + pr_err("Loongson Laptop: laptop-backlight device register failed\n"); + } + + return 0; +} + +static void __exit generic_acpi_laptop_exit(void) +{ + if (generic_inputdev) { + if (input_device_registered) + input_unregister_device(generic_inputdev); + else + input_free_device(generic_inputdev); + } +} + +module_init(generic_acpi_laptop_init); +module_exit(generic_acpi_laptop_exit); + +MODULE_AUTHOR("Jianmin Lv "); +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("Loongson Laptop/All-in-One ACPI Driver"); +MODULE_LICENSE("GPL"); -- 
Gitee From d8808d1609c9533d4e0c1bb49c69963d1dd415ca Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 23 Sep 2021 20:22:57 +0800 Subject: [PATCH 216/241] LoongArch: Add LS7A IOMMU support Add driver support for the IOMMU in LS7A. If you need to enable it, please add the loongson_iommu=on kernel parameter. Change-Id: I59ca537084ea397fbddec04c25584b9bef5df94a Signed-off-by: Huacai Chen --- drivers/iommu/Kconfig | 1 + drivers/iommu/Makefile | 2 +- drivers/iommu/loongson/Kconfig | 14 + drivers/iommu/loongson/Makefile | 2 + drivers/iommu/loongson/iommu.c | 2039 +++++++++++++++++++++++++++++++ drivers/iommu/loongson/iommu.h | 219 ++++ drivers/vfio/Kconfig | 2 +- 7 files changed, 2277 insertions(+), 2 deletions(-) create mode 100644 drivers/iommu/loongson/Kconfig create mode 100644 drivers/iommu/loongson/Makefile create mode 100644 drivers/iommu/loongson/iommu.c create mode 100644 drivers/iommu/loongson/iommu.h diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 04878caf6da4..7dfd23262927 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -131,6 +131,7 @@ config MSM_IOMMU source "drivers/iommu/amd/Kconfig" source "drivers/iommu/intel/Kconfig" +source "drivers/iommu/loongson/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 11f1771104f3..8059c094910d 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += amd/ intel/ arm/ +obj-y += amd/ intel/ arm/ loongson/ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o diff --git a/drivers/iommu/loongson/Kconfig b/drivers/iommu/loongson/Kconfig new file mode 100644 index 000000000000..7b5d1b947c03 --- /dev/null +++ b/drivers/iommu/loongson/Kconfig @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Loongson IOMMU support +config LOONGSON_IOMMU + bool "Loongson IOMMU support" + select IOMMU_API + select IOMMU_DMA + select IOMMU_DEFAULT_PASSTHROUGH + depends on LOONGARCH + help + With this option you can enable support for Loongson IOMMU hardware in + your system. An IOMMU is a hardware component which provides remapping + of DMA memory accesses from devices. With an IOMMU you can isolate the + DMA memory of different devices and protect the system from misbehaving + device drivers or hardware. diff --git a/drivers/iommu/loongson/Makefile b/drivers/iommu/loongson/Makefile new file mode 100644 index 000000000000..99e38e56ebc2 --- /dev/null +++ b/drivers/iommu/loongson/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_LOONGSON_IOMMU) += iommu.o diff --git a/drivers/iommu/loongson/iommu.c b/drivers/iommu/loongson/iommu.c new file mode 100644 index 000000000000..9052b7b30827 --- /dev/null +++ b/drivers/iommu/loongson/iommu.c @@ -0,0 +1,2039 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2020-2021 Loongson Technology Ltd. 
+ * Author: Lv Chen + * Wang Yang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iommu.h" + +#define LOOP_TIMEOUT 100000 +#define IOVA_START (SZ_256M) +#define IOVA_END0 (SZ_2G + SZ_256M) + +#define IVRS_HEADER_LENGTH 48 +#define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40 +#define IVHD_DEV_ALL 0x01 +#define IVHD_DEV_SELECT 0x02 +#define IVHD_DEV_SELECT_RANGE_START 0x03 +#define IVHD_DEV_RANGE_END 0x04 +#define IVHD_DEV_ALIAS 0x42 +#define IVHD_DEV_EXT_SELECT 0x46 +#define IVHD_DEV_ACPI_HID 0xf0 + +#define IVHD_HEAD_TYPE10 0x10 +#define IVHD_HEAD_TYPE11 0x11 +#define IVHD_HEAD_TYPE40 0x40 + +#define MAX_BDF_NUM 0xffff + +#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) + +/* + * structure describing one IOMMU in the ACPI table. Typically followed by one + * or more ivhd_entrys. + */ +struct ivhd_header { + u8 type; + u8 flags; + u16 length; + u16 devid; + u16 cap_ptr; + u64 mmio_phys; + u16 pci_seg; + u16 info; + u32 efr_attr; + + /* Following only valid on IVHD type 11h and 40h */ + u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */ + u64 res; +} __attribute__((packed)); + +/* + * A device entry describing which devices a specific IOMMU translates and + * which requestor ids they use. + */ +struct ivhd_entry { + u8 type; + u16 devid; + u8 flags; + u32 ext; + u32 hidh; + u64 cid; + u8 uidf; + u8 uidl; + u8 uid; +} __attribute__((packed)); + +LIST_HEAD(loongson_iommu_list); /* list of all IOMMUs in the system */ +LIST_HEAD(loongson_rlookup_iommu_list); + +static u32 rlookup_table_size; /* size if the rlookup table */ +static int loongson_iommu_target_ivhd_type; +u16 loongson_iommu_last_bdf; /* largest PCI device id we have to handle */ + +int loongson_iommu_disable; +static struct iommu_ops loongson_iommu_ops; + +static void iommu_write_regl(loongson_iommu *iommu, unsigned long off, u32 val) +{ + *(u32 *)(iommu->membase + off) = val; + iob(); +} + +static u32 iommu_read_regl(loongson_iommu *iommu, unsigned long off) +{ + u32 val; + + val = *(u32 *)(iommu->membase + off); + iob(); + return val; +} + +static void iommu_translate_disable(loongson_iommu *iommu) +{ + u32 val; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + val = iommu_read_regl(iommu, LA_IOMMU_EIVDB); + + /* Disable */ + val &= ~(1 << 31); + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + /* Write cmd */ + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); +} + +static void iommu_translate_enable(loongson_iommu *iommu) +{ + u32 val = 0; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + val = iommu_read_regl(iommu, LA_IOMMU_EIVDB); + + /* Enable */ + val |= (1 << 31); + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + /* Write cmd */ + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); +} + +static bool loongson_iommu_capable(enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + return false; + } +} + +static dom_info *to_dom_info(struct iommu_domain *dom) +{ + return container_of(dom, dom_info, domain); +} + +/* + * Check whether the system has a priv. 
+ * If yes, it returns 1 and if not, it returns 0 + */ +static int has_dom(loongson_iommu *iommu) +{ + spin_lock(&iommu->dom_info_lock); + while (!list_empty(&iommu->dom_list)) { + spin_unlock(&iommu->dom_info_lock); + return 1; + } + spin_unlock(&iommu->dom_info_lock); + + return 0; +} + +static int update_dev_table(struct loongson_iommu_dev_data *dev_data, int flag) +{ + u32 val = 0; + int index; + unsigned short bdf; + loongson_iommu *iommu; + u16 domain_id; + + if (dev_data == NULL) { + pr_err("%s dev_data is NULL", __func__); + return 0; + } + + if (dev_data->iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + if (dev_data->iommu_entry == NULL) { + pr_err("%s iommu_entry is NULL", __func__); + return 0; + } + + iommu = dev_data->iommu; + domain_id = dev_data->iommu_entry->id; + bdf = dev_data->bdf; + + /* Set device table */ + if (flag) { + index = find_first_zero_bit(iommu->devtable_bitmap, MAX_ATTACHED_DEV_ID); + if (index < MAX_ATTACHED_DEV_ID) { + __set_bit(index, iommu->devtable_bitmap); + dev_data->index = index; + } else { + pr_err("%s get id from dev table failed\n", __func__); + return 0; + } + + pr_info("%s bdf %x domain_id %d index %x" + " iommu segment %d flag %x\n", + __func__, bdf, domain_id, index, + iommu->segment, flag); + + val = bdf & 0xffff; + val |= ((domain_id & 0xf) << 16); /* domain id */ + val |= ((index & 0xf) << 24); /* index */ + val |= (0x1 << 20); /* valid */ + val |= (0x1 << 31); /* enable */ + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); + } else { + /* Flush device table */ + index = dev_data->index; + pr_info("%s bdf %x domain_id %d index %x" + " iommu segment %d flag %x\n", + __func__, bdf, domain_id, index, + iommu->segment, flag); + + val = iommu_read_regl(iommu, LA_IOMMU_EIVDB); + val &= ~(0x7fffffff); + val |= ((index & 0xf) << 24); /* index */ + iommu_write_regl(iommu, LA_IOMMU_EIVDB, val); + + val = iommu_read_regl(iommu, LA_IOMMU_CMD); + val &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, val); + + if (index < MAX_ATTACHED_DEV_ID) + __clear_bit(index, iommu->devtable_bitmap); + } + + return 0; +} + +static void flush_iotlb(loongson_iommu *iommu) +{ + u32 val, cmd; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + val = iommu_read_regl(iommu, LA_IOMMU_VBTC); + val &= ~0x1f; + + /* Flush all tlb */ + val |= 0x5; + iommu_write_regl(iommu, LA_IOMMU_VBTC, val); + + cmd = iommu_read_regl(iommu, LA_IOMMU_CMD); + cmd &= 0xfffffffc; + iommu_write_regl(iommu, LA_IOMMU_CMD, cmd); +} + +static int flush_pgtable_is_busy(loongson_iommu *iommu) +{ + u32 val; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + val = iommu_read_regl(iommu, LA_IOMMU_VBTC); + + return val & IOMMU_PGTABLE_BUSY; +} + +static int __iommu_flush_iotlb_all(loongson_iommu *iommu) +{ + u32 retry = 0; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return 0; + } + + flush_iotlb(iommu); + while (flush_pgtable_is_busy(iommu)) { + if (retry == LOOP_TIMEOUT) { + pr_err("Loongson-IOMMU: iotlb flush busy\n"); + return -EIO; + } + retry++; + udelay(1); + } + iommu_translate_enable(iommu); + + return 0; +} + +static void priv_flush_iotlb_pde(loongson_iommu *iommu) +{ + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + __iommu_flush_iotlb_all(iommu); +} + +static void do_attach(iommu_info *info, struct loongson_iommu_dev_data *dev_data) +{ 
+ if (!dev_data->count) + return; + + dev_data->iommu_entry = info; + + spin_lock(&info->devlock); + list_add(&dev_data->list, &info->dev_list); + info->dev_cnt += 1; + spin_unlock(&info->devlock); + + update_dev_table(dev_data, 1); + if (info->dev_cnt > 0) + priv_flush_iotlb_pde(dev_data->iommu); +} + +static void do_detach(struct loongson_iommu_dev_data *dev_data) +{ + iommu_info *iommu_entry = NULL; + + if (dev_data == NULL) { + pr_err("%s dev_data is NULL", __func__); + return; + } + + if (dev_data->count) + return; + + iommu_entry = dev_data->iommu_entry; + if (iommu_entry == NULL) { + pr_err("%s iommu_entry is NULL", __func__); + return; + } + + list_del(&dev_data->list); + iommu_entry->dev_cnt -= 1; + + update_dev_table(dev_data, 0); + dev_data->iommu_entry = NULL; +} + +static void cleanup_iommu_entry(iommu_info *iommu_entry) +{ + struct loongson_iommu_dev_data *dev_data = NULL; + + spin_lock(&iommu_entry->devlock); + while (!list_empty(&iommu_entry->dev_list)) { + dev_data = list_first_entry(&iommu_entry->dev_list, + struct loongson_iommu_dev_data, list); + do_detach(dev_data); + } + + spin_unlock(&iommu_entry->devlock); +} + +static int domain_id_alloc(loongson_iommu *iommu) +{ + int id = -1; + + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return id; + } + + spin_lock(&iommu->domain_bitmap_lock); + id = find_first_zero_bit(iommu->domain_bitmap, MAX_DOMAIN_ID); + if (id < MAX_DOMAIN_ID) + __set_bit(id, iommu->domain_bitmap); + else + pr_err("Loongson-IOMMU: Alloc domain id over max domain id\n"); + + spin_unlock(&iommu->domain_bitmap_lock); + + return id; +} + +static void domain_id_free(loongson_iommu *iommu, int id) +{ + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + spin_lock(&iommu->domain_bitmap_lock); + if ((id >= 0) && (id < MAX_DOMAIN_ID)) + __clear_bit(id, iommu->domain_bitmap); + + spin_unlock(&iommu->domain_bitmap_lock); +} + +/* + * This function adds a private domain to the global domain list + */ +static void add_domain_to_list(loongson_iommu *iommu, dom_info *priv) +{ + spin_lock(&iommu->dom_info_lock); + list_add(&priv->list, &iommu->dom_list); + spin_unlock(&iommu->dom_info_lock); +} + +static void del_domain_from_list(loongson_iommu *iommu, dom_info *priv) +{ + spin_lock(&iommu->dom_info_lock); + list_del(&priv->list); + spin_unlock(&iommu->dom_info_lock); +} + +static spt_entry *iommu_zalloc_page(loongson_iommu *iommu) +{ + int index; + void *addr; + spt_entry *shd_entry; + + spin_lock(&iommu->pgtable_bitmap_lock); + index = find_first_zero_bit(iommu->pgtable_bitmap, iommu->maxpages); + if (index < iommu->maxpages) + __set_bit(index, iommu->pgtable_bitmap); + spin_unlock(&iommu->pgtable_bitmap_lock); + + shd_entry = NULL; + if (index < iommu->maxpages) { + shd_entry = kmalloc(sizeof(*shd_entry), GFP_KERNEL); + if (!shd_entry) { + pr_err("%s alloc memory for shadow page entry failed\n", __func__); + goto fail; + } + + shd_entry->shadow_ptable = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!shd_entry->shadow_ptable) { + pr_err("Loongson-IOMMU: get zeroed page err\n"); + kfree(shd_entry); + goto fail; + } + + addr = iommu->pgtbase + index * IOMMU_PAGE_SIZE; + memset(addr, 0x0, IOMMU_PAGE_SIZE); + shd_entry->index = index; + shd_entry->gmem_ptable = addr; + } + + return shd_entry; +fail: + spin_lock(&iommu->pgtable_bitmap_lock); + __clear_bit(index, iommu->pgtable_bitmap); + spin_unlock(&iommu->pgtable_bitmap_lock); + return NULL; +} + +static void iommu_free_page(loongson_iommu *iommu, spt_entry 
*shadw_entry) +{ + void *addr; + + if (shadw_entry->index < iommu->maxpages) { + addr = shadw_entry->gmem_ptable; + memset(addr, 0x0, IOMMU_PAGE_SIZE); + + spin_lock(&iommu->pgtable_bitmap_lock); + __clear_bit(shadw_entry->index, iommu->pgtable_bitmap); + spin_unlock(&iommu->pgtable_bitmap_lock); + + shadw_entry->index = -1; + free_page((unsigned long)shadw_entry->shadow_ptable); + shadw_entry->shadow_ptable = NULL; + shadw_entry->gmem_ptable = NULL; + kfree(shadw_entry); + } +} + +static void free_pagetable_one_level(iommu_info *iommu_entry, spt_entry *shd_entry, int level) +{ + int i; + unsigned long *psentry; + spt_entry *shd_entry_tmp; + loongson_iommu *iommu = iommu_entry->iommu; + + psentry = (unsigned long *)shd_entry; + if (level == IOMMU_PT_LEVEL1) { + if (iommu_pt_present(psentry) && (!iommu_pt_huge(psentry))) + iommu_free_page(iommu, shd_entry); + return; + } + + for (i = 0; i < IOMMU_PTRS_PER_LEVEL; i++) { + psentry = shd_entry->shadow_ptable + i; + if (!iommu_pt_present(psentry)) + continue; + + shd_entry_tmp = (spt_entry *)(*psentry); + free_pagetable_one_level(iommu_entry, shd_entry_tmp, level - 1); + } + + iommu_free_page(iommu, shd_entry); +} + +static void free_pagetable(iommu_info *iommu_entry) +{ + spt_entry *shd_entry; + loongson_iommu *iommu; + + iommu = iommu_entry->iommu; + shd_entry = iommu_entry->shadow_pgd; + free_pagetable_one_level(iommu_entry, shd_entry, IOMMU_LEVEL_MAX); + iommu_entry->shadow_pgd = NULL; +} + +static dom_info *dom_info_alloc(void) +{ + dom_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) { + pr_err("%s alloc memory for info failed\n", __func__); + return NULL; + } + + /* 0x10000000~0x8fffffff */ + info->mmio_pgd = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 6); + if (info->mmio_pgd == NULL) { + pr_err("%s alloc memory for virtio pgtable failed\n", __func__); + kfree(info); + return NULL; + } + + INIT_LIST_HEAD(&info->iommu_devlist); + spin_lock_init(&info->lock); + return info; +} + +static void dom_info_free(dom_info *info) +{ + /* 0x10000000~0x8fffffff */ + if (info->mmio_pgd) { + free_pages((unsigned long)info->mmio_pgd, 6); + info->mmio_pgd = NULL; + } + + kfree(info); +} + +static struct iommu_domain *loongson_iommu_domain_alloc(unsigned int type) +{ + dom_info *info; + + switch (type) { + case IOMMU_DOMAIN_UNMANAGED: + info = dom_info_alloc(); + if (info == NULL) + return NULL; + + info->domain.geometry.aperture_start = 0; + info->domain.geometry.aperture_end = ~0ULL; + info->domain.geometry.force_aperture = true; + break; + + default: + return NULL; + } + + return &info->domain; +} + +void domain_deattach_iommu(dom_info *priv, iommu_info *iommu_entry) +{ + loongson_iommu *iommu = NULL; + + if (priv == NULL) { + pr_err("%s priv is NULL", __func__); + return; + } + + if (iommu_entry == NULL) { + pr_err("%s iommu_entry is NULL", __func__); + return; + } + + if (iommu_entry->dev_cnt != 0) + return; + + iommu = iommu_entry->iommu; + if (iommu == NULL) { + pr_err("%s iommu is NULL", __func__); + return; + } + + domain_id_free(iommu_entry->iommu, iommu_entry->id); + + mutex_lock(&iommu->loongson_iommu_pgtlock); + free_pagetable(iommu_entry); + mutex_unlock(&iommu->loongson_iommu_pgtlock); + + spin_lock(&priv->lock); + list_del(&iommu_entry->list); + spin_unlock(&priv->lock); + + kfree(iommu_entry); + del_domain_from_list(iommu, priv); +} + +static void loongson_iommu_domain_free(struct iommu_domain *domain) +{ + + dom_info *priv; + loongson_iommu *iommu = NULL; + struct iommu_info *iommu_entry, 
*iommu_entry_temp; + + priv = to_dom_info(domain); + + spin_lock(&priv->lock); + list_for_each_entry_safe(iommu_entry, iommu_entry_temp, &priv->iommu_devlist, list) { + iommu = iommu_entry->iommu; + + if (iommu_entry->dev_cnt > 0) + cleanup_iommu_entry(iommu_entry); + + spin_unlock(&priv->lock); + domain_deattach_iommu(priv, iommu_entry); + spin_lock(&priv->lock); + + __iommu_flush_iotlb_all(iommu); + + if (!has_dom(iommu)) + iommu_translate_disable(iommu); + + } + spin_unlock(&priv->lock); + + dom_info_free(priv); +} + +struct loongson_iommu_rlookup_entry *lookup_rlooptable(int pcisegment) +{ + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + + list_for_each_entry(rlookupentry, &loongson_rlookup_iommu_list, list) { + if (rlookupentry->pcisegment == pcisegment) + return rlookupentry; + } + + return NULL; +} + +loongson_iommu *find_iommu_by_dev(struct pci_dev *pdev) +{ + int pcisegment; + unsigned short devid; + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + loongson_iommu *iommu = NULL; + + devid = pdev->devfn & 0xff; + + pcisegment = pci_domain_nr(pdev->bus); + + rlookupentry = lookup_rlooptable(pcisegment); + if (rlookupentry == NULL) { + pr_info("%s find segment %d rlookupentry failed\n", __func__, + pcisegment); + return iommu; + } + + iommu = rlookupentry->loongson_iommu_rlookup_table[devid]; + + return iommu; +} + +static int iommu_init_device(struct device *dev) +{ + unsigned char busnum; + unsigned short bdf, devid; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + struct loongson_iommu_dev_data *dev_data; + loongson_iommu *iommu = NULL; + + bdf = pdev->devfn & 0xff; + busnum = bus->number; + if (busnum != 0) { + while (bus->parent->parent) + bus = bus->parent; + bdf = bus->self->devfn & 0xff; + } + + if (dev_iommu_priv_get(dev)) { + pr_info("Loongson-IOMMU: bdf:0x%x has added\n", bdf); + return 0; + } + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + devid = PCI_DEVID(bus->number, bdf); + dev_data->bdf = devid; + + pci_info(pdev, "%s devid %x bus %x\n", __func__, devid, busnum); + iommu = find_iommu_by_dev(pdev); + if (iommu == NULL) + pci_info(pdev, "%s find iommu failed by dev\n", __func__); + + /* The initial state is 0, and 1 is added only when attach dev */ + dev_data->count = 0; + dev_data->iommu = iommu; + + dev_iommu_priv_set(dev, dev_data); + + return 0; +} + +static struct iommu_device *loongson_iommu_probe_device(struct device *dev) +{ + int ret = 0; + + ret = iommu_init_device(dev); + if (ret < 0) + pr_err("Loongson-IOMMU: unable to alloc memory for dev_data\n"); + + return 0; +} + +static struct iommu_group *loongson_iommu_device_group(struct device *dev) +{ + struct iommu_group *group; + + /* + * We don't support devices sharing stream IDs other than PCI RID + * aliases, since the necessary ID-to-device lookup becomes rather + * impractical given a potential sparse 32-bit stream ID space. 
+ */ + if (dev_is_pci(dev)) + group = pci_device_group(dev); + else + group = generic_device_group(dev); + + return group; +} + +static void loongson_iommu_release_device(struct device *dev) +{ + struct loongson_iommu_dev_data *dev_data; + + dev_data = dev_iommu_priv_get(dev); + dev_iommu_priv_set(dev, NULL); + kfree(dev_data); +} + +iommu_info *get_first_iommu_entry(dom_info *priv) +{ + struct iommu_info *iommu_entry; + + if (priv == NULL) { + pr_err("%s priv is NULL", __func__); + return NULL; + } + + iommu_entry = list_first_entry_or_null(&priv->iommu_devlist, + struct iommu_info, list); + + return iommu_entry; +} + +iommu_info *get_iommu_entry(dom_info *priv, loongson_iommu *iommu) +{ + struct iommu_info *iommu_entry; + + spin_lock(&priv->lock); + list_for_each_entry(iommu_entry, &priv->iommu_devlist, list) { + if (iommu_entry->iommu == iommu) { + spin_unlock(&priv->lock); + return iommu_entry; + } + } + spin_unlock(&priv->lock); + + return NULL; +} + +iommu_info *domain_attach_iommu(dom_info *priv, loongson_iommu *iommu) +{ + unsigned long pgd_pa; + u32 dir_ctrl, pgd_lo, pgd_hi; + struct iommu_info *iommu_entry = NULL; + spt_entry *shd_entry = NULL; + + iommu_entry = get_iommu_entry(priv, iommu); + if (iommu_entry) + return iommu_entry; + + iommu_entry = kzalloc(sizeof(struct iommu_info), GFP_KERNEL); + if (iommu_entry == NULL) { + pr_info("%s alloc memory for iommu_entry failed\n", __func__); + return NULL; + } + + INIT_LIST_HEAD(&iommu_entry->dev_list); + iommu_entry->iommu = iommu; + iommu_entry->id = domain_id_alloc(iommu); + if (iommu_entry->id == -1) { + pr_info("%s alloc id for domain failed\n", __func__); + kfree(iommu_entry); + return NULL; + } + + shd_entry = iommu_zalloc_page(iommu); + if (!shd_entry) { + pr_info("%s alloc shadow page entry err\n", __func__); + domain_id_free(iommu, iommu_entry->id); + kfree(iommu_entry); + return NULL; + } + + iommu_entry->shadow_pgd = shd_entry; + dir_ctrl = (IOMMU_LEVEL_STRIDE << 26) | (IOMMU_LEVEL_SHIFT(2) << 20); + dir_ctrl |= (IOMMU_LEVEL_STRIDE << 16) | (IOMMU_LEVEL_SHIFT(1) << 10); + dir_ctrl |= (IOMMU_LEVEL_STRIDE << 6) | IOMMU_LEVEL_SHIFT(0); + pgd_pa = iommu_pgt_v2p(iommu, shd_entry->gmem_ptable); + pgd_hi = pgd_pa >> 32; + pgd_lo = pgd_pa & 0xffffffff; + iommu_write_regl(iommu, LA_IOMMU_DIR_CTRL(iommu_entry->id), dir_ctrl); + iommu_write_regl(iommu, LA_IOMMU_PGD_HI(iommu_entry->id), pgd_hi); + iommu_write_regl(iommu, LA_IOMMU_PGD_LO(iommu_entry->id), pgd_lo); + + spin_lock(&priv->lock); + list_add(&iommu_entry->list, &priv->iommu_devlist); + spin_unlock(&priv->lock); + + add_domain_to_list(iommu, priv); + pr_info("%s iommu_entry->iommu %lx id %x\n", __func__, + (unsigned long)iommu_entry->iommu, iommu_entry->id); + + return iommu_entry; +} + +static struct loongson_iommu_dev_data *iommu_get_devdata(dom_info *info, + loongson_iommu *iommu, unsigned long bdf) +{ + struct iommu_info *entry; + struct loongson_iommu_dev_data *dev_data; + + entry = get_iommu_entry(info, iommu); + if (!entry) + return NULL; + + /* Find from priv list */ + spin_lock(&entry->devlock); + list_for_each_entry(dev_data, &entry->dev_list, list) { + if (dev_data->bdf == bdf) { + spin_unlock(&entry->devlock); + return dev_data; + } + } + + spin_unlock(&entry->devlock); + return NULL; +} + +static int loongson_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) +{ + unsigned short bdf; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + unsigned char busnum = pdev->bus->number; + struct loongson_iommu_dev_data *dev_data; 
+ dom_info *priv = to_dom_info(domain); + loongson_iommu *iommu; + iommu_info *iommu_entry = NULL; + + bdf = pdev->devfn & 0xff; + if (busnum != 0) { + while (bus->parent->parent) + bus = bus->parent; + bdf = bus->self->devfn & 0xff; + } + + dev_data = (struct loongson_iommu_dev_data *)dev_iommu_priv_get(dev); + if (dev_data == NULL) { + pci_info(pdev, "%s dev_data is Invalid\n", __func__); + return 0; + } + + iommu = dev_data->iommu; + if (iommu == NULL) { + pci_info(pdev, "%s iommu is Invalid\n", __func__); + return 0; + } + + pci_info(pdev, "%s busnum %x bdf %x priv %lx iommu %lx\n", __func__, + busnum, bdf, (unsigned long)priv, (unsigned long)iommu); + dev_data = iommu_get_devdata(priv, iommu, bdf); + if (dev_data) { + dev_data->count++; + pci_info(pdev, "Loongson-IOMMU: bdf 0x%x devfn %x has attached," + " count:0x%x\n", + bdf, pdev->devfn, dev_data->count); + return 0; + } else { + dev_data = (struct loongson_iommu_dev_data *)dev_iommu_priv_get(dev); + } + + iommu_entry = domain_attach_iommu(priv, iommu); + if (iommu_entry == NULL) { + pci_info(pdev, "domain attach iommu failed\n"); + return 0; + } + + dev_data->count++; + do_attach(iommu_entry, dev_data); + + return 0; +} + +static void loongson_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + unsigned short bdf; + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_bus *bus = pdev->bus; + unsigned char busnum = pdev->bus->number; + struct loongson_iommu_dev_data *dev_data; + dom_info *priv = to_dom_info(domain); + loongson_iommu *iommu; + iommu_info *iommu_entry = NULL; + + bdf = pdev->devfn & 0xff; + if (busnum != 0) { + while (bus->parent->parent) + bus = bus->parent; + bdf = bus->self->devfn & 0xff; + } + + dev_data = (struct loongson_iommu_dev_data *)dev_iommu_priv_get(dev); + if (dev_data == NULL) { + pci_info(pdev, "%s dev_data is Invalid\n", __func__); + return; + } + + iommu = dev_data->iommu; + if (iommu == NULL) { + pci_info(pdev, "%s iommu is Invalid\n", __func__); + return; + } + + dev_data = iommu_get_devdata(priv, iommu, bdf); + if (dev_data == NULL) { + pci_info(pdev, "Loongson-IOMMU: bdf 0x%x devfn %x dev_data is NULL\n", + bdf, pdev->devfn & 0xff); + return; + } + + iommu = dev_data->iommu; + dev_data->count--; + iommu_entry = get_iommu_entry(priv, iommu); + if (iommu_entry == NULL) { + pci_info(pdev, "%s get iommu_entry failed\n", __func__); + return; + } + + spin_lock(&iommu_entry->devlock); + do_detach(dev_data); + spin_unlock(&iommu_entry->devlock); + + pci_info(pdev, "%s iommu devid %x sigment %x\n", __func__, + iommu->devid, iommu->segment); +} + +static unsigned long *iommu_get_spte(spt_entry *entry, unsigned long iova, int level) +{ + int i; + unsigned long *pte; + + if (level > (IOMMU_LEVEL_MAX - 1)) + return NULL; + + for (i = IOMMU_LEVEL_MAX - 1; i >= level; i--) { + pte = iommu_shadow_offset(entry, iova, i); + if (!iommu_pt_present(pte)) + break; + + if (iommu_pt_huge(pte)) + break; + + entry = (spt_entry *)(*pte); + } + + return pte; +} + +static int _iommu_alloc_ptable(loongson_iommu *iommu, + unsigned long *psentry, unsigned long *phwentry) +{ + unsigned long pte; + iommu_pte *new_phwentry; + spt_entry *new_shd_entry; + + if (!iommu_pt_present(psentry)) { + new_shd_entry = iommu_zalloc_page(iommu); + if (!new_shd_entry) { + pr_err("Loongson-IOMMU: new_shd_entry alloc err\n"); + return -ENOMEM; + } + /* fill shd_entry */ + *psentry = (unsigned long)new_shd_entry; + /* fill gmem phwentry */ + new_phwentry = (iommu_pte *)new_shd_entry->gmem_ptable; + pte = iommu_pgt_v2p(iommu, 
new_phwentry) & IOMMU_PAGE_MASK; + pte |= IOMMU_PTE_RW; + *phwentry = pte; + } + + return 0; +} + +static size_t iommu_ptw_map(loongson_iommu *iommu, spt_entry *shd_entry, + unsigned long start, unsigned long end, phys_addr_t pa, int level) +{ + int ret, huge; + unsigned long pte; + unsigned long next, old, step; + unsigned long *psentry, *phwentry; + + old = start; + psentry = iommu_shadow_offset(shd_entry, start, level); + phwentry = iommu_ptable_offset(shd_entry->gmem_ptable, start, level); + if (level == IOMMU_PT_LEVEL0) { + pa = pa & IOMMU_PAGE_MASK; + do { + pte = pa | IOMMU_PTE_RW; + *phwentry = pte; + *psentry = pte; + psentry++; + phwentry++; + start += IOMMU_PAGE_SIZE; + pa += IOMMU_PAGE_SIZE; + } while (start < end); + + return start - old; + } + + do { + next = iommu_ptable_end(start, end, level); + step = next - start; + + huge = 0; + if ((level == IOMMU_PT_LEVEL1) && (step == IOMMU_HPAGE_SIZE)) + if (!iommu_pt_present(psentry) || iommu_pt_huge(psentry)) + huge = 1; + + if (huge) { + pte = (pa & IOMMU_HPAGE_MASK) | IOMMU_PTE_RW | IOMMU_PTE_HP; + *phwentry = pte; + *psentry = pte; + } else { + ret = _iommu_alloc_ptable(iommu, psentry, phwentry); + if (ret != 0) + break; + iommu_ptw_map(iommu, (spt_entry *)*psentry, start, next, pa, level - 1); + } + + psentry++; + phwentry++; + pa += step; + start = next; + } while (start < end); + + return start - old; +} + +static int dev_map_page(iommu_info *iommu_entry, unsigned long start, + phys_addr_t pa, size_t size) +{ + int ret = 0; + spt_entry *entry; + phys_addr_t end; + size_t map_size; + loongson_iommu *iommu; + + end = start + size; + iommu = iommu_entry->iommu; + + mutex_lock(&iommu->loongson_iommu_pgtlock); + entry = iommu_entry->shadow_pgd; + map_size = iommu_ptw_map(iommu, entry, start, end, pa, IOMMU_LEVEL_MAX - 1); + if (map_size != size) + ret = -EFAULT; + + if (has_dom(iommu)) + __iommu_flush_iotlb_all(iommu); + mutex_unlock(&iommu->loongson_iommu_pgtlock); + + return ret; +} + +static size_t iommu_ptw_unmap(loongson_iommu *iommu, spt_entry *shd_entry, + unsigned long start, unsigned long end, int level) +{ + unsigned long next, old; + unsigned long *psentry, *phwentry; + + old = start; + psentry = iommu_shadow_offset(shd_entry, start, level); + phwentry = iommu_ptable_offset(shd_entry->gmem_ptable, start, level); + if (level == IOMMU_PT_LEVEL0) { + do { + *phwentry++ = 0; + *psentry++ = 0; + start += IOMMU_PAGE_SIZE; + } while (start < end); + } else { + do { + next = iommu_ptable_end(start, end, level); + if (!iommu_pt_present(psentry)) + continue; + + if (iommu_pt_huge(psentry)) { + if ((next - start) != IOMMU_HPAGE_SIZE) + pr_err("Map pte on hugepage not supported now\n"); + *phwentry = 0; + *psentry = 0; + } else + iommu_ptw_unmap(iommu, (spt_entry *)*psentry, start, next, level - 1); + } while (psentry++, phwentry++, start = next, start < end); + } + + return start - old; +} + +static int iommu_map_page(dom_info *priv, unsigned long start, + phys_addr_t pa, size_t size, int prot, gfp_t gfp) +{ + + unsigned long *pte; + int ret = 0; + iommu_info *iommu_entry = NULL; + + /* 0x10000000~0x8fffffff */ + if ((start >= IOVA_START) && (start < IOVA_END0)) { + start -= IOVA_START; + pte = (unsigned long *)priv->mmio_pgd; + while (size > 0) { + pte[start >> LA_VIRTIO_PAGE_SHIFT] = + pa & LA_VIRTIO_PAGE_MASK; + size -= IOMMU_PAGE_SIZE; + start += IOMMU_PAGE_SIZE; + pa += IOMMU_PAGE_SIZE; + } + return 0; + } + + spin_lock(&priv->lock); + list_for_each_entry(iommu_entry, &priv->iommu_devlist, list) { + ret |= 
dev_map_page(iommu_entry, start, pa, size); + } + spin_unlock(&priv->lock); + + return ret; +} + +static size_t iommu_unmap_page(iommu_info *iommu_entry, unsigned long start, size_t size) +{ + loongson_iommu *iommu; + spt_entry *entry; + size_t unmap_len; + unsigned long end; + + end = start + size; + iommu = iommu_entry->iommu; + mutex_lock(&iommu->loongson_iommu_pgtlock); + entry = iommu_entry->shadow_pgd; + unmap_len = iommu_ptw_unmap(iommu, entry, start, end, (IOMMU_LEVEL_MAX - 1)); + + if (has_dom(iommu)) + __iommu_flush_iotlb_all(iommu); + mutex_unlock(&iommu->loongson_iommu_pgtlock); + return unmap_len; +} + +static size_t domain_unmap_page(dom_info *info, unsigned long start, size_t size) +{ + unsigned long *pte; + size_t unmap_len = 0; + iommu_info *entry; + + /* 0x10000000~0x8fffffff */ + if ((start >= IOVA_START) && (start < IOVA_END0)) { + start -= IOVA_START; + pte = (unsigned long *)info->mmio_pgd; + while (size > 0) { + pte[start >> LA_VIRTIO_PAGE_SHIFT] = 0; + size -= 0x4000; + unmap_len += 0x4000; + start += 0x4000; + } + unmap_len += size; + + return unmap_len; + } + + spin_lock(&info->lock); + list_for_each_entry(entry, &info->iommu_devlist, list) + unmap_len = iommu_unmap_page(entry, start, size); + spin_unlock(&info->lock); + + return unmap_len; +} + +static int loongson_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t pa, size_t len, int prot, gfp_t gfp) +{ + dom_info *priv = to_dom_info(domain); + + return iommu_map_page(priv, iova, pa, len, prot, GFP_KERNEL); +} + +static size_t loongson_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *gather) +{ + dom_info *priv = to_dom_info(domain); + + return domain_unmap_page(priv, iova, size); +} + +static phys_addr_t loongson_iommu_iova_to_pa(struct iommu_domain *domain, + dma_addr_t iova) +{ + unsigned long pa, offset, tmpva, page_size, page_mask; + dom_info *priv = to_dom_info(domain); + unsigned long *psentry, *pte; + int ret = 0; + spt_entry *entry; + loongson_iommu *iommu; + iommu_info *iommu_entry = NULL; + + /* 0x10000000~0x8fffffff */ + if ((iova >= IOVA_START) && (iova < IOVA_END0)) { + tmpva = iova & LA_VIRTIO_PAGE_MASK; + pte = (unsigned long *)priv->mmio_pgd; + offset = iova & ((1ULL << LA_VIRTIO_PAGE_SHIFT) - 1); + pa = pte[(tmpva - IOVA_START) >> 14] + offset; + + return pa; + } + + iommu_entry = get_first_iommu_entry(priv); + if (iommu_entry == NULL) { + pr_err("%s iova:0x%llx iommu_entry is invalid\n", + __func__, iova); + ret = -EFAULT; + return ret; + } + + iommu = iommu_entry->iommu; + + mutex_lock(&iommu->loongson_iommu_pgtlock); + entry = iommu_entry->shadow_pgd; + psentry = iommu_get_spte(entry, iova, IOMMU_PT_LEVEL0); + mutex_unlock(&iommu->loongson_iommu_pgtlock); + + if (!psentry || !iommu_pt_present(psentry)) { + ret = -EFAULT; + pr_warn_once("Loongson-IOMMU: shadow pte is null or not present with iova %llx \n", iova); + return ret; + } + + if (iommu_pt_huge(psentry)) { + page_size = IOMMU_HPAGE_SIZE; + page_mask = IOMMU_HPAGE_MASK; + } else { + page_size = IOMMU_PAGE_SIZE; + page_mask = IOMMU_PAGE_MASK; + } + + pa = *psentry & page_mask; + pa |= (iova & (page_size - 1)); + + return (phys_addr_t)pa; +} + +static phys_addr_t loongson_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + phys_addr_t pa; + + pa = loongson_iommu_iova_to_pa(domain, iova); + + return pa; +} + +static void loongson_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + int ret; + dom_info *priv = to_dom_info(domain); + 
iommu_info *iommu_entry; + loongson_iommu *iommu; + + spin_lock(&priv->lock); + list_for_each_entry(iommu_entry, &priv->iommu_devlist, list) { + iommu = iommu_entry->iommu; + + ret = __iommu_flush_iotlb_all(iommu); + } + spin_unlock(&priv->lock); +} + +static void loongson_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + loongson_iommu_flush_iotlb_all(domain); +} + +static struct iommu_ops loongson_iommu_ops = { + .capable = loongson_iommu_capable, + .domain_alloc = loongson_iommu_domain_alloc, + .domain_free = loongson_iommu_domain_free, + .attach_dev = loongson_iommu_attach_dev, + .detach_dev = loongson_iommu_detach_dev, + .map = loongson_iommu_map, + .unmap = loongson_iommu_unmap, + .iova_to_phys = loongson_iommu_iova_to_phys, + .probe_device = loongson_iommu_probe_device, + .release_device = loongson_iommu_release_device, + .device_group = loongson_iommu_device_group, + .pgsize_bitmap = LA_IOMMU_PGSIZE, + .flush_iotlb_all = loongson_iommu_flush_iotlb_all, + .iotlb_sync = loongson_iommu_iotlb_sync, +}; + +loongson_iommu *loongarch_get_iommu(struct pci_dev *pdev) +{ + int pcisegment; + unsigned short devid; + loongson_iommu *iommu = NULL; + + devid = pdev->devfn & 0xff; + pcisegment = pci_domain_nr(pdev->bus); + + list_for_each_entry(iommu, &loongson_iommu_list, list) { + if ((iommu->segment == pcisegment) && + (iommu->devid == devid)) { + return iommu; + } + } + + return NULL; +} + +static int loongson_iommu_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + int ret = 1; + int bitmap_sz = 0; + int tmp; + struct loongson_iommu *iommu = NULL; + resource_size_t base, size; + + iommu = loongarch_get_iommu(pdev); + if (iommu == NULL) { + pci_info(pdev, "%s can't find iommu\n", __func__); + return -ENODEV; + } + + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + if (!request_mem_region(base, size, "loongson_iommu")) { + pci_err(pdev, "can't reserve mmio registers\n"); + return -ENOMEM; + } + + iommu->membase = ioremap(base, size); + if (iommu->membase == NULL) { + pci_info(pdev, "%s iommu pci dev bar0 is NULL\n", __func__); + return ret; + } + + base = pci_resource_start(pdev, 2); + size = pci_resource_len(pdev, 2); + if (!request_mem_region(base, size, "loongson_iommu")) { + pci_err(pdev, "can't reserve mmio registers\n"); + return -ENOMEM; + } + iommu->pgtbase = ioremap(base, size); + if (iommu->pgtbase == NULL) + return -ENOMEM; + + iommu->maxpages = size / IOMMU_PAGE_SIZE; + pr_info("iommu membase %p pgtbase %p pgtsize %llx maxpages %lx\n", iommu->membase, iommu->pgtbase, size, iommu->maxpages); + tmp = MAX_DOMAIN_ID / 8; + bitmap_sz = (MAX_DOMAIN_ID % 8) ? (tmp + 1) : tmp; + iommu->domain_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->domain_bitmap == NULL) { + pr_err("Loongson-IOMMU: domain bitmap alloc err bitmap_sz:%d\n", bitmap_sz); + goto out_err; + } + + tmp = MAX_ATTACHED_DEV_ID / 8; + bitmap_sz = (MAX_ATTACHED_DEV_ID % 8) ? (tmp + 1) : tmp; + iommu->devtable_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->devtable_bitmap == NULL) { + pr_err("Loongson-IOMMU: devtable bitmap alloc err bitmap_sz:%d\n", bitmap_sz); + goto out_err_1; + } + + tmp = iommu->maxpages / 8; + bitmap_sz = (iommu->maxpages % 8) ? 
(tmp + 1) : tmp; + iommu->pgtable_bitmap = bitmap_zalloc(bitmap_sz, GFP_KERNEL); + if (iommu->pgtable_bitmap == NULL) { + pr_err("Loongson-IOMMU: pgtable bitmap alloc err bitmap_sz:%d\n", bitmap_sz); + goto out_err_2; + } + + bus_set_iommu(&pci_bus_type, &loongson_iommu_ops); + + return 0; + +out_err_2: + kfree(iommu->devtable_bitmap); + iommu->devtable_bitmap = NULL; +out_err_1: + kfree(iommu->domain_bitmap); + iommu->domain_bitmap = NULL; +out_err: + + return ret; +} + +static void loongson_iommu_remove(struct pci_dev *pdev) +{ + struct loongson_iommu *iommu = NULL; + + iommu = loongarch_get_iommu(pdev); + if (iommu == NULL) + return; + + if (iommu->domain_bitmap != NULL) { + kfree(iommu->domain_bitmap); + iommu->domain_bitmap = NULL; + } + + if (iommu->devtable_bitmap != NULL) { + kfree(iommu->devtable_bitmap); + iommu->devtable_bitmap = NULL; + } + + if (iommu->pgtable_bitmap != NULL) { + kfree(iommu->pgtable_bitmap); + iommu->pgtable_bitmap = NULL; + } + + iommu->membase = NULL; + iommu->pgtbase = NULL; +} + +static int __init check_ivrs_checksum(struct acpi_table_header *table) +{ + int i; + u8 checksum = 0, *p = (u8 *)table; + + for (i = 0; i < table->length; ++i) + checksum += p[i]; + if (checksum != 0) { + /* ACPI table corrupt */ + pr_err("IVRS invalid checksum\n"); + return -ENODEV; + } + + return 0; +} + +struct loongson_iommu_rlookup_entry *create_rlookup_entry(int pcisegment) +{ + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + + rlookupentry = kzalloc(sizeof(struct loongson_iommu_rlookup_entry), GFP_KERNEL); + if (rlookupentry == NULL) + return rlookupentry; + + rlookupentry->pcisegment = pcisegment; + + /* IOMMU rlookup table - find the IOMMU for a specific device */ + rlookupentry->loongson_iommu_rlookup_table = (void *)__get_free_pages( + GFP_KERNEL | __GFP_ZERO, get_order(rlookup_table_size)); + if (rlookupentry->loongson_iommu_rlookup_table == NULL) { + kfree(rlookupentry); + rlookupentry = NULL; + } else { + list_add(&rlookupentry->list, &loongson_rlookup_iommu_list); + } + + return rlookupentry; +} + +/* Writes the specific IOMMU for a device into the rlookup table */ +static void __init set_iommu_for_device(loongson_iommu *iommu, + u16 devid) +{ + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + + rlookupentry = lookup_rlooptable(iommu->segment); + if (rlookupentry == NULL) + rlookupentry = create_rlookup_entry(iommu->segment); + + if (rlookupentry != NULL) + rlookupentry->loongson_iommu_rlookup_table[devid] = iommu; +} + +static inline u32 get_ivhd_header_size(struct ivhd_header *h) +{ + u32 size = 0; + + switch (h->type) { + case IVHD_HEAD_TYPE10: + size = 24; + break; + case IVHD_HEAD_TYPE11: + case IVHD_HEAD_TYPE40: + size = 40; + break; + } + return size; +} + +static inline void update_last_devid(u16 devid) +{ + if (devid > loongson_iommu_last_bdf) + loongson_iommu_last_bdf = devid; +} + +/* + * This function calculates the length of a given IVHD entry + */ +static inline int ivhd_entry_length(u8 *ivhd) +{ + u32 type = ((struct ivhd_entry *)ivhd)->type; + + if (type < 0x80) { + return 0x04 << (*ivhd >> 6); + } else if (type == IVHD_DEV_ACPI_HID) { + /* For ACPI_HID, offset 21 is uid len */ + return *((u8 *)ivhd + 21) + 22; + } + return 0; +} + +/* + * After reading the highest device id from the IOMMU PCI capability header + * this function looks if there is a higher device id defined in the ACPI table + */ +static int __init find_last_devid_from_ivhd(struct ivhd_header *h) +{ + u8 *p = (void *)h, *end = (void *)h; + struct ivhd_entry 
*dev; + + u32 ivhd_size = get_ivhd_header_size(h); + + if (!ivhd_size) { + pr_err("loongson-iommu: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + p += ivhd_size; + end += h->length; + + while (p < end) { + dev = (struct ivhd_entry *)p; + switch (dev->type) { + case IVHD_DEV_ALL: + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(MAX_BDF_NUM); + break; + case IVHD_DEV_SELECT: + case IVHD_DEV_RANGE_END: + case IVHD_DEV_ALIAS: + case IVHD_DEV_EXT_SELECT: + /* all the above subfield types refer to device ids */ + update_last_devid(dev->devid); + break; + default: + break; + } + p += ivhd_entry_length(p); + } + + WARN_ON(p != end); + + return 0; +} + +/* + * Iterate over all IVHD entries in the ACPI table and find the highest device + * id which we need to handle. This is the first of three functions which parse + * the ACPI table. So we check the checksum here. + */ +static int __init find_last_devid_acpi(struct acpi_table_header *table) +{ + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + + p += IVRS_HEADER_LENGTH; + + end += table->length; + while (p < end) { + h = (struct ivhd_header *)p; + if (h->type == loongson_iommu_target_ivhd_type) { + int ret = find_last_devid_from_ivhd(h); + + if (ret) + return ret; + } + + if (h->length == 0) + break; + + p += h->length; + } + + if (p != end) + return -EINVAL; + + + return 0; +} + +/* + * Takes a pointer to an loongarch IOMMU entry in the ACPI table and + * initializes the hardware and our data structures with it. + */ +static int __init init_iommu_from_acpi(loongson_iommu *iommu, + struct ivhd_header *h) +{ + u8 *p = (u8 *)h; + u8 *end = p; + u16 devid = 0, devid_start = 0; + u32 dev_i, ivhd_size; + struct ivhd_entry *e; + + /* + * Done. Now parse the device entries + */ + ivhd_size = get_ivhd_header_size(h); + if (!ivhd_size) { + pr_err("loongarch iommu: Unsupported IVHD type %#x\n", h->type); + return -EINVAL; + } + + if (h->length == 0) + return -EINVAL; + + p += ivhd_size; + end += h->length; + + while (p < end) { + e = (struct ivhd_entry *)p; + switch (e->type) { + case IVHD_DEV_ALL: + for (dev_i = 0; dev_i <= loongson_iommu_last_bdf; ++dev_i) + set_iommu_for_device(iommu, dev_i); + break; + + case IVHD_DEV_SELECT: + pr_info(" DEV_SELECT\t\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); + + devid = e->devid; + set_iommu_for_device(iommu, devid); + break; + + case IVHD_DEV_SELECT_RANGE_START: + pr_info(" DEV_SELECT_RANGE_START\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); + + devid_start = e->devid; + break; + + case IVHD_DEV_RANGE_END: + pr_info(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", + PCI_BUS_NUM(e->devid), PCI_SLOT(e->devid), PCI_FUNC(e->devid)); + + devid = e->devid; + for (dev_i = devid_start; dev_i <= devid; ++dev_i) + set_iommu_for_device(iommu, dev_i); + break; + + default: + break; + } + + p += ivhd_entry_length(p); + } + + return 0; +} + +/* + * This function clues the initialization function for one IOMMU + * together and also allocates the command buffer and programs the + * hardware. It does NOT enable the IOMMU. This is done afterwards. 
+ */ +static int __init init_iommu_one(loongson_iommu *iommu, struct ivhd_header *h) +{ + int ret; + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + + spin_lock_init(&iommu->domain_bitmap_lock); + spin_lock_init(&iommu->dom_info_lock); + spin_lock_init(&iommu->pgtable_bitmap_lock); + mutex_init(&iommu->loongson_iommu_pgtlock); + + /* Add IOMMU to internal data structures */ + INIT_LIST_HEAD(&iommu->dom_list); + + list_add_tail(&iommu->list, &loongson_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = h->devid; + iommu->segment = h->pci_seg; + + ret = init_iommu_from_acpi(iommu, h); + if (ret) { + pr_err("%s init iommu from acpi failed\n", __func__); + return ret; + } + + rlookupentry = lookup_rlooptable(iommu->segment); + if (rlookupentry != NULL) { + /* + * Make sure IOMMU is not considered to translate itself. + * The IVRS table tells us so, but this is a lie! + */ + rlookupentry->loongson_iommu_rlookup_table[iommu->devid] = NULL; + } + + return 0; +} + +/* + * Iterates over all IOMMU entries in the ACPI table, allocates the + * IOMMU structure and initializes it with init_iommu_one() + */ +static int __init init_iommu_all(struct acpi_table_header *table) +{ + int ret; + u8 *p = (u8 *)table, *end = (u8 *)table; + struct ivhd_header *h; + loongson_iommu *iommu; + + end += table->length; + p += IVRS_HEADER_LENGTH; + + while (p < end) { + h = (struct ivhd_header *)p; + + if (h->length == 0) + break; + + if (*p == loongson_iommu_target_ivhd_type) { + pr_info("device: %02x:%02x.%01x seg: %d\n", + PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid), PCI_FUNC(h->devid), h->pci_seg); + + iommu = kzalloc(sizeof(loongson_iommu), GFP_KERNEL); + if (iommu == NULL) { + pr_info("%s alloc memory for iommu failed\n", __func__); + return -ENOMEM; + } + + ret = init_iommu_one(iommu, h); + if (ret) { + kfree(iommu); + pr_info("%s init iommu failed\n", __func__); + return ret; + } + } + p += h->length; + } + + if (p != end) + return -EINVAL; + + return 0; +} + +/** + * get_highest_supported_ivhd_type - Look up the appropriate IVHD type + * @ivrs Pointer to the IVRS header + * + * This function search through all IVDB of the maximum supported IVHD + */ +static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs) +{ + u8 *base = (u8 *)ivrs; + struct ivhd_header *ivhd = (struct ivhd_header *)(base + IVRS_HEADER_LENGTH); + u8 last_type = ivhd->type; + u16 devid = ivhd->devid; + + while (((u8 *)ivhd - base < ivrs->length) && + (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED) && + (ivhd->length > 0)) { + u8 *p = (u8 *) ivhd; + + if (ivhd->devid == devid) + last_type = ivhd->type; + ivhd = (struct ivhd_header *)(p + ivhd->length); + } + + return last_type; +} + +static inline unsigned long tbl_size(int entry_size) +{ + unsigned int shift = PAGE_SHIFT + + get_order(((int)loongson_iommu_last_bdf + 1) * entry_size); + + return 1UL << shift; +} + +static int __init loongson_iommu_ivrs_init(void) +{ + int ret = 0; + acpi_status status; + struct acpi_table_header *ivrs_base; + + status = acpi_get_table("IVRS", 0, &ivrs_base); + if (status == AE_NOT_FOUND) { + pr_info("%s get ivrs table failed\n", __func__); + return -ENODEV; + } + + /* + * Validate checksum here so we don't need to do it when + * we actually parse the table + */ + ret = check_ivrs_checksum(ivrs_base); + if (ret) + goto out; + + loongson_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); + pr_info("Using IVHD type %#x\n", loongson_iommu_target_ivhd_type); + + /* + * First parse 
ACPI tables to find the largest Bus/Dev/Func + * we need to handle. Upon this information the shared data + * structures for the IOMMUs in the system will be allocated + */ + ret = find_last_devid_acpi(ivrs_base); + if (ret) { + pr_err("%s find last devid failed\n", __func__); + goto out; + } + + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + + /* + * now the data structures are allocated and basically initialized + * start the real acpi table scan + */ + ret = init_iommu_all(ivrs_base); + +out: + /* Don't leak any ACPI memory */ + acpi_put_table(ivrs_base); + ivrs_base = NULL; + + return ret; +} + +static int __init loongson_iommu_ivrs_init_stub(void) +{ + u32 dev_i; + loongson_iommu *iommu; + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + + /* Use maximum BDF value for DEV_ALL */ + update_last_devid(MAX_BDF_NUM); + + rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); + + iommu = kzalloc(sizeof(loongson_iommu), GFP_KERNEL); + if (iommu == NULL) { + pr_info("%s alloc memory for iommu failed\n", __func__); + return -ENOMEM; + } + + spin_lock_init(&iommu->domain_bitmap_lock); + spin_lock_init(&iommu->dom_info_lock); + spin_lock_init(&iommu->pgtable_bitmap_lock); + mutex_init(&iommu->loongson_iommu_pgtlock); + + /* Add IOMMU to internal data structures */ + INIT_LIST_HEAD(&iommu->dom_list); + + list_add_tail(&iommu->list, &loongson_iommu_list); + + /* + * Copy data from ACPI table entry to the iommu struct + */ + iommu->devid = 0xd0; + iommu->segment = 0; + + for (dev_i = 0; dev_i <= loongson_iommu_last_bdf; ++dev_i) + set_iommu_for_device(iommu, dev_i); + + rlookupentry = lookup_rlooptable(iommu->segment); + if (rlookupentry != NULL) { + /* + * Make sure IOMMU is not considered to translate itself. + * The IVRS table tells us so, but this is a lie! 
+ */ + rlookupentry->loongson_iommu_rlookup_table[iommu->devid] = NULL; + } + + return 0; +} + +static void free_iommu_rlookup_entry(void) +{ + loongson_iommu *iommu = NULL; + struct loongson_iommu_rlookup_entry *rlookupentry = NULL; + + while (!list_empty(&loongson_iommu_list)) { + iommu = list_first_entry(&loongson_iommu_list, loongson_iommu, list); + list_del(&iommu->list); + kfree(iommu); + } + + while (!list_empty(&loongson_rlookup_iommu_list)) { + rlookupentry = list_first_entry(&loongson_rlookup_iommu_list, + struct loongson_iommu_rlookup_entry, list); + + list_del(&rlookupentry->list); + if (rlookupentry->loongson_iommu_rlookup_table != NULL) { + free_pages( + (unsigned long)rlookupentry->loongson_iommu_rlookup_table, + get_order(rlookup_table_size)); + + rlookupentry->loongson_iommu_rlookup_table = NULL; + } + + kfree(rlookupentry); + } +} + +static int __init loonson_iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + + while (*str) { + if (!strncmp(str, "on", 2)) { + loongson_iommu_disable = 0; + pr_info("IOMMU enabled\n"); + } else if (!strncmp(str, "off", 3)) { + loongson_iommu_disable = 1; + pr_info("IOMMU disabled\n"); + } + str += strcspn(str, ","); + while (*str == ',') + str++; + } + return 0; +} +__setup("loongson_iommu=", loonson_iommu_setup); + +static const struct pci_device_id loongson_iommu_pci_tbl[] = { + { PCI_DEVICE(0x14, 0x7a1f) }, + { 0, } +}; + +static struct pci_driver loongson_iommu_driver = { + .name = "loongson-iommu", + .probe = loongson_iommu_probe, + .remove = loongson_iommu_remove, + .id_table = loongson_iommu_pci_tbl, +}; + +static int __init loongson_iommu_driver_init(void) +{ + int ret = 0; + + if (!loongson_iommu_disable) { + ret = loongson_iommu_ivrs_init(); + if (ret < 0) { + free_iommu_rlookup_entry(); + pr_err("Failed to init iommu by ivrs\n"); + ret = loongson_iommu_ivrs_init_stub(); + if (ret < 0) { + free_iommu_rlookup_entry(); + pr_err("Failed to init iommu by stub\n"); + return ret; + } + } + + ret = pci_register_driver(&loongson_iommu_driver); + if (ret < 0) { + pr_err("Failed to register IOMMU driver\n"); + return ret; + } + } + + return ret; +} + +static void __exit loongson_iommu_driver_exit(void) +{ + if (!loongson_iommu_disable) { + free_iommu_rlookup_entry(); + pci_unregister_driver(&loongson_iommu_driver); + } +} + +module_init(loongson_iommu_driver_init); +module_exit(loongson_iommu_driver_exit); diff --git a/drivers/iommu/loongson/iommu.h b/drivers/iommu/loongson/iommu.h new file mode 100644 index 000000000000..d50eb541f28d --- /dev/null +++ b/drivers/iommu/loongson/iommu.h @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Loongson IOMMU Driver + * + * Copyright (C) 2020-2021 Loongson Technology Ltd. 
+ * Author: Lv Chen + * Wang Yang + */ + +#ifndef LOONGSON_IOMMU_H +#define LOONGSON_IOMMU_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#define IOVA_WIDTH 47 + +/* Bit value definition for I/O PTE fields */ +#define IOMMU_PTE_PR (1ULL << 0) /* Present */ +#define IOMMU_PTE_HP (1ULL << 1) /* HugePage */ +#define IOMMU_PTE_IR (1ULL << 2) /* Readable */ +#define IOMMU_PTE_IW (1ULL << 3) /* Writeable */ +#define IOMMU_PTE_RW (IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW) + +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) +#define IOMMU_PTE_HUGEPAGE(pte) ((pte) & IOMMU_PTE_HP) + +#define iommu_pt_present(shadow_entry) ((*shadow_entry != 0)) + +/* + * shadow_entry using kmalloc to request memory space, + * the memory address requested by kmalloc is ARCH_DMA_MINALIGN-aligned, + * when the shadow_entry address is not a ARCH_DMA_MINALIGN-aligned + * address, we think that shadow_entry store large pages + */ +#define iommu_pt_huge(shd_entry_ptr) ((*shd_entry_ptr) & IOMMU_PTE_HP) + +#define LA_IOMMU_PGSIZE (SZ_16K | SZ_32M) + +#define IOMMU_PT_LEVEL0 0x00 +#define IOMMU_PT_LEVEL1 0x01 + +/* IOMMU page table */ +#define IOMMU_PAGE_SHIFT PAGE_SHIFT +#define IOMMU_PAGE_SIZE (_AC(1, UL) << IOMMU_PAGE_SHIFT) +#define IOMMU_LEVEL_STRIDE (IOMMU_PAGE_SHIFT - 3) +#define IOMMU_PTRS_PER_LEVEL (IOMMU_PAGE_SIZE >> 3) +#define IOMMU_LEVEL_SHIFT(n) (((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT) +#define IOMMU_LEVEL_SIZE(n) (_AC(1, UL) << (((n) * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)) +#define IOMMU_LEVEL_MASK(n) (~(IOMMU_LEVEL_SIZE(n) - 1)) +#define IOMMU_LEVEL_MAX DIV_ROUND_UP((IOVA_WIDTH - IOMMU_PAGE_SHIFT), IOMMU_LEVEL_STRIDE) +#define IOMMU_PAGE_MASK (~(IOMMU_PAGE_SIZE - 1)) + +#define IOMMU_HPAGE_SIZE (1UL << IOMMU_LEVEL_SHIFT(IOMMU_PT_LEVEL1)) +#define IOMMU_HPAGE_MASK (~(IOMMU_HPAGE_SIZE - 1)) + +/* Virtio page use size of 16k */ +#define LA_VIRTIO_PAGE_SHIFT 14 +#define LA_VIRTIO_PAGE_SIZE (_AC(1, UL) << LA_VIRTIO_PAGE_SHIFT) +#define LA_VIRTIO_PAGE_MASK (~((1ULL << LA_VIRTIO_PAGE_SHIFT) - 1)) + +/* Bits of iommu map address space field */ +#define LA_IOMMU_PFN_LO 0x0 +#define PFN_LO_SHIFT 12 +#define LA_IOMMU_PFN_HI 0x4 +#define PFN_HI_MASK 0x3ffff +#define LA_IOMMU_VFN_LO 0x8 +#define VFN_LO_SHIFT 12 +#define LA_IOMMU_VFN_HI 0xC +#define VFN_HI_MASK 0x3ffff + +/* wired | index | domain | shift */ +#define LA_IOMMU_WIDS 0x10 +/* valid | busy | tlbar/aw | cmd */ +#define LA_IOMMU_VBTC 0x14 +#define IOMMU_PGTABLE_BUSY (1 << 16) +/* enable |index | valid | domain | bdf */ +#define LA_IOMMU_EIVDB 0x18 +/* enable | valid | cmd */ +#define LA_IOMMU_CMD 0x1C +#define LA_IOMMU_PGD0_LO 0x20 +#define LA_IOMMU_PGD0_HI 0x24 +#define STEP_PGD 0x8 +#define STEP_PGD_SHIFT 3 +#define LA_IOMMU_PGD_LO(domain_id) \ + (LA_IOMMU_PGD0_LO + ((domain_id) << STEP_PGD_SHIFT)) +#define LA_IOMMU_PGD_HI(domain_id) \ + (LA_IOMMU_PGD0_HI + ((domain_id) << STEP_PGD_SHIFT)) + +#define LA_IOMMU_DIR_CTRL0 0xA0 +#define LA_IOMMU_DIR_CTRL1 0xA4 +#define LA_IOMMU_DIR_CTRL(x) (LA_IOMMU_DIR_CTRL0 + ((x) << 2)) + +#define LA_IOMMU_SAFE_BASE_HI 0xE0 +#define LA_IOMMU_SAFE_BASE_LO 0xE4 +#define LA_IOMMU_EX_ADDR_LO 0xE8 +#define LA_IOMMU_EX_ADDR_HI 0xEC + +#define LA_IOMMU_PFM_CNT_EN 0x100 + +#define LA_IOMMU_RD_HIT_CNT_0 0x110 +#define LA_IOMMU_RD_MISS_CNT_O 0x114 +#define LA_IOMMU_WR_HIT_CNT_0 0x118 +#define LA_IOMMU_WR_MISS_CNT_0 0x11C +#define LA_IOMMU_RD_HIT_CNT_1 0x120 +#define LA_IOMMU_RD_MISS_CNT_1 0x124 +#define LA_IOMMU_WR_HIT_CNT_1 0x128 +#define LA_IOMMU_WR_MISS_CNT_1 0x12C +#define 
LA_IOMMU_RD_HIT_CNT_2 0x130 +#define LA_IOMMU_RD_MISS_CNT_2 0x134 +#define LA_IOMMU_WR_HIT_CNT_2 0x138 +#define LA_IOMMU_WR_MISS_CNT_2 0x13C + +#define MAX_DOMAIN_ID 16 +#define MAX_ATTACHED_DEV_ID 16 +#define MAX_PAGES_NUM (SZ_128M / IOMMU_PAGE_SIZE) + +#define iommu_ptable_end(addr, end, level) \ +({ unsigned long __boundary = ((addr) + IOMMU_LEVEL_SIZE(level)) & IOMMU_LEVEL_MASK(level); \ + (__boundary - 1 < (end) - 1) ? __boundary : (end); \ +}) + +/* To find an entry in an iommu page table directory */ +#define iommu_shadow_index(addr, level) \ + (((addr) >> ((level * IOMMU_LEVEL_STRIDE) + IOMMU_PAGE_SHIFT)) & (IOMMU_PTRS_PER_LEVEL - 1)) + +/* IOMMU iommu_table entry */ +typedef struct { unsigned long iommu_pte; } iommu_pte; + +typedef struct loongson_iommu { + struct list_head list; + spinlock_t domain_bitmap_lock; /* Lock for domain allocing */ + spinlock_t dom_info_lock; /* Lock for priv->list */ + spinlock_t pgtable_bitmap_lock; /* Lock for bitmap of page table */ + struct mutex loongson_iommu_pgtlock; /* Lock for iommu page table */ + void *domain_bitmap; /* Bitmap of global domains */ + void *devtable_bitmap; /* Bitmap of devtable */ + void *pgtable_bitmap; /* Bitmap of devtable pages for page table */ + struct list_head dom_list; /* List of all domain privates */ + u16 devid; /* PCI device id of the IOMMU device */ + int segment; /* PCI segment# */ + void *membase; + void *pgtbase; + unsigned long maxpages; +} loongson_iommu; + +struct loongson_iommu_rlookup_entry +{ + struct list_head list; + struct loongson_iommu **loongson_iommu_rlookup_table; + int pcisegment; +}; + +/* shadow page table entry */ +typedef struct spt_entry { + unsigned long *gmem_ptable; /* gmemory entry address base*/ + unsigned long *shadow_ptable; /* virtual address base for shadow page */ + int index; /* index 128M gmem */ + int dirty; + int present; +} spt_entry; + +typedef struct iommu_info { + struct list_head list; /* for loongson_iommu_pri->iommu_devlist */ + spt_entry *shadow_pgd; + struct loongson_iommu *iommu; + spinlock_t devlock; /* priv dev list lock */ + struct list_head dev_list; /* List of all devices in this domain iommu */ + unsigned int dev_cnt; /* devices assigned to this domain iommu */ + short id; +} iommu_info; + +/* One vm is equal to a domain, one domain has a priv */ +typedef struct dom_info { + struct list_head list; /* For list of all domains */ + struct list_head iommu_devlist; + struct iommu_domain domain; + void *mmio_pgd; /* 0x10000000~0x8fffffff */ + spinlock_t lock; /* Lock for priv->iommu_devlist */ +} dom_info; + +/* A device for passthrough */ +struct loongson_iommu_dev_data { + struct list_head list; /* for iommu_entry->dev_list */ + struct loongson_iommu *iommu; + iommu_info *iommu_entry; + unsigned short bdf; + int count; + int index; /* index in device table */ +}; + +static inline unsigned long iommu_pgt_v2p(loongson_iommu *iommu, void *va) +{ + return (unsigned long)(va - iommu->pgtbase); +} + +static inline unsigned long *iommu_ptable_offset(unsigned long *table_entry, + unsigned long addr, int level) +{ + return table_entry + iommu_shadow_index(addr, level); +} + +static inline unsigned long *iommu_shadow_offset(spt_entry *shadow_entry, + unsigned long addr, int level) +{ + unsigned long *table_base; + + table_base = shadow_entry->shadow_ptable; + + return table_base + iommu_shadow_index(addr, level); +} + +#endif /* LOONGSON_IOMMU_H */ diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index e44bf736e2b2..3a625e39601d 100644 --- a/drivers/vfio/Kconfig +++ 
b/drivers/vfio/Kconfig
@@ -22,7 +22,7 @@ config VFIO_VIRQFD
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	select IOMMU_API
-	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64 || LOONGARCH)
 	help
 	  VFIO provides a framework for secure userspace device drivers.
 	  See Documentation/driver-api/vfio.rst for more details.
--
Gitee

From 38404ba44d65efcaec84b3dd674b4f9bf6fac995 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 27 Oct 2022 11:07:15 +0800
Subject: [PATCH 217/241] platform/loongarch: laptop: Adjust resume order for loongson_hotkey_resume()

Some laptops don't support SW_LID but still have backlight control.
Move backlight resuming before the SW_LID event handling, so that an
early return from the SW_LID path can no longer skip the brightness
restore.

Change-Id: I8a0ef96fdd3458684a116ee1267c44d24988dc42
Signed-off-by: Huacai Chen
---
 drivers/platform/loongarch/loongson-laptop.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/loongarch/loongson-laptop.c b/drivers/platform/loongarch/loongson-laptop.c
index e8981c436651..e47ba6b7b37e 100644
--- a/drivers/platform/loongarch/loongson-laptop.c
+++ b/drivers/platform/loongarch/loongson-laptop.c
@@ -209,6 +209,13 @@ static int loongson_hotkey_resume(struct device *dev)
 	struct key_entry ke;
 	struct backlight_device *bd;
 
+	bd = backlight_device_get_by_type(BACKLIGHT_PLATFORM);
+	if (bd) {
+		loongson_laptop_backlight_update(bd) ?
+		pr_warn("Loongson_backlight: resume brightness failed") :
+		pr_info("Loongson_backlight: resume brightness %d\n", bd->props.brightness);
+	}
+
 	/*
 	 * Only if the firmware supports SW_LID event model, we can handle the
 	 * event. This is for the consideration of development board without EC.
@@ -238,13 +245,6 @@ static int loongson_hotkey_resume(struct device *dev)
 
 		}
 	}
 
-	bd = backlight_device_get_by_type(BACKLIGHT_PLATFORM);
-	if (bd) {
-		loongson_laptop_backlight_update(bd) ?
-		pr_warn("Loongson_backlight: resume brightness failed") :
-		pr_info("Loongson_backlight: resume brightness %d\n", bd->props.brightness);
-	}
-
 	return 0;
 }
--
Gitee

From dbc118bfbcb6b9dd577be310d39636162fc8b598 Mon Sep 17 00:00:00 2001
From: Yang Yingliang
Date: Fri, 14 Oct 2022 09:52:12 +0800
Subject: [PATCH 218/241] platform/loongarch: laptop: Fix possible UAF and simplify generic_acpi_laptop_init()

Currently the return value of 'sub_driver->init' is not checked. If
sparse_keymap_setup(), which is called in the init function, fails,
'generic_inputdev' is freed, and using it afterwards in
generic_acpi_laptop_init() leads to a use-after-free. Fix it by
checking the return value and setting generic_inputdev to NULL after
freeing it, so as to avoid a double free as well.

The error code in generic_subdriver_init() is always negative, so the
return of generic_subdriver_init() can be simplified.
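The shape of the fix is worth spelling out. Below is a minimal sketch
(hypothetical names, not the driver's actual code) of the two idioms
involved: clearing a global pointer immediately after freeing it, and
propagating a callback's error instead of discarding it:

    #include <linux/input.h>
    #include <linux/input/sparse-keymap.h>

    static struct input_dev *example_inputdev;	/* plays the role of generic_inputdev */

    struct example_sub_driver {
    	int (*init)(struct example_sub_driver *sub);	/* may fail */
    };

    static int example_event_init(const struct key_entry *keymap)
    {
    	int ret;

    	example_inputdev = input_allocate_device();
    	if (!example_inputdev)
    		return -ENOMEM;

    	ret = sparse_keymap_setup(example_inputdev, keymap, NULL);
    	if (ret < 0) {
    		input_free_device(example_inputdev);
    		example_inputdev = NULL;	/* later users see NULL, not freed memory */
    		return ret;
    	}
    	return 0;
    }

    static int example_subdriver_init(struct example_sub_driver *sub)
    {
    	int ret = 0;

    	if (sub->init)
    		ret = sub->init(sub);	/* check it instead of ignoring it */
    	return ret;	/* already 0 or negative, no (ret < 0) ? ret : 0 dance */
    }

Without the NULL assignment, any later caller that dereferences the
global pointer would operate on freed memory.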
Change-Id: I49095c15fc028984aeadbd81a3d792fafec3fd16
Fixes: 6246ed09111f ("LoongArch: Add ACPI-based generic laptop driver")
Signed-off-by: Yang Yingliang
Signed-off-by: Huacai Chen
---
 drivers/platform/loongarch/loongson-laptop.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/platform/loongarch/loongson-laptop.c b/drivers/platform/loongarch/loongson-laptop.c
index e47ba6b7b37e..26b136a5d5e5 100644
--- a/drivers/platform/loongarch/loongson-laptop.c
+++ b/drivers/platform/loongarch/loongson-laptop.c
@@ -456,6 +456,7 @@ static int __init event_init(struct generic_sub_driver *sub_driver)
 	if (ret < 0) {
 		pr_err("Failed to setup input device keymap\n");
 		input_free_device(generic_inputdev);
+		generic_inputdev = NULL;
 
 		return ret;
 	}
@@ -509,8 +510,11 @@ static int __init generic_subdriver_init(struct generic_sub_driver *sub_driver)
 	if (ret)
 		return -EINVAL;
 
-	if (sub_driver->init)
-		sub_driver->init(sub_driver);
+	if (sub_driver->init) {
+		ret = sub_driver->init(sub_driver);
+		if (ret)
+			goto err_out;
+	}
 
 	if (sub_driver->notify) {
 		ret = setup_acpi_notify(sub_driver);
@@ -526,7 +530,7 @@ static int __init generic_subdriver_init(struct generic_sub_driver *sub_driver)
 
 err_out:
 	generic_subdriver_exit(sub_driver);
-	return (ret < 0) ? ret : 0;
+	return ret;
 }
 
 static void generic_subdriver_exit(struct generic_sub_driver *sub_driver)
--
Gitee

From 37fefeef5205739a70fd248f056cc2e59d3a59ed Mon Sep 17 00:00:00 2001
From: Qinglang Miao
Date: Thu, 19 Nov 2020 15:08:39 +0800
Subject: [PATCH 219/241] ipmi: msghandler: Suppress suspicious RCU usage warning

While running ipmi, ipmi_smi_watcher_register() caused a suspicious
RCU usage warning.

-----
=============================
WARNING: suspicious RCU usage
5.10.0-rc3+ #1 Not tainted
-----------------------------
drivers/char/ipmi/ipmi_msghandler.c:750 RCU-list traversed in non-reader section!!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
2 locks held by syz-executor.0/4254:

stack backtrace:
CPU: 0 PID: 4254 Comm: syz-executor.0 Not tainted 5.10.0-rc3+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014
Call Trace:
 dump_stack+0x19d/0x200
 ipmi_smi_watcher_register+0x2d3/0x340 [ipmi_msghandler]
 acpi_ipmi_init+0xb1/0x1000 [acpi_ipmi]
 do_one_initcall+0x149/0x7e0
 do_init_module+0x1ef/0x700
 load_module+0x3467/0x4140
 __do_sys_finit_module+0x10d/0x1a0
 do_syscall_64+0x34/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x468ded
-----

The traversal is safe because smi_watchers_mutex is held and
srcu_read_lock() has been taken, so simply pass lockdep_is_held() to
list_for_each_entry_rcu() to suppress this warning.
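For reference, the lockdep condition is the optional fourth argument of
list_for_each_entry_rcu(). A self-contained sketch of the pattern, on a
hypothetical list rather than the ipmi one:

    #include <linux/mutex.h>
    #include <linux/rculist.h>
    #include <linux/printk.h>

    struct watcher {
    	struct list_head link;
    	int id;
    };

    static DEFINE_MUTEX(watchers_mutex);
    static LIST_HEAD(watchers);

    static void walk_watchers(void)
    {
    	struct watcher *w;

    	mutex_lock(&watchers_mutex);
    	/* The extra argument tells lockdep why traversing this RCU list
    	 * without rcu_read_lock() is safe in this caller. */
    	list_for_each_entry_rcu(w, &watchers, link,
    				lockdep_is_held(&watchers_mutex))
    		pr_info("watcher %d\n", w->id);
    	mutex_unlock(&watchers_mutex);
    }

The condition is only evaluated on CONFIG_PROVE_RCU_LIST builds, so the
annotation costs nothing on production kernels.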
Change-Id: Ia2f4b9a427f7b9993d44d855b6691fe0c9412451 Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Message-Id: <20201119070839.381-1-miaoqinglang@huawei.com> Signed-off-by: Corey Minyard Signed-off-by: Huacai Chen --- drivers/char/ipmi/ipmi_msghandler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index b89f300751b1..e955ed7497c0 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -749,7 +749,8 @@ int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher) list_add(&watcher->link, &smi_watchers); index = srcu_read_lock(&ipmi_interfaces_srcu); - list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + list_for_each_entry_rcu(intf, &ipmi_interfaces, link, + lockdep_is_held(&smi_watchers_mutex)) { int intf_num = READ_ONCE(intf->intf_num); if (intf_num == -1) -- Gitee From f99142f58afa41d8dacdf8507e10c9f38a07f53b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 31 Dec 2020 15:13:33 +0800 Subject: [PATCH 220/241] Input: i8042 - Add PNP checking hook for Loongson Add PNP checking related functions for Loongson, so that i8042 driver can work well under the ACPI firmware with PNP typed keyboard and mouse configured in DSDT. Change-Id: Ied4d507780d95aa264cae3e553fd5f2e29d06486 Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen --- drivers/input/serio/i8042-loongsonio.h | 330 +++++++++++++++++++++++++ drivers/input/serio/i8042.h | 2 + 2 files changed, 332 insertions(+) create mode 100644 drivers/input/serio/i8042-loongsonio.h diff --git a/drivers/input/serio/i8042-loongsonio.h b/drivers/input/serio/i8042-loongsonio.h new file mode 100644 index 000000000000..2ea83b14f13d --- /dev/null +++ b/drivers/input/serio/i8042-loongsonio.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * i8042-loongsonio.h + * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Author: Jianmin Lv + * Huacai Chen + */ + +#ifndef _I8042_LOONGSONIO_H +#define _I8042_LOONGSONIO_H + +/* + * Names. + */ + +#define I8042_KBD_PHYS_DESC "isa0060/serio0" +#define I8042_AUX_PHYS_DESC "isa0060/serio1" +#define I8042_MUX_PHYS_DESC "isa0060/serio%d" + +/* + * IRQs. + */ +#define I8042_MAP_IRQ(x) (x) + +#define I8042_KBD_IRQ i8042_kbd_irq +#define I8042_AUX_IRQ i8042_aux_irq + +static int i8042_kbd_irq; +static int i8042_aux_irq; + +/* + * Register numbers. 
+ */ + +#define I8042_COMMAND_REG i8042_command_reg +#define I8042_STATUS_REG i8042_command_reg +#define I8042_DATA_REG i8042_data_reg + +static int i8042_command_reg = 0x64; +static int i8042_data_reg = 0x60; + + +static inline int i8042_read_data(void) +{ + return inb(I8042_DATA_REG); +} + +static inline int i8042_read_status(void) +{ + return inb(I8042_STATUS_REG); +} + +static inline void i8042_write_data(int val) +{ + outb(val, I8042_DATA_REG); +} + +static inline void i8042_write_command(int val) +{ + outb(val, I8042_COMMAND_REG); +} + +#ifdef CONFIG_PNP +#include + +static bool i8042_pnp_kbd_registered; +static unsigned int i8042_pnp_kbd_devices; +static bool i8042_pnp_aux_registered; +static unsigned int i8042_pnp_aux_devices; + +static int i8042_pnp_command_reg; +static int i8042_pnp_data_reg; +static int i8042_pnp_kbd_irq; +static int i8042_pnp_aux_irq; + +static char i8042_pnp_kbd_name[32]; +static char i8042_pnp_aux_name[32]; + +static void i8042_pnp_id_to_string(struct pnp_id *id, char *dst, int dst_size) +{ + strlcpy(dst, "PNP:", dst_size); + + while (id) { + strlcat(dst, " ", dst_size); + strlcat(dst, id->id, dst_size); + id = id->next; + } +} + +static int i8042_pnp_kbd_probe(struct pnp_dev *dev, + const struct pnp_device_id *did) +{ + if (pnp_port_valid(dev, 0) && pnp_port_len(dev, 0) == 1) + i8042_pnp_data_reg = pnp_port_start(dev, 0); + + if (pnp_port_valid(dev, 1) && pnp_port_len(dev, 1) == 1) + i8042_pnp_command_reg = pnp_port_start(dev, 1); + + if (pnp_irq_valid(dev, 0)) + i8042_pnp_kbd_irq = pnp_irq(dev, 0); + + strlcpy(i8042_pnp_kbd_name, did->id, sizeof(i8042_pnp_kbd_name)); + if (strlen(pnp_dev_name(dev))) { + strlcat(i8042_pnp_kbd_name, ":", sizeof(i8042_pnp_kbd_name)); + strlcat(i8042_pnp_kbd_name, pnp_dev_name(dev), + sizeof(i8042_pnp_kbd_name)); + } + i8042_pnp_id_to_string(dev->id, i8042_kbd_firmware_id, + sizeof(i8042_kbd_firmware_id)); + + /* Keyboard ports are always supposed to be wakeup-enabled */ + device_set_wakeup_enable(&dev->dev, true); + + i8042_pnp_kbd_devices++; + return 0; +} + +static int i8042_pnp_aux_probe(struct pnp_dev *dev, + const struct pnp_device_id *did) +{ + if (pnp_port_valid(dev, 0) && pnp_port_len(dev, 0) == 1) + i8042_pnp_data_reg = pnp_port_start(dev, 0); + + if (pnp_port_valid(dev, 1) && pnp_port_len(dev, 1) == 1) + i8042_pnp_command_reg = pnp_port_start(dev, 1); + + if (pnp_irq_valid(dev, 0)) + i8042_pnp_aux_irq = pnp_irq(dev, 0); + + strlcpy(i8042_pnp_aux_name, did->id, sizeof(i8042_pnp_aux_name)); + if (strlen(pnp_dev_name(dev))) { + strlcat(i8042_pnp_aux_name, ":", sizeof(i8042_pnp_aux_name)); + strlcat(i8042_pnp_aux_name, pnp_dev_name(dev), + sizeof(i8042_pnp_aux_name)); + } + i8042_pnp_id_to_string(dev->id, i8042_aux_firmware_id, + sizeof(i8042_aux_firmware_id)); + + i8042_pnp_aux_devices++; + return 0; +} + +static const struct pnp_device_id pnp_kbd_devids[] = { + { .id = "PNP0300", .driver_data = 0 }, + { .id = "PNP0301", .driver_data = 0 }, + { .id = "PNP0302", .driver_data = 0 }, + { .id = "PNP0303", .driver_data = 0 }, + { .id = "PNP0304", .driver_data = 0 }, + { .id = "PNP0305", .driver_data = 0 }, + { .id = "PNP0306", .driver_data = 0 }, + { .id = "PNP0309", .driver_data = 0 }, + { .id = "PNP030a", .driver_data = 0 }, + { .id = "PNP030b", .driver_data = 0 }, + { .id = "PNP0320", .driver_data = 0 }, + { .id = "PNP0343", .driver_data = 0 }, + { .id = "PNP0344", .driver_data = 0 }, + { .id = "PNP0345", .driver_data = 0 }, + { .id = "CPQA0D7", .driver_data = 0 }, + { .id = "", }, +}; +MODULE_DEVICE_TABLE(pnp, 
pnp_kbd_devids); + +static struct pnp_driver i8042_pnp_kbd_driver = { + .name = "i8042 kbd", + .id_table = pnp_kbd_devids, + .probe = i8042_pnp_kbd_probe, + .driver = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, + }, +}; + +static const struct pnp_device_id pnp_aux_devids[] = { + { .id = "AUI0200", .driver_data = 0 }, + { .id = "FJC6000", .driver_data = 0 }, + { .id = "FJC6001", .driver_data = 0 }, + { .id = "PNP0f03", .driver_data = 0 }, + { .id = "PNP0f0b", .driver_data = 0 }, + { .id = "PNP0f0e", .driver_data = 0 }, + { .id = "PNP0f12", .driver_data = 0 }, + { .id = "PNP0f13", .driver_data = 0 }, + { .id = "PNP0f19", .driver_data = 0 }, + { .id = "PNP0f1c", .driver_data = 0 }, + { .id = "SYN0801", .driver_data = 0 }, + { .id = "", }, +}; +MODULE_DEVICE_TABLE(pnp, pnp_aux_devids); + +static struct pnp_driver i8042_pnp_aux_driver = { + .name = "i8042 aux", + .id_table = pnp_aux_devids, + .probe = i8042_pnp_aux_probe, + .driver = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, + }, +}; + +static void i8042_pnp_exit(void) +{ + if (i8042_pnp_kbd_registered) { + i8042_pnp_kbd_registered = false; + pnp_unregister_driver(&i8042_pnp_kbd_driver); + } + + if (i8042_pnp_aux_registered) { + i8042_pnp_aux_registered = false; + pnp_unregister_driver(&i8042_pnp_aux_driver); + } +} +#ifdef CONFIG_ACPI +#include +#endif +static int __init i8042_pnp_init(void) +{ + char kbd_irq_str[4] = { 0 }, aux_irq_str[4] = { 0 }; + bool pnp_data_busted = false; + int err; + + if (i8042_nopnp) { + pr_info("PNP detection disabled\n"); + return 0; + } + + err = pnp_register_driver(&i8042_pnp_kbd_driver); + if (!err) + i8042_pnp_kbd_registered = true; + + err = pnp_register_driver(&i8042_pnp_aux_driver); + if (!err) + i8042_pnp_aux_registered = true; + + if (!i8042_pnp_kbd_devices && !i8042_pnp_aux_devices) { + i8042_pnp_exit(); + pr_info("PNP: No PS/2 controller found.\n"); +#ifdef CONFIG_ACPI + if (acpi_disabled == 0) + return -ENODEV; +#endif + pr_info("Probing ports directly.\n"); + return 0; + } + + if (i8042_pnp_kbd_devices) + snprintf(kbd_irq_str, sizeof(kbd_irq_str), + "%d", i8042_pnp_kbd_irq); + if (i8042_pnp_aux_devices) + snprintf(aux_irq_str, sizeof(aux_irq_str), + "%d", i8042_pnp_aux_irq); + + pr_info("PNP: PS/2 Controller [%s%s%s] at %#x,%#x irq %s%s%s\n", + i8042_pnp_kbd_name, + (i8042_pnp_kbd_devices && i8042_pnp_aux_devices) ? "," : "", + i8042_pnp_aux_name, + i8042_pnp_data_reg, i8042_pnp_command_reg, + kbd_irq_str, + (i8042_pnp_kbd_devices && i8042_pnp_aux_devices) ? 
"," : "", + aux_irq_str); + + if (((i8042_pnp_data_reg & ~0xf) == (i8042_data_reg & ~0xf) && + i8042_pnp_data_reg != i8042_data_reg) || + !i8042_pnp_data_reg) { + pr_warn("PNP: PS/2 controller has invalid data port %#x; using default %#x\n", + i8042_pnp_data_reg, i8042_data_reg); + i8042_pnp_data_reg = i8042_data_reg; + pnp_data_busted = true; + } + + if (((i8042_pnp_command_reg & ~0xf) == (i8042_command_reg & ~0xf) && + i8042_pnp_command_reg != i8042_command_reg) || + !i8042_pnp_command_reg) { + pr_warn("PNP: PS/2 controller has invalid command port %#x; using default %#x\n", + i8042_pnp_command_reg, i8042_command_reg); + i8042_pnp_command_reg = i8042_command_reg; + pnp_data_busted = true; + } + + if (!i8042_nokbd && !i8042_pnp_kbd_irq) { + pr_warn("PNP: PS/2 controller doesn't have KBD irq; using default %d\n", + i8042_kbd_irq); + i8042_pnp_kbd_irq = i8042_kbd_irq; + pnp_data_busted = true; + } + + if (!i8042_noaux && !i8042_pnp_aux_irq) { + if (!pnp_data_busted && i8042_pnp_kbd_irq) { + pr_warn("PNP: PS/2 appears to have AUX port disabled, " + "if this is incorrect please boot with i8042.nopnp\n"); + i8042_noaux = true; + } else { + pr_warn("PNP: PS/2 controller doesn't have AUX irq; using default %d\n", + i8042_aux_irq); + i8042_pnp_aux_irq = i8042_aux_irq; + } + } + + i8042_data_reg = i8042_pnp_data_reg; + i8042_command_reg = i8042_pnp_command_reg; + i8042_kbd_irq = i8042_pnp_kbd_irq; + i8042_aux_irq = i8042_pnp_aux_irq; + + return 0; +} + +#else /* !CONFIG_PNP */ +static inline int i8042_pnp_init(void) { return 0; } +static inline void i8042_pnp_exit(void) { } +#endif /* CONFIG_PNP */ + +static int __init i8042_platform_init(void) +{ + int retval; + + i8042_kbd_irq = I8042_MAP_IRQ(1); + i8042_aux_irq = I8042_MAP_IRQ(12); + + retval = i8042_pnp_init(); + if (retval) + return retval; + + return retval; +} + +static inline void i8042_platform_exit(void) +{ + i8042_pnp_exit(); +} + +#endif /* _I8042_LOONGSONIO_H */ diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h index bf2592fa9a78..6ce5cc007b0f 100644 --- a/drivers/input/serio/i8042.h +++ b/drivers/input/serio/i8042.h @@ -19,6 +19,8 @@ #include "i8042-snirm.h" #elif defined(CONFIG_SPARC) #include "i8042-sparcio.h" +#elif defined(CONFIG_MACH_LOONGSON64) +#include "i8042-loongsonio.h" #elif defined(CONFIG_X86) || defined(CONFIG_IA64) #include "i8042-acpipnpio.h" #else -- Gitee From 41668b6e0ce182bbc52b9c5920fc31f5eb7109b6 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 23 Oct 2021 10:14:30 +0800 Subject: [PATCH 221/241] USB: Fix OHCI/XHCI wakeup problems Change-Id: I994eb4600a934a586bed32d8bf218168f9e831e8 Signed-off-by: Huacai Chen --- drivers/usb/host/ohci-hcd.c | 4 +++- drivers/usb/host/ohci-hub.c | 3 +++ drivers/usb/host/pci-quirks.c | 4 ++++ drivers/usb/host/xhci-pci.c | 10 ++++++++++ drivers/usb/host/xhci.h | 1 + 5 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 73e13e7c2b46..0a859891bf35 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -1132,8 +1132,10 @@ int ohci_resume(struct usb_hcd *hcd, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); /* Make sure resume from hibernation re-enumerates everything */ - if (hibernated) + if (hibernated) { + ohci_writel(ohci, OHCI_INTR_MIE, &ohci->regs->intrdisable); ohci_usb_reset(ohci); + } /* See if the controller is already running or has been reset */ ohci->hc_control = ohci_readl(ohci, &ohci->regs->control); diff --git 
a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 44504c1751e0..865123024568 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -91,6 +91,9 @@ __acquires(ohci->lock) update_done_list(ohci); ohci_work(ohci); + /* All ED unlinks should be finished, no need for SOF interrupts */ + ohci_writel(ohci, OHCI_INTR_SF, &ohci->regs->intrdisable); + /* * Some controllers don't handle "global" suspend properly if * there are unsuspended ports. For these controllers, put all diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index ef08d68b9714..2975e592e231 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -948,6 +948,10 @@ static void quirk_usb_disable_ehci(struct pci_dev *pdev) * booting from USB disk or using a usb keyboard */ hcc_params = readl(base + EHCI_HCC_PARAMS); + if (pdev->vendor == PCI_VENDOR_ID_LOONGSON && + pdev->device == PCI_DEVICE_ID_LOONGSON_EHCI) + hcc_params &= ~(0xffL << 8); + offset = (hcc_params >> 8) & 0xff; while (offset && --count) { pci_read_config_dword(pdev, offset, &cap); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 8034e643a4af..9ba0c27c06e6 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -274,11 +274,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x0014) { xhci->quirks |= XHCI_TRUST_TX_LENGTH; xhci->quirks |= XHCI_ZERO_64B_REGS; + xhci->quirks |= XHCI_LWP_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0015) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_ZERO_64B_REGS; + xhci->quirks |= XHCI_LWP_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_VIA) xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -542,6 +544,10 @@ static void xhci_pci_remove(struct pci_dev *dev) if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(dev, PCI_D3hot); + /* Workaround for decreasing power consumption after S5 */ + if (xhci->quirks & XHCI_LWP_QUIRK) + pci_set_power_state(dev, PCI_D3hot); + usb_hcd_pci_remove(dev); } @@ -687,6 +693,10 @@ static void xhci_pci_shutdown(struct usb_hcd *hcd) /* Yet another workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(pdev, PCI_D3hot); + + /* Workaround for decreasing power consumption after S5 */ + if (xhci->quirks & XHCI_LWP_QUIRK) + pci_set_power_state(pdev, PCI_D3hot); } #endif /* CONFIG_PM */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 5a8443f6ed70..c6e6d73fa6f6 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1899,6 +1899,7 @@ struct xhci_hcd { #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) #define XHCI_ZHAOXIN_HOST BIT_ULL(46) +#define XHCI_LWP_QUIRK BIT_ULL(47) unsigned int num_active_eps; unsigned int limit_active_eps; -- Gitee From 35f5a6bbb2a2663001d63ce577e014185201a761 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 16 Feb 2021 09:02:36 -0500 Subject: [PATCH 222/241] drm/radeon: OLAND boards don't have VCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable it on those boards. No functional change, this just removes the message about VCE failing to initialize. 
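For context, a minimal sketch of how a per-family capability flag like this is consumed elsewhere in the driver (illustrative only; the exact call sites are not part of this patch):

	/* Sketch: engine init honours the capability flags set in
	 * radeon_asic_init(); with has_vce cleared for OLAND, the VCE
	 * init path (and its failure message) is never entered. */
	if (rdev->has_vce) {
		r = radeon_vce_init(rdev);
		if (r)
			dev_err(rdev->dev, "VCE init failed (%d)\n", r);
	}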
Bug: https://bugzilla.kernel.org/show_bug.cgi?id=197327 Change-Id: Ibdd7b4c8988f80dcfe7e7e178bfeab4e0729523f Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Huacai Chen --- drivers/gpu/drm/radeon/radeon_asic.c | 3 +++ drivers/gpu/drm/radeon/radeon_vce.c | 1 - drivers/gpu/drm/radeon/vce_v1_0.c | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 495700d16fc9..c551420268c7 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2477,6 +2477,9 @@ int radeon_asic_init(struct radeon_device *rdev) if (rdev->family == CHIP_HAINAN) { rdev->has_uvd = false; rdev->has_vce = false; + } else if (rdev->family == CHIP_OLAND) { + rdev->has_uvd = true; + rdev->has_vce = false; } else { rdev->has_uvd = true; rdev->has_vce = true; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 5e8006444704..7f12bef4f562 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -68,7 +68,6 @@ int radeon_vce_init(struct radeon_device *rdev) case CHIP_TAHITI: case CHIP_PITCAIRN: case CHIP_VERDE: - case CHIP_OLAND: case CHIP_ARUBA: fw_name = FIRMWARE_TAHITI; break; diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index bd75bbcf5bf6..028213eabd94 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -168,7 +168,6 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) chip_id = 0x01000015; break; case CHIP_PITCAIRN: - case CHIP_OLAND: chip_id = 0x01000016; break; case CHIP_ARUBA: -- Gitee From 02eedb34c93ca7396f9fe09d59496ce8b800b303 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 22 Feb 2021 10:53:47 +0800 Subject: [PATCH 223/241] drm/radeon: Workaround radeon driver bug for Loongson The Radeon driver cannot handle interrupts that arrive faster than the DMA'd ring data, so the irq handler must write the updated ih.rptr value back to the IH_RB_RPTR register to enable the interrupt again whenever the interrupt outruns the DMA data.
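The resulting end-of-handler sequence, common to all four IRQ handlers touched below (a sketch abridged from the diff; WREG32() and the ih fields are radeon driver context):

	/* Publish the read pointer to the hardware, not only to the
	 * driver copy, so the controller can raise the next interrupt
	 * even when it outruns the ring DMA. */
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);
	atomic_set(&rdev->ih.lock, 0);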
Change-Id: Iedf08b0e4b0338ce2334d418dac26a2be2dc19d6 Signed-off-by: Huacai Chen Signed-off-by: Zhijie Zhang --- drivers/gpu/drm/radeon/cik.c | 1 + drivers/gpu/drm/radeon/evergreen.c | 1 + drivers/gpu/drm/radeon/r600.c | 1 + drivers/gpu/drm/radeon/si.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 13f25ec1fe25..4d1c4a2bf909 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8108,6 +8108,7 @@ int cik_irq_process(struct radeon_device *rdev) if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 14d90dc376e7..11e3e99a9f01 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4919,6 +4919,7 @@ int evergreen_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index d9a33ca768f3..cd5418ccf20e 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4331,6 +4331,7 @@ int r600_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 31e2c1083b08..24581db5161a 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6443,6 +6443,7 @@ int si_irq_process(struct radeon_device *rdev) if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; + WREG32(IH_RB_RPTR, rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ -- Gitee From f4aeb6c3a7e834f446cf0410f3b913603e19942d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 20 Feb 2024 14:28:02 +0800 Subject: [PATCH 224/241] drm/radeon: Call mmiowb() at the end of radeon_ring_commit() Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of mmiowb()") removed all mmiowb() invocations in drivers, but it says: "NOTE: mmiowb() has only ever guaranteed ordering in conjunction with spin_unlock(). However, pairing each mmiowb() removal in this patch with the corresponding call to spin_unlock() is not at all trivial, so there is a small chance that this change may regress any drivers incorrectly relying on mmiowb() to order MMIO writes between CPUs using lock-free synchronisation." The mmio in radeon_ring_commit() is protected by a mutex rather than a spinlock, but in the mutex fastpath it behaves similarly to a spinlock and needs a mmiowb() to make sure the wptr is up-to-date for the hardware.
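The ordering pattern being restored can be sketched as follows (illustrative names; the real code is the radeon_ring_commit() hunk below):

	mutex_lock(&ring_lock);
	/* ... write commands into the ring buffer ... */
	writel(wptr, wptr_mmio_reg);	/* MMIO doorbell */
	mmiowb();			/* order the doorbell before the unlock */
	mutex_unlock(&ring_lock);

Otherwise a second CPU can acquire the mutex and ring its own doorbell while the first CPU's MMIO write is still queued, leaving the device to observe the two writes out of order.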
Without the barrier, we get errors like the following when running 'glxgears' on weakly ordered architectures such as LoongArch: radeon 0000:04:00.0: ring 0 stalled for more than 10324msec radeon 0000:04:00.0: ring 3 stalled for more than 10240msec radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3) radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) radeon 0000:04:00.0: scheduling IB failed (-35). [drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35) Change-Id: Ib8d7d1b53ad80a2bc03d3121b4984ec54c8a481f Signed-off-by: Tianyang Zhang Signed-off-by: Huacai Chen --- drivers/gpu/drm/radeon/radeon_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 37093cea24c5..a5706f144b3a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -184,6 +184,7 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, if (hdp_flush && rdev->asic->mmio_hdp_flush) rdev->asic->mmio_hdp_flush(rdev); radeon_ring_set_wptr(rdev, ring); + mmiowb(); /* Make sure wptr is up-to-date for hw */ } /** -- Gitee From c23186fc7a53d494717cff08e9b733ecb98b5020 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Nov 2020 16:18:09 -0500 Subject: [PATCH 225/241] drm/amdgpu/display: FP fixes for DCN3.x (v4) Add proper FP_START/END handling and adjust Makefiles per previous asics. v2: fix up harder.
v3: fix clkmgr Makefile for dcn30 v4: fix old gcc handling is only required for x86 Change-Id: Ibefb63e7c88bff89db135bd5255f68f58077c1cc Build-tested-by: Nathan Chancellor Reviewed-by: Harry Wentland (v1) Reviewed-by: Nicholas Kazlauskas (v1) Signed-off-by: Alex Deucher Signed-off-by: Huacai Chen --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 13 ++++ .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 +- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 2 + .../drm/amd/display/dc/dcn30/dcn30_resource.c | 71 +++++++++++++++++-- 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 52b1ce775a1e..90d19fdd6607 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -114,6 +114,19 @@ ifdef CONFIG_DRM_AMD_DC_DCN3_0 ############################################################################### CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o +# prevent build errors regarding soft-float vs hard-float FP ABI tags +# this code is currently unused on ppc64, as it applies to VanGogh APUs only +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) +endif + +# prevent build errors: +# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types +# this file is unused on arm64, just like on ppc64 +ifdef CONFIG_ARM64 +CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only +endif + AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index cab47bb21172..3069110fe62a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -104,7 +104,7 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk } } -static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) +static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { /* defaults */ double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; @@ -201,7 +201,9 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) clk_mgr_base->funcs->get_memclk_states_from_smu(clk_mgr_base); /* WM range table */ + DC_FP_START(); dcn3_build_wm_range_table(clk_mgr); + DC_FP_END(); } static int dcn30_get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index bd2a068f9863..248c2711aace 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -52,6 +52,7 @@ IS_OLD_GCC = 1 endif endif +ifdef CONFIG_X86 ifdef IS_OLD_GCC # Stack alignment mismatch, proceed with caution. 
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 @@ -62,6 +63,7 @@ else CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 endif +endif AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 32993ce24a58..ca3044122834 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1465,7 +1465,19 @@ int dcn30_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn30_populate_dml_writeback_from_context( +/* + * This must be noinline to ensure anything that deals with FP registers + * is contained within this call; previously our compiling with hard-float + * would result in fp instructions being emitted outside of the boundaries + * of the DC_FP_START/END macros, which makes sense as the compiler has no + * idea about what is wrapped and what is not + * + * This is largely just a workaround to avoid breakage introduced with 5.6, + * ideally all fp-using code should be moved into its own file, only that + * should be compiled with hard-float, and all code exported from there + * should be strictly wrapped with DC_FP_START/END + */ +static noinline void dcn30_populate_dml_writeback_from_context_fp( struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) { int pipe_cnt, i, j; @@ -1554,6 +1566,14 @@ void dcn30_populate_dml_writeback_from_context( } +void dcn30_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn30_populate_dml_writeback_from_context_fp(dc, res_ctx, pipes); + DC_FP_END(); +} + unsigned int dcn30_calc_max_scaled_time( unsigned int time_per_pixel, enum mmhubbub_wbif_mode mode, @@ -2202,7 +2222,19 @@ static bool dcn30_internal_validate_bw( return out; } -void dcn30_calculate_wm_and_dlg( +/* + * This must be noinline to ensure anything that deals with FP registers + * is contained within this call; previously our compiling with hard-float + * would result in fp instructions being emitted outside of the boundaries + * of the DC_FP_START/END macros, which makes sense as the compiler has no + * idea about what is wrapped and what is not + * + * This is largely just a workaround to avoid breakage introduced with 5.6, + * ideally all fp-using code should be moved into its own file, only that + * should be compiled with hard-float, and all code exported from there + * should be strictly wrapped with DC_FP_START/END + */ +static noinline void dcn30_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, @@ -2358,7 +2390,18 @@ void dcn30_calculate_wm_and_dlg( dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; } -bool dcn30_validate_bandwidth(struct dc *dc, +void dcn30_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn30_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + +static noinline bool dcn30_validate_bandwidth_fp(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -2409,7 +2452,20 @@ bool dcn30_validate_bandwidth(struct dc *dc, return out; } -static void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, +bool 
dcn30_validate_bandwidth(struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool out; + + DC_FP_START(); + out = dcn30_validate_bandwidth_fp(dc, context, fast_validate); + DC_FP_END(); + + return out; +} + +static noinline void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, unsigned int *optimal_dcfclk, unsigned int *optimal_fclk) { @@ -2497,8 +2553,10 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params // Calculate optimal dcfclk for each uclk for (i = 0; i < num_uclk_states; i++) { + DC_FP_START(); get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, &optimal_dcfclk_for_uclk[i], NULL); + DC_FP_END(); if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; } @@ -2602,6 +2660,8 @@ static bool dcn30_resource_construct( struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; + DC_FP_START(); + ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn3; @@ -2866,10 +2926,13 @@ static bool dcn30_resource_construct( dc->cap_funcs = cap_funcs; + DC_FP_END(); + return true; create_fail: + DC_FP_END(); dcn30_resource_destruct(pool); return false; -- Gitee From 95fa0c6a99009cd0abc410ec2d2f34954b553ef2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 10 Nov 2020 18:25:01 -0500 Subject: [PATCH 226/241] drm/amdgpu/display: fix FP handling in DCN30 Adjust the FP handling to avoid nested calls. The nested calls cause the warning below WARNING: CPU: 3 PID: 384 at arch/x86/kernel/fpu/core.c:129 kernel_fpu_begin Change-Id: I97d5bab3a33f3d3f50768dc5813dd8d9316d0115 Fixes: 582e2ce5b4ece3 ("drm/amdgpu/display: FP fixes for DCN3.x (v4)") Signed-off-by: Alex Deucher Signed-off-by: Bhawanpreet Lakha Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Signed-off-by: Huacai Chen --- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 43 +++---------------- 1 file changed, 6 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index ca3044122834..6873b053bb3d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1465,20 +1465,8 @@ int dcn30_populate_dml_pipes_from_context( return pipe_cnt; } -/* - * This must be noinline to ensure anything that deals with FP registers - * is contained within this call; previously our compiling with hard-float - * would result in fp instructions being emitted outside of the boundaries - * of the DC_FP_START/END macros, which makes sense as the compiler has no - * idea about what is wrapped and what is not - * - * This is largely just a workaround to avoid breakage introduced with 5.6, - * ideally all fp-using code should be moved into its own file, only that - * should be compiled with hard-float, and all code exported from there - * should be strictly wrapped with DC_FP_START/END - */ -static noinline void dcn30_populate_dml_writeback_from_context_fp( - struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) +void dcn30_populate_dml_writeback_from_context( + struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) { int pipe_cnt, i, j; double max_calc_writeback_dispclk; @@ -1566,14 +1554,6 @@ static noinline void dcn30_populate_dml_writeback_from_context_fp( } -void dcn30_populate_dml_writeback_from_context( - struct dc *dc, struct 
resource_context *res_ctx, display_e2e_pipe_params_st *pipes) -{ - DC_FP_START(); - dcn30_populate_dml_writeback_from_context_fp(dc, res_ctx, pipes); - DC_FP_END(); -} - unsigned int dcn30_calc_max_scaled_time( unsigned int time_per_pixel, enum mmhubbub_wbif_mode mode, @@ -1960,7 +1940,7 @@ static struct pipe_ctx *dcn30_find_split_pipe( return pipe; } -static bool dcn30_internal_validate_bw( +static noinline bool dcn30_internal_validate_bw( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1982,6 +1962,7 @@ static bool dcn30_internal_validate_bw( pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes); + DC_FP_START(); if (!pipe_cnt) { out = true; goto validate_out; @@ -2219,6 +2200,7 @@ static bool dcn30_internal_validate_bw( out = false; validate_out: + DC_FP_END(); return out; } @@ -2401,7 +2383,7 @@ void dcn30_calculate_wm_and_dlg( DC_FP_END(); } -static noinline bool dcn30_validate_bandwidth_fp(struct dc *dc, +bool dcn30_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { @@ -2452,19 +2434,6 @@ static noinline bool dcn30_validate_bandwidth_fp(struct dc *dc, return out; } -bool dcn30_validate_bandwidth(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out; - - DC_FP_START(); - out = dcn30_validate_bandwidth_fp(dc, context, fast_validate); - DC_FP_END(); - - return out; -} - static noinline void get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, unsigned int *optimal_dcfclk, unsigned int *optimal_fclk) -- Gitee From e5f9c5ed58da7f21dae46ade0bf3902199d81f94 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 25 Aug 2021 11:53:05 -0400 Subject: [PATCH 227/241] drm/amd/display: Remove duplicate dml init [Why & How] DML is initialized again unnecessarily after it is already done conditionally. Remove the duplicate initialization. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1403 Change-Id: Ia9b799b3c53359f416192b33bcfdab02e23efaa8 Signed-off-by: Aurabindo Pillai Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Huacai Chen --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 6873b053bb3d..0a805a06a07a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2593,11 +2593,6 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (dc->current_state) dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); } - - /* re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); } static const struct resource_funcs dcn30_res_pool_funcs = { -- Gitee From d00debec29a07b5f8f211029d64ab869d68ae9a8 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 24 Jan 2022 01:23:35 +0100 Subject: [PATCH 228/241] drm/amd/display: Fix FP start/end for dcn30_internal_validate_bw. It calls populate_dml_pipes which uses doubles to initialize the scale_ratio_depth params. Mirrors the dcn20 logic.
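The invariant these FP patches converge on can be sketched as follows (matching the hunk below; DC_FP_START()/DC_FP_END() expand to kernel_fpu_begin()/kernel_fpu_end() on x86, and later in this series on LoongArch as well):

	/* The caller owns the FP-safe region and enters it exactly once
	 * around all double-using code; opening it again inside a callee
	 * would nest kernel_fpu_begin() and trip the WARN quoted in the
	 * previous patch. */
	DC_FP_START();
	out = dcn30_internal_validate_bw(dc, context, pipes,
					 &pipe_cnt, &vlevel, fast_validate);
	DC_FP_END();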
Change-Id: If3b4f080541b86fc9a474f6f41fb03c7c273b5de Cc: stable@vger.kernel.org Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher Signed-off-by: Huacai Chen --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 0a805a06a07a..ecdb6d033bad 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1962,7 +1962,6 @@ static noinline bool dcn30_internal_validate_bw( pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes); - DC_FP_START(); if (!pipe_cnt) { out = true; goto validate_out; @@ -2200,7 +2199,6 @@ static noinline bool dcn30_internal_validate_bw( out = false; validate_out: - DC_FP_END(); return out; } @@ -2398,7 +2396,9 @@ bool dcn30_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; -- Gitee From 35cbfc050e49ed97017b79592fe612048441be0d Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 12 Feb 2022 16:40:00 +0100 Subject: [PATCH 229/241] drm/amd/display: Protect update_bw_bounding_box FPU code. For DCN3/3.01/3.02 at least these use the fpu. v2: squash in build fix for when DCN is not enabled (Leo) Change-Id: I36f81dc40a2b894ed35dcd9188e8018906fa7509 Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- .../gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 2 ++ drivers/gpu/drm/amd/display/dc/core/dc.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 3069110fe62a..5e7cfd87bb72 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -418,8 +418,10 @@ static void dcn3_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base) clk_mgr_base->bw_params->clk_table.num_entries = num_levels ? 
num_levels : 1; /* Refresh bounding box */ + DC_FP_START(); clk_mgr_base->ctx->dc->res_pool->funcs->update_bw_bounding_box( clk_mgr->base.ctx->dc, clk_mgr_base->bw_params); + DC_FP_END(); } static bool dcn3_are_clock_states_equal(struct dc_clocks *a, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 36a9e9c84ed4..605ef1bd8cbc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -734,12 +734,15 @@ static bool dc_construct(struct dc *dc, goto fail; #ifdef CONFIG_DRM_AMD_DC_DCN3_0 dc->clk_mgr->force_smu_not_present = init_params->force_smu_not_present; -#endif dc->debug.force_ignore_link_settings = init_params->force_ignore_link_settings; - if (dc->res_pool->funcs->update_bw_bounding_box) + if (dc->res_pool->funcs->update_bw_bounding_box) { + DC_FP_START(); dc->res_pool->funcs->update_bw_bounding_box(dc, dc->clk_mgr->bw_params); + DC_FP_END(); + } +#endif /* Creation of current_state must occur after dc->dml * is initialized in dc_create_resource_pool because -- Gitee From 2036728a80fec1e8fcd073bbe932392cf84784f2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 4 Mar 2023 15:03:37 +0800 Subject: [PATCH 230/241] drm/amdgpu/display: Enable DCN for LoongArch Change-Id: I4da61742b59598ed7d2a1b1ac226096eb0ce30b0 Signed-off-by: Huacai Chen --- drivers/gpu/drm/amd/display/Kconfig | 4 ++-- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 7 +++++++ drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 8 ++++++++ drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 4 ++++ drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 4 ++++ drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 5 +++++ drivers/gpu/drm/amd/display/dc/dml/Makefile | 16 ++++++++++++++++ drivers/gpu/drm/amd/display/dc/dsc/Makefile | 5 +++++ drivers/gpu/drm/amd/display/dc/os_types.h | 4 ++++ 9 files changed, 55 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6c4cba09d23b..f044c029b190 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -7,7 +7,7 @@ config DRM_AMD_DC default y depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || LOONGARCH || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and @@ -26,7 +26,7 @@ config DRM_AMD_DC_DCN config DRM_AMD_DC_DCN3_0 bool "DCN 3.0 family" - depends on DRM_AMD_DC && X86 + depends on DRM_AMD_DC && (X86 || LOONGARCH) depends on DRM_AMD_DC_DCN help Choose this option if you want to have diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 4674aca8f206..b41b92d806b5 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -33,6 +33,10 @@ ifdef CONFIG_PPC64 calcs_ccflags := -mhard-float -maltivec endif +ifdef CONFIG_LOONGARCH +calcs_rcflags := -msoft-float +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -53,6 +57,9 @@ endif CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_ccflags) CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_ccflags) CFLAGS_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare +CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calcs.o := $(calcs_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calc_auto.o := $(calcs_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/calcs/dcn_calc_math.o := $(calcs_rcflags) BW_CALCS = dce_calcs.o bw_fixed.o custom_float.o diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 90d19fdd6607..88a13880aecb 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -104,6 +104,10 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif +ifdef CONFIG_LOONGARCH +CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -msoft-float +endif + AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) @@ -127,6 +131,10 @@ ifdef CONFIG_ARM64 CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only endif +ifdef CONFIG_LOONGARCH +CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -msoft-float +endif + AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 5fcaf78334ff..8651a70737f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -17,6 +17,10 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec endif +ifdef CONFIG_LOONGARCH +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -msoft-float +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 07684d3e375a..255a6edf14a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -13,6 +13,10 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec endif +ifdef CONFIG_LOONGARCH +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -msoft-float +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 248c2711aace..2a1a7e6ac1b3 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -46,6 +46,11 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only endif +ifdef CONFIG_LOONGARCH +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -msoft-float +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -msoft-float +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 417331438c30..1cf99c828358 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,6 +33,11 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif +ifdef CONFIG_LOONGARCH +dml_ccflags := -mfpu=64 +dml_rcflags := -msoft-float +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -60,13 +65,24 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_ccflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) endif ifdef CONFIG_DRM_AMD_DC_DCN3_0 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) -Wframe-larger-than=2048 CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) endif CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) DML = display_mode_lib.o display_rq_dlg_helpers.o dml1_display_rq_dlg_calc.o \ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index ea29cf95d470..02ff495d57a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -10,6 +10,10 @@ ifdef CONFIG_PPC64 dsc_ccflags := -mhard-float -maltivec endif +ifdef CONFIG_LOONGARCH +dsc_rcflags := -msoft-float +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -28,6 +32,7 @@ endif endif CFLAGS_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_ccflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dsc/rc_calc.o := $(dsc_rcflags) DSC = dc_dsc.o rc_calc.o rc_calc_dpi.o diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 126c2f3a4dd3..eb7d0021c86e 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ 
b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -55,6 +55,10 @@ #include #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() +#elif defined(CONFIG_LOONGARCH) +#include +#define DC_FP_START() kernel_fpu_begin() +#define DC_FP_END() kernel_fpu_end() #elif defined(CONFIG_PPC64) #include #include -- Gitee From dc90f179bb226d85017bf3727f2d84d87749499d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 22 May 2018 16:54:04 +0800 Subject: [PATCH 231/241] drm: Add Loongson LS7A Display-Controller driver Change-Id: I3e3b9ea68628062805eec53ffe69e68dc3f78d92 Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 1 + drivers/gpio/Kconfig | 2 +- drivers/gpio/gpio-loongson.c | 6 +- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/loongson/Kconfig | 19 + drivers/gpu/drm/loongson/Makefile | 16 + drivers/gpu/drm/loongson/loongson_connector.c | 241 +++++ drivers/gpu/drm/loongson/loongson_crtc.c | 509 ++++++++++ drivers/gpu/drm/loongson/loongson_cursor.c | 203 ++++ drivers/gpu/drm/loongson/loongson_drv.c | 656 +++++++++++++ drivers/gpu/drm/loongson/loongson_drv.h | 241 +++++ drivers/gpu/drm/loongson/loongson_encoder.c | 134 +++ drivers/gpu/drm/loongson/loongson_i2c.c | 189 ++++ drivers/gpu/drm/loongson/loongson_i2c.h | 43 + drivers/gpu/drm/loongson/loongson_irq.c | 70 ++ drivers/gpu/drm/loongson/loongson_vbios.c | 892 ++++++++++++++++++ drivers/gpu/drm/loongson/loongson_vbios.h | 283 ++++++ 18 files changed, 3504 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/loongson/Kconfig create mode 100644 drivers/gpu/drm/loongson/Makefile create mode 100644 drivers/gpu/drm/loongson/loongson_connector.c create mode 100644 drivers/gpu/drm/loongson/loongson_crtc.c create mode 100644 drivers/gpu/drm/loongson/loongson_cursor.c create mode 100644 drivers/gpu/drm/loongson/loongson_drv.c create mode 100644 drivers/gpu/drm/loongson/loongson_drv.h create mode 100644 drivers/gpu/drm/loongson/loongson_encoder.c create mode 100644 drivers/gpu/drm/loongson/loongson_i2c.c create mode 100644 drivers/gpu/drm/loongson/loongson_i2c.h create mode 100644 drivers/gpu/drm/loongson/loongson_irq.c create mode 100644 drivers/gpu/drm/loongson/loongson_vbios.c create mode 100644 drivers/gpu/drm/loongson/loongson_vbios.h diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 5e3553d8e90e..17462c17f978 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -582,6 +582,7 @@ CONFIG_DRM_AMDGPU_SI=y CONFIG_DRM_AMDGPU_CIK=y CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y +CONFIG_DRM_LOONGSON=y CONFIG_FB_EFI=y CONFIG_FB_RADEON=y CONFIG_LCD_PLATFORM=m diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 39f3e1366409..9e4059836d5d 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -350,7 +350,7 @@ config GPIO_LOGICVC config GPIO_LOONGSON bool "Loongson-2/3 GPIO support" - depends on CPU_LOONGSON2EF || CPU_LOONGSON64 + depends on CPU_LOONGSON2EF || CPU_LOONGSON64 || LOONGARCH help driver for GPIO functionality on Loongson-2F/3A/3B processors. 
diff --git a/drivers/gpio/gpio-loongson.c b/drivers/gpio/gpio-loongson.c index a42145873cc9..c47bcd33f1eb 100644 --- a/drivers/gpio/gpio-loongson.c +++ b/drivers/gpio/gpio-loongson.c @@ -22,10 +22,10 @@ #define STLS2F_N_GPIO 4 #define STLS3A_N_GPIO 16 -#ifdef CONFIG_CPU_LOONGSON64 -#define LOONGSON_N_GPIO STLS3A_N_GPIO -#else +#ifdef CONFIG_CPU_LOONGSON2EF #define LOONGSON_N_GPIO STLS2F_N_GPIO +#else +#define LOONGSON_N_GPIO STLS3A_N_GPIO #endif /* diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 4e9b3a95fa7c..b37e6660dd4e 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -313,6 +313,8 @@ source "drivers/gpu/drm/udl/Kconfig" source "drivers/gpu/drm/ast/Kconfig" +source "drivers/gpu/drm/loongson/Kconfig" + source "drivers/gpu/drm/mgag200/Kconfig" source "drivers/gpu/drm/armada/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 81569009f884..e9dd6847c9fa 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ obj-$(CONFIG_DRM_GMA500) += gma500/ obj-$(CONFIG_DRM_UDL) += udl/ obj-$(CONFIG_DRM_AST) += ast/ +obj-$(CONFIG_DRM_LOONGSON) +=loongson/ obj-$(CONFIG_DRM_ARMADA) += armada/ obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/ obj-y += rcar-du/ diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig new file mode 100644 index 000000000000..0c5f22703b6a --- /dev/null +++ b/drivers/gpu/drm/loongson/Kconfig @@ -0,0 +1,19 @@ + +config DRM_LOONGSON + tristate "DRM support for LS7A Display Controller" + depends on DRM && PCI + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER + select DRM_GEM_CMA_HELPER + select DRM_KMS_CMA_HELPER + default n + help + Support the display controllers found on the Loongson's LS7A + bridge. + + This driver provides no built-in acceleration; acceleration is + performed by Vivante GC1000. This driver provides kernel mode + setting and buffer management to userspace. diff --git a/drivers/gpu/drm/loongson/Makefile b/drivers/gpu/drm/loongson/Makefile new file mode 100644 index 000000000000..a5272137848c --- /dev/null +++ b/drivers/gpu/drm/loongson/Makefile @@ -0,0 +1,16 @@ +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) +# + +ccflags-y := -Iinclude/drm +loongson-y := loongson_drv.o \ + loongson_i2c.o \ + loongson_irq.o \ + loongson_crtc.o \ + loongson_encoder.o \ + loongson_connector.o \ + loongson_cursor.o \ + loongson_vbios.o + +obj-$(CONFIG_DRM_LOONGSON) += loongson.o diff --git a/drivers/gpu/drm/loongson/loongson_connector.c b/drivers/gpu/drm/loongson/loongson_connector.c new file mode 100644 index 000000000000..721336709a6b --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_connector.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "loongson_drv.h" + +/** + * loongson_connector_best_encoder + * + * @connector: point to the drm_connector structure + * + * Select the best encoder for the given connector. 
Used by both the helpers in + * drm_atomic_helper_check_modeset() and legacy CRTC helpers + */ +static struct drm_encoder *loongson_connector_best_encoder(struct drm_connector + *connector) +{ + struct drm_encoder *encoder; + + /* There is only one encoder per connector */ + drm_connector_for_each_possible_encoder(connector, encoder) + return encoder; + + return NULL; +} + +/** + * loongson_get_modes + * + * @connetcor: central DRM connector control structure + * + * Fill in all modes currently valid for the sink into the connector->probed_modes list. + * It should also update the EDID property by calling drm_connector_update_edid_property(). + */ +static int loongson_get_modes(struct drm_connector *connector) +{ + int id, ret = 0; + enum loongson_edid_method ledid_method; + struct edid *edid = NULL; + struct loongson_connector *lconnector = to_loongson_connector(connector); + struct i2c_adapter *adapter = lconnector->i2c->adapter; + + id = drm_connector_index(connector); + + ledid_method = lconnector->edid_method; + switch (ledid_method) { + case via_i2c: + case via_encoder: + default: + edid = drm_get_edid(connector, adapter); + break; + case via_vbios: + edid = kmalloc(EDID_LENGTH * 2, GFP_KERNEL); + memcpy(edid, lconnector->vbios_edid, EDID_LENGTH * 2); + } + + if (edid) { + drm_connector_update_edid_property(connector, edid); + ret = drm_add_edid_modes(connector, edid); + kfree(edid); + } else { + ret += drm_add_modes_noedid(connector, 1920, 1080); + drm_set_preferred_mode(connector, 1024, 768); + } + + return ret; +} + +static bool is_connected(struct loongson_connector *lconnector) +{ + unsigned char start = 0x0; + struct i2c_adapter *adapter; + struct i2c_msg msgs = { + .addr = DDC_ADDR, + .len = 1, + .flags = 0, + .buf = &start, + }; + + if (!lconnector->i2c) + return false; + + adapter = lconnector->i2c->adapter; + if (i2c_transfer(adapter, &msgs, 1) < 1) { + DRM_DEBUG_KMS("display-%d not connect\n", lconnector->id); + return false; + } + + return true; +} + +/** + * loongson_connector_detect + * + * @connector: point to drm_connector + * @force: bool + * + * Check to see if anything is attached to the connector. + * The parameter force is set to false whilst polling, + * true when checking the connector due to a user request + */ +static enum drm_connector_status loongson_connector_detect(struct drm_connector + *connector, bool force) +{ + int r; + enum drm_connector_status ret = connector_status_connected; + struct loongson_connector *lconnector = to_loongson_connector(connector); + + DRM_DEBUG("loongson_connector_detect connector_id=%d, ledid_method=%d\n", + drm_connector_index(connector), lconnector->edid_method); + + if (lconnector->edid_method != via_vbios) { + r = pm_runtime_get_sync(connector->dev->dev); + if (r < 0) + return connector_status_disconnected; + + if (is_connected(lconnector)) + ret = connector_status_connected; + else + ret = connector_status_disconnected; + + pm_runtime_mark_last_busy(connector->dev->dev); + pm_runtime_put_autosuspend(connector->dev->dev); + } + + return ret; +} + +/** + * These provide the minimum set of functions required to handle a connector + * + * Helper operations for connectors.These functions are used + * by the atomic and legacy modeset helpers and by the probe helpers. 
+ */ +static const struct drm_connector_helper_funcs loongson_connector_helper_funcs = { + .get_modes = loongson_get_modes, + .best_encoder = loongson_connector_best_encoder, +}; + +/** + * These provide the minimum set of functions required to handle a connector + * + * Control connectors on a given device. + * The functions below allow the core DRM code to control connectors, + * enumerate available modes and so on. + */ +static const struct drm_connector_funcs loongson_connector_funcs = { + .dpms = drm_helper_connector_dpms, + .detect = loongson_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const unsigned short normal_i2c[] = { 0x50, I2C_CLIENT_END }; + + +/** + * loongson_connector_init + * + * @dev: drm device + * @connector_id: + * + * Vga is the interface between host and monitor + * This function is to init vga + */ +struct drm_connector *loongson_connector_init(struct drm_device *dev, unsigned int index) +{ + struct i2c_adapter *adapter; + struct i2c_client *ddc_client; + struct drm_connector *connector; + struct loongson_encoder *loongson_encoder; + struct loongson_connector *loongson_connector; + struct loongson_drm_device *ldev = (struct loongson_drm_device*)dev->dev_private; + + const struct i2c_board_info ddc_info = { + .type = "ddc-dev", + .addr = DDC_ADDR, + .flags = I2C_CLASS_DDC, + }; + + loongson_encoder = ldev->mode_info[index].encoder; + adapter = loongson_encoder->i2c->adapter; + ddc_client = i2c_new_client_device(adapter, &ddc_info); + if (IS_ERR(ddc_client)) { + i2c_del_adapter(adapter); + DRM_ERROR("Failed to create standard ddc client\n"); + return NULL; + } + + loongson_connector = kzalloc(sizeof(struct loongson_connector), GFP_KERNEL); + if (!loongson_connector) + return NULL; + + ldev->connector_active0 = 0; + ldev->connector_active1 = 0; + loongson_connector->id = index; + loongson_connector->ldev = ldev; + loongson_connector->type = get_connector_type(ldev, index); + loongson_connector->i2c_id = get_connector_i2cid(ldev, index); + loongson_connector->hotplug = get_hotplug_mode(ldev, index); + loongson_connector->edid_method = get_edid_method(ldev, index); + if (loongson_connector->edid_method == via_vbios) + loongson_connector->vbios_edid = get_vbios_edid(ldev, index); + + loongson_connector->i2c = &ldev->i2c_bus[index]; + if (!loongson_connector->i2c) + DRM_INFO("connector-%d match i2c-%d err\n", index, + loongson_connector->i2c_id); + + connector = &loongson_connector->base; + + drm_connector_helper_add(connector, &loongson_connector_helper_funcs); + + drm_connector_init(dev, connector, + &loongson_connector_funcs, loongson_connector->type); + + drm_connector_register(connector); + + return connector; +} diff --git a/drivers/gpu/drm/loongson/loongson_crtc.c b/drivers/gpu/drm/loongson/loongson_crtc.c new file mode 100644 index 000000000000..a9972a9a1a68 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_crtc.c @@ -0,0 +1,509 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. 
+ * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * Jiaxun Yang + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "loongson_drv.h" + +/** + * This file contains setup code for the CRTC + */ + +DEFINE_SPINLOCK(loongson_crtc_lock); + +static int loongson_crtc_enable_vblank(struct drm_crtc *crtc) +{ + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = lcrtc->ldev; + + if(lcrtc->crtc_id == 0) { + ldev->int_reg |= (BIT(INT_DVO0_FB_END) << 16); + } else { + ldev->int_reg |= (BIT(INT_DVO1_FB_END) << 16); + } + + spin_lock(&loongson_reglock); + writel(ldev->int_reg, ldev->mmio + FB_INT_REG); + spin_unlock(&loongson_reglock); + + return 0; +} + +static void loongson_crtc_disable_vblank(struct drm_crtc *crtc) +{ + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = lcrtc->ldev; + + + if(lcrtc->crtc_id == 0) { + ldev->int_reg &= (~BIT(INT_DVO0_FB_END) << 16); + } else { + ldev->int_reg &= (~BIT(INT_DVO1_FB_END) << 16); + } + + spin_lock(&loongson_reglock); + writel(ldev->int_reg, ldev->mmio + FB_INT_REG); + spin_unlock(&loongson_reglock); +} + +#define PLL_REF_CLK_MHZ 100 +#define PCLK_PRECISION_INDICATOR 10000 + +/** + * cal_freq + * + * @pixclock: unsigned int + * @pll_config: point to the pix_pll structure + * + * Calculate frequency + */ +static unsigned int cal_freq(unsigned int pixclock, struct pix_pll *pll_config) +{ + int i, j, loopc_offset; + unsigned int refc_set[] = {4, 5, 3}; + unsigned int prec_set[] = {1, 5, 10, 50, 100}; /*in 1/PCLK_PRECISION_INDICATOR*/ + unsigned int pstdiv, loopc, refc; + unsigned int precision_req, precision; + unsigned int loopc_min, loopc_max, loopc_mid; + unsigned long long real_dvo, req_dvo; + + /*try precision from high to low*/ + for (j = 0; j < sizeof(prec_set)/sizeof(int); j++){ + precision_req = prec_set[j]; + + /*try each refc*/ + for (i = 0; i < sizeof(refc_set)/sizeof(int); i++) { + refc = refc_set[i]; + loopc_min = (1200 / PLL_REF_CLK_MHZ) * refc; /*1200 / (PLL_REF_CLK_MHZ / refc)*/ + loopc_max = (3200 / PLL_REF_CLK_MHZ) * refc; /*3200 / (PLL_REF_CLK_MHZ / refc)*/ + loopc_mid = (2200 / PLL_REF_CLK_MHZ) * refc; /*(loopc_min + loopc_max) / 2;*/ + loopc_offset = -1; + + /*try each loopc*/ + for (loopc = loopc_mid; (loopc <= loopc_max) && (loopc >= loopc_min); loopc += loopc_offset) { + if(loopc_offset < 0) + loopc_offset = -loopc_offset; + else + loopc_offset = -(loopc_offset+1); + + pstdiv = loopc * PLL_REF_CLK_MHZ * 1000 / refc / pixclock; + if((pstdiv > 127) || (pstdiv < 1)) + continue; + + /*real_freq is float type which is not available, but read_freq * pstdiv is available.*/ + req_dvo = (pixclock * pstdiv); + real_dvo = (loopc * PLL_REF_CLK_MHZ * 1000 / refc); + precision = abs(real_dvo * PCLK_PRECISION_INDICATOR / req_dvo - PCLK_PRECISION_INDICATOR); + + if(precision < precision_req){ + pll_config->l2_div = pstdiv; + pll_config->l1_loopc = loopc; + pll_config->l1_frefc = refc; + if(j > 1) + printk("Warning: PIX clock precision degraded to %d / %d\n", precision_req, PCLK_PRECISION_INDICATOR); + return 1; + } + } + } + } + return 0; +} + +/** + * config_pll + * + * @pll_base: represent a long type + * @pll_cfg: point to the pix_pll srtucture + * + * 
Config pll apply to ls7a + */ +static void config_pll(void *pll_base, struct pix_pll *pll_cfg) +{ + unsigned long val; + + /* clear sel_pll_out0 */ + val = readl(pll_base + 0x4); + val &= ~(1UL << 8); + writel(val, pll_base + 0x4); + /* set pll_pd */ + val = readl(pll_base + 0x4); + val |= (1UL << 13); + writel(val, pll_base + 0x4); + /* clear set_pll_param */ + val = readl(pll_base + 0x4); + val &= ~(1UL << 11); + writel(val, pll_base + 0x4); + /* clear old value & config new value */ + val = readl(pll_base + 0x4); + val &= ~(0x7fUL << 0); + val |= (pll_cfg->l1_frefc << 0); /* refc */ + writel(val, pll_base + 0x4); + val = readl(pll_base + 0x0); + val &= ~(0x7fUL << 0); + val |= (pll_cfg->l2_div << 0); /* div */ + val &= ~(0x1ffUL << 21); + val |= (pll_cfg->l1_loopc << 21);/* loopc */ + writel(val, pll_base + 0x0); + /* set set_pll_param */ + val = readl(pll_base + 0x4); + val |= (1UL << 11); + writel(val, pll_base + 0x4); + /* clear pll_pd */ + val = readl(pll_base + 0x4); + val &= ~(1UL << 13); + writel(val, pll_base + 0x4); + /* wait pll lock */ + while(!(readl(pll_base + 0x4) & 0x80)) + cpu_relax(); + /* set sel_pll_out0 */ + val = readl(pll_base + 0x4); + val |= (1UL << 8); + writel(val, pll_base + 0x4); +} + +static void loongson_config_pll(int id, unsigned int pix_freq) +{ + unsigned int out; + struct pix_pll pll_cfg; + + out = cal_freq(pix_freq, &pll_cfg); + if (id == 0) + config_pll(LS7A_PIX0_PLL, &pll_cfg); + else + config_pll(LS7A_PIX1_PLL, &pll_cfg); +} + +/** + * These provide the minimum set of functions required to handle a CRTC + * Each driver is responsible for filling out this structure at startup time + * + * The drm_crtc_funcs structure is the central CRTC management structure + * in the DRM. Each CRTC controls one or more connectors + */ +static const struct drm_crtc_funcs loongson_swcursor_crtc_funcs = { + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = loongson_crtc_enable_vblank, + .disable_vblank = loongson_crtc_disable_vblank, +}; + +static const struct drm_crtc_funcs loongson_hwcursor_crtc_funcs = { + .cursor_set2 = loongson_crtc_cursor_set2, + .cursor_move = loongson_crtc_cursor_move, + .destroy = drm_crtc_cleanup, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = loongson_crtc_enable_vblank, + .disable_vblank = loongson_crtc_disable_vblank, +}; + +static const uint32_t loongson_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +}; + +static const uint64_t loongson_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + +static enum drm_mode_status loongson_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + int id = crtc->index; + struct drm_device *dev = crtc->dev; + struct loongson_drm_device *ldev = (struct loongson_drm_device*)dev->dev_private; + + if (mode->hdisplay > get_crtc_max_width(ldev, id)) + return MODE_BAD; + if (mode->vdisplay > get_crtc_max_height(ldev, id)) + return MODE_BAD; + if (ldev->num_crtc == 1) { + if (mode->hdisplay % 16) + return MODE_BAD; + } 
else { + if (mode->hdisplay % 64) + return MODE_BAD; + } + + return MODE_OK; +} + +u32 crtc_read(struct loongson_crtc *lcrtc, u32 offset) +{ + struct loongson_drm_device *ldev = lcrtc->ldev; + return readl(ldev->mmio + offset + (lcrtc->crtc_id * CRTC_REG_OFFSET)); +} + +void crtc_write(struct loongson_crtc *lcrtc, u32 offset, u32 val) +{ + struct loongson_drm_device *ldev = lcrtc->ldev; + writel(val, ldev->mmio + offset + (lcrtc->crtc_id * CRTC_REG_OFFSET)); +} + +static void loongson_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + unsigned int hr, hss, hse, hfl; + unsigned int vr, vss, vse, vfl; + unsigned int pix_freq; + unsigned long flags; + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + struct drm_display_mode *mode = &crtc->state->adjusted_mode; + + hr = mode->hdisplay; + hss = mode->hsync_start; + hse = mode->hsync_end; + hfl = mode->htotal; + + vr = mode->vdisplay; + vss = mode->vsync_start; + vse = mode->vsync_end; + vfl = mode->vtotal; + + pix_freq = mode->clock; + + DRM_DEBUG("crtc_id = %d, hr = %d, hss = %d, hse = %d, hfl = %d, vr = %d, vss = %d, vse = %d, vfl = %d, pix_freq = %d,\n", + lcrtc->crtc_id, hr, hss, hse, hfl, vr, vss, vse, vfl, pix_freq); + + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_DITCFG_DVO_REG, 0); + crtc_write(lcrtc, FB_DITTAB_LO_DVO_REG, 0); + crtc_write(lcrtc, FB_DITTAB_HI_DVO_REG, 0); + crtc_write(lcrtc, FB_PANCFG_DVO_REG, 0x80001311); + crtc_write(lcrtc, FB_PANTIM_DVO_REG, 0); + + crtc_write(lcrtc, FB_HDISPLAY_DVO_REG, (mode->crtc_htotal << 16) | mode->crtc_hdisplay); + crtc_write(lcrtc, FB_HSYNC_DVO_REG, 0x40000000 | (mode->crtc_hsync_end << 16) | mode->crtc_hsync_start); + + crtc_write(lcrtc, FB_VDISPLAY_DVO_REG, (mode->crtc_vtotal << 16) | mode->crtc_vdisplay); + crtc_write(lcrtc, FB_VSYNC_DVO_REG, 0x40000000 | (mode->crtc_vsync_end << 16) | mode->crtc_vsync_start); + + crtc_write(lcrtc, FB_STRI_DVO_REG, (crtc->primary->state->fb->pitches[0] + 255) & ~255); + + DRM_DEBUG("Stride: %x\n",(crtc->primary->state->fb->pitches[0] + 255) & ~255); + + switch (crtc->primary->state->fb->format->format) { + case DRM_FORMAT_RGB565: + lcrtc->cfg_reg |= 0x3; + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + break; + case DRM_FORMAT_RGB888: + default: + lcrtc->cfg_reg |= 0x4; + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + break; + } + spin_unlock_irqrestore(&loongson_reglock, flags); + + loongson_config_pll(lcrtc->crtc_id, mode->clock); +} + +static void loongson_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) +{ + unsigned long flags; + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + + if (lcrtc->cfg_reg & CFG_ENABLE) + goto vblank_on; + + lcrtc->cfg_reg |= CFG_ENABLE; + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + spin_unlock_irqrestore(&loongson_reglock, flags); + +vblank_on: + drm_crtc_vblank_on(crtc); +} + +static void loongson_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) +{ + unsigned long flags; + struct loongson_crtc *lcrtc = to_loongson_crtc(crtc); + + lcrtc->cfg_reg &= ~CFG_ENABLE; + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg); + spin_unlock_irqrestore(&loongson_reglock, flags); + + spin_lock_irq(&crtc->dev->event_lock); + if (crtc->state->event) { + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + } + spin_unlock_irq(&crtc->dev->event_lock); + + drm_crtc_vblank_off(crtc); +} + +static void 
loongson_crtc_atomic_flush(struct drm_crtc *crtc,
+				       struct drm_crtc_state *old_crtc_state)
+{
+	struct drm_pending_vblank_event *event = crtc->state->event;
+
+	if (event) {
+		crtc->state->event = NULL;
+
+		spin_lock_irq(&crtc->dev->event_lock);
+		if (drm_crtc_vblank_get(crtc) == 0)
+			drm_crtc_arm_vblank_event(crtc, event);
+		else
+			drm_crtc_send_vblank_event(crtc, event);
+		spin_unlock_irq(&crtc->dev->event_lock);
+	}
+}
+
+static void loongson_plane_atomic_update(struct drm_plane *plane,
+					 struct drm_plane_state *old_state)
+{
+	int id, clonemode;
+	unsigned int pitch;
+	unsigned long flags;
+	struct loongson_crtc *lcrtc;
+	struct loongson_drm_device *ldev;
+	struct drm_plane_state *state = plane->state;
+
+	if (!state->crtc || !state->fb)
+		return;
+
+	pitch = state->fb->pitches[0];
+	lcrtc = to_loongson_crtc(state->crtc);
+	ldev = lcrtc->ldev;
+	id = lcrtc->crtc_id;
+	clonemode = clone_mode(ldev);
+
+	/* CRTC1 cloned from CRTC0 in clone mode */
+	if (clonemode)
+		ldev->lcrtc[1].cfg_reg |= CFG_PANELSWITCH;
+	else
+		ldev->lcrtc[1].cfg_reg &= ~CFG_PANELSWITCH;
+
+	spin_lock_irqsave(&loongson_reglock, flags);
+	crtc_write(lcrtc, FB_STRI_DVO_REG, (pitch + 255) & ~255);
+	if (crtc_read(lcrtc, FB_CFG_DVO_REG) & CFG_FBNUM)
+		crtc_write(lcrtc, FB_ADDR0_DVO_REG, drm_fb_cma_get_gem_addr(state->fb, state, 0));
+	else
+		crtc_write(lcrtc, FB_ADDR1_DVO_REG, drm_fb_cma_get_gem_addr(state->fb, state, 0));
+
+	lcrtc->cfg_reg |= CFG_ENABLE;
+	crtc_write(lcrtc, FB_CFG_DVO_REG, lcrtc->cfg_reg | CFG_FBSWITCH);
+	if (clonemode) {
+		if (id)
+			crtc_write(&ldev->lcrtc[0], FB_CFG_DVO_REG, ldev->lcrtc[0].cfg_reg | CFG_ENABLE);
+		else
+			crtc_write(&ldev->lcrtc[1], FB_CFG_DVO_REG, ldev->lcrtc[1].cfg_reg | CFG_ENABLE);
+	}
+	spin_unlock_irqrestore(&loongson_reglock, flags);
+
+	udelay(2500);
+}
+
+/**
+ * These provide the minimum set of functions required to handle a CRTC
+ *
+ * The drm_crtc_helper_funcs structure provides the helper operations for a CRTC
+ */
+static const struct drm_crtc_helper_funcs loongson_crtc_helper_funcs = {
+	.mode_valid = loongson_crtc_mode_valid,
+	.mode_set_nofb = loongson_crtc_mode_set_nofb,
+	.atomic_enable = loongson_crtc_atomic_enable,
+	.atomic_disable = loongson_crtc_atomic_disable,
+	.atomic_flush = loongson_crtc_atomic_flush,
+};
+
+static void loongson_plane_destroy(struct drm_plane *plane)
+{
+	drm_plane_cleanup(plane);
+}
+
+static bool loongson_format_mod_supported(struct drm_plane *plane,
+					  uint32_t format, uint64_t modifier)
+{
+	return (modifier == DRM_FORMAT_MOD_LINEAR);
+}
+
+static const struct drm_plane_funcs loongson_plane_funcs = {
+	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+	.destroy = loongson_plane_destroy,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.reset = drm_atomic_helper_plane_reset,
+	.update_plane = drm_atomic_helper_update_plane,
+	.format_mod_supported = loongson_format_mod_supported,
+};
+
+static const struct drm_plane_helper_funcs loongson_plane_helper_funcs = {
+	.atomic_update = loongson_plane_atomic_update,
+};
+
+/**
+ * loongson_crtc_init
+ *
+ * @ldev: point to the loongson_drm_device structure
+ *
+ * Init CRTC
+ */
+int loongson_crtc_init(struct loongson_drm_device *ldev)
+{
+	int i, ret;
+	extern bool hw_cursor;
+
+	for (i = 0; i < ldev->num_crtc; i++) {
+		ldev->lcrtc[i].ldev = ldev;
+		ldev->lcrtc[i].crtc_id = i;
+
+		ldev->lcrtc[i].cfg_reg = CFG_RESET;
+		ldev->lcrtc[i].primary = devm_kzalloc(ldev->dev->dev, sizeof(*ldev->lcrtc[i].primary), GFP_KERNEL);
+		if
(!ldev->lcrtc[i].primary) + return -ENOMEM; + + ret = drm_universal_plane_init(ldev->dev, ldev->lcrtc[i].primary, BIT(i), &loongson_plane_funcs, + loongson_formats, ARRAY_SIZE(loongson_formats), + loongson_format_modifiers, DRM_PLANE_TYPE_PRIMARY, NULL); + if (ret) + return ret; + + drm_plane_helper_add(ldev->lcrtc[i].primary, &loongson_plane_helper_funcs); + + if (hw_cursor) + ret = drm_crtc_init_with_planes(ldev->dev, &ldev->lcrtc[i].base,ldev->lcrtc[i].primary, + NULL, &loongson_hwcursor_crtc_funcs, NULL); + else + ret = drm_crtc_init_with_planes(ldev->dev, &ldev->lcrtc[i].base,ldev->lcrtc[i].primary, + NULL, &loongson_swcursor_crtc_funcs, NULL); + if (ret) { + loongson_plane_destroy(ldev->lcrtc[i].primary); + return ret; + } + drm_crtc_helper_add(&ldev->lcrtc[i].base, &loongson_crtc_helper_funcs); + } + + return 0; +} diff --git a/drivers/gpu/drm/loongson/loongson_cursor.c b/drivers/gpu/drm/loongson/loongson_cursor.c new file mode 100644 index 000000000000..2f792539168a --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_cursor.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include "loongson_drv.h" + +static void flush_scache_range(void *addr, unsigned long size) +{ + int inc; + unsigned long start, end; + + start = (unsigned long)addr; + end = (unsigned long)addr + size; + inc = cpu_last_level_cache_line_size(); + + for (; start < end; start += inc) + flush_cache_line(Cache_LEAF3, start); +} + +/* + Hide the cursor off screen. 
We can't disable the cursor hardware because it + takes too long to re-activate and causes momentary corruption +*/ +static void loongson_hide_cursor(struct drm_crtc *crtc) +{ + unsigned long flags; + volatile void __iomem *base; + struct drm_device *dev = crtc->dev; + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + unsigned int tmp, crtc_id = loongson_crtc->crtc_id; + + base = ldev->mmio; + tmp = readl(base + FB_CUR_CFG_REG); + tmp &= ~0xff; + if (clone_mode(ldev)) { + spin_lock_irqsave(&loongson_reglock, flags); + writel(tmp | 0x00, base + FB_CUR_CFG_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + ldev->cursor_showed = false; + } else { + if (ldev->cursor_crtc_id != crtc_id) + return; + + spin_lock_irqsave(&loongson_reglock, flags); + if (crtc_id) { + writel(tmp | 0x10, base + FB_CUR_CFG_REG); + } else { + writel(tmp | 0x00, base + FB_CUR_CFG_REG); + } + spin_unlock_irqrestore(&loongson_reglock, flags); + ldev->cursor_showed = false; + } +} + +static void loongson_show_cursor(struct drm_crtc *crtc) +{ + unsigned long flags; + volatile void __iomem *base; + struct drm_device *dev = crtc->dev; + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + unsigned int crtc_id = loongson_crtc->crtc_id; + + base = ldev->mmio; + if (clone_mode(ldev)) { + spin_lock_irqsave(&loongson_reglock, flags); + writel(0x00050202, base + FB_CUR_CFG_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + ldev->cursor_crtc_id = 0; + ldev->cursor_showed = true; + } else { + if (ldev->cursor_crtc_id == crtc_id) { + spin_lock_irqsave(&loongson_reglock, flags); + if(crtc_id == 0){ + writel(0x00050202, base + FB_CUR_CFG_REG); + }else{ + writel(0x00050212, base + FB_CUR_CFG_REG); + } + spin_unlock_irqrestore(&loongson_reglock, flags); + + ldev->cursor_showed = true; + ldev->cursor_crtc_id = crtc_id; + } + } +} + +int loongson_crtc_cursor_set2(struct drm_crtc *crtc, + struct drm_file *file_priv, + uint32_t handle, + uint32_t width, + uint32_t height, + int32_t hot_x, int32_t hot_y) +{ + u32 gpu_addr; + unsigned long flags; + unsigned int crtc_id; + volatile void __iomem *base; + struct drm_gem_object *obj; + struct drm_device *dev = crtc->dev; + struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc); + struct loongson_drm_device *ldev = (struct loongson_drm_device *)dev->dev_private; + struct drm_gem_cma_object *cma, *cursor = ldev->cursor; + + base = ldev->mmio; + crtc_id = loongson_crtc->crtc_id; + + if ((width != 32 || height != 32) && handle) { + return -EINVAL; + } + + if (!handle || !file_priv) { + loongson_hide_cursor(crtc); + return 0; + } + + obj = drm_gem_object_lookup(file_priv, handle); + if (!obj) + return -ENOENT; + + cma = to_drm_gem_cma_obj(obj); + + flush_scache_range(cma->vaddr, 32*32*4); + memcpy(cursor->vaddr, cma->vaddr, 32*32*4); + + /* Program gpu address of cursor buffer */ + gpu_addr = ldev->cursor->paddr; + spin_lock_irqsave(&loongson_reglock, flags); + writel(gpu_addr, base + FB_CUR_ADDR_REG); + writel(0x00eeeeee, base + FB_CUR_BACK_REG); + writel(0x00aaaaaa, base + FB_CUR_FORE_REG); + spin_unlock_irqrestore(&loongson_reglock, flags); + + loongson_show_cursor(crtc); + + drm_gem_object_put(obj); + + return 0; +} + +int loongson_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) +{ + unsigned long flags; + unsigned int tmp, crtc_id; + int xorign = 0, yorign = 0; + volatile void 
__iomem *base;
+	struct loongson_crtc *loongson_crtc = to_loongson_crtc(crtc);
+	struct loongson_drm_device *ldev = (struct loongson_drm_device *)crtc->dev->dev_private;
+
+	base = ldev->mmio;
+	crtc_id = loongson_crtc->crtc_id;
+
+	/* upper edge condition */
+	yorign = y + crtc->y;
+	if (yorign < 0)
+		y = 0;
+
+	/* left edge condition */
+	xorign = x + crtc->x;
+	if (xorign < 0)
+		x = 0;
+
+	/* Move from one crtc to another: check on which crtc the cursor should be shown.
+	 * If x or y < 0, the cursor is out of the current view, while
+	 * xorign/yorign > 0 means the cursor is still within the framebuffer
+	 * but not within the current view */
+	if ((x < 0 && xorign > 0) || (y < 0 && yorign > 0)) {
+		if (ldev->cursor_crtc_id == crtc_id && !clone_mode(ldev))
+			/* the cursor is not shown, so hide it if (x, y) is in the active crtc */
+			loongson_hide_cursor(crtc);
+		return 0;
+	}
+
+	if (x < 0)
+		x = 0;
+	if (y < 0)
+		y = 0;
+
+	tmp = x & 0xffff;
+	tmp |= y << 16;
+	spin_lock_irqsave(&loongson_reglock, flags);
+	writel(tmp, base + FB_CUR_LOC_ADDR_REG);
+	spin_unlock_irqrestore(&loongson_reglock, flags);
+	if (ldev->cursor_crtc_id != crtc_id && !clone_mode(ldev)) {
+		ldev->cursor_crtc_id = crtc_id;
+		ldev->cursor_showed = false;
+	}
+	if (!ldev->cursor_showed)
+		loongson_show_cursor(crtc);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/loongson/loongson_drv.c b/drivers/gpu/drm/loongson/loongson_drv.c
new file mode 100644
index 000000000000..b705045d50e7
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_drv.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright (c) 2018 Loongson Technology Co., Ltd.
+ * Authors:
+ *	Chen Zhu
+ *	Yaling Fang
+ *	Dandan Zhang
+ *	Huacai Chen
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "loongson_drv.h"
+
+#define DEVICE_NAME	"loongson-drm"
+#define DRIVER_NAME	"loongson-drm"
+#define DRIVER_DESC	"Loongson DRM Driver"
+#define DRIVER_DATE	"20201201"
+#define DRIVER_MAJOR	1
+#define DRIVER_MINOR	0
+#define DRIVER_PATCHLEVEL	1
+
+bool hw_cursor = false;
+module_param_named(cursor, hw_cursor, bool, 0600);
+
+bool poll_connector = false;
+module_param_named(poll, poll_connector, bool, 0600);
+
+DEFINE_SPINLOCK(loongson_reglock);
+
+/**
+ * loongson_mode_funcs --- basic driver-provided mode setting functions
+ *
+ * Some global (i.e. not per-CRTC, connector, etc) mode setting functions that
+ * involve drivers.
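+ * This driver uses the stock GEM framebuffer creation helper and the
+ * atomic check/commit helpers.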
+ */
+static const struct drm_mode_config_funcs loongson_mode_funcs = {
+	.fb_create = drm_gem_fb_create,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+	.output_poll_changed = drm_fb_helper_output_poll_changed
+};
+
+/**
+ * loongson_drm_device_init --- init drm device
+ *
+ * @dev pointer to drm_device structure
+ * @flags start up flag
+ *
+ * RETURN
+ *  drm device init result
+ */
+static int loongson_drm_device_init(struct drm_device *dev, uint32_t flags)
+{
+	int ret = 0;
+	struct loongson_drm_device *ldev = dev->dev_private;
+
+	ldev->num_crtc = 2;
+	loongson_vbios_init(ldev);
+
+	/* BAR 0 contains registers */
+	ldev->mmio_base = pci_resource_start(ldev->dev->pdev, 0);
+	ldev->mmio_size = pci_resource_len(ldev->dev->pdev, 0);
+
+	ldev->mmio = pcim_iomap(dev->pdev, 0, 0);
+	if (ldev->mmio == NULL)
+		return -ENOMEM;
+
+	DRM_INFO("ldev->mmio_base = 0x%llx, ldev->mmio_size = 0x%llx\n",
+			ldev->mmio_base, ldev->mmio_size);
+
+	if (!devm_request_mem_region(ldev->dev->dev, ldev->mmio_base, ldev->mmio_size,
+				"loongson_drmfb_mmio")) {
+		DRM_ERROR("Can't reserve mmio registers\n");
+		return -ENOMEM;
+	}
+
+	ret = loongson_gpio_init(ldev);
+	if (ret < 0)
+		DRM_ERROR("Failed to initialize dc gpios\n");
+
+	return ret;
+}
+
+/**
+ * loongson_modeset_init --- init kernel mode setting
+ *
+ * @ldev: pointer to loongson_drm_device structure
+ *
+ * RETURN
+ *  return init result
+ */
+int loongson_modeset_init(struct loongson_drm_device *ldev)
+{
+	int i, ret;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+
+	ldev->mode_info[0].mode_config_initialized = true;
+	ldev->mode_info[1].mode_config_initialized = true;
+
+	ldev->dev->mode_config.max_width = LOONGSON_MAX_FB_WIDTH;
+	ldev->dev->mode_config.max_height = LOONGSON_MAX_FB_HEIGHT;
+
+	ldev->dev->mode_config.cursor_width = 32;
+	ldev->dev->mode_config.cursor_height = 32;
+
+	ldev->dev->mode_config.allow_fb_modifiers = true;
+
+	ret = loongson_i2c_init(ldev);
+	if (ret < 0) {
+		DRM_ERROR("Failed to initialize i2c\n");
+		return ret;
+	}
+
+	loongson_crtc_init(ldev);
+
+	for (i = 0; i < ldev->num_crtc; i++) {
+		DRM_DEBUG("loongson drm encoder init\n");
+		ldev->mode_info[i].crtc = &ldev->lcrtc[i];
+		encoder = loongson_encoder_init(ldev->dev, i);
+		if (!encoder) {
+			DRM_ERROR("loongson_encoder_init failed\n");
+			return -1;
+		}
+		ldev->mode_info[i].encoder = to_loongson_encoder(encoder);
+
+		DRM_DEBUG("loongson drm connector init\n");
+		connector = loongson_connector_init(ldev->dev, i);
+		if (!connector) {
+			DRM_ERROR("loongson_connector_init failed\n");
+			return -1;
+		}
+		ldev->mode_info[i].connector = to_loongson_connector(connector);
+
+		drm_connector_attach_encoder(connector, encoder);
+		if (poll_connector)
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT | DRM_CONNECTOR_POLL_DISCONNECT;
+	}
+
+	return 0;
+}
+
+/**
+ * loongson_modeset_fini --- deinit kernel mode setting
+ *
+ * @ldev: pointer to loongson_drm_device structure
+ *
+ * RETURN
+ */
+void loongson_modeset_fini(struct loongson_drm_device *ldev)
+{
+}
+
+static int loongson_detect_chip(struct loongson_drm_device *ldev)
+{
+	struct pci_dev *pdev;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, NULL);
+	if (pdev) {
+		ldev->chip = dc_7a1000;
+		ldev->gpu_pdev = pci_get_device(PCI_VENDOR_ID_LOONGSON, 0x7a15, NULL);
+		DRM_INFO("Set LS7A1000 DC device\n");
+		return 0;
+	}
+
+	pdev = pci_get_device(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, NULL);
+	if (pdev) {
+		ldev->chip = dc_7a2000;
+		ldev->gpu_pdev =
pci_get_device(PCI_VENDOR_ID_LOONGSON, 0x7a25, NULL); + DRM_INFO("Set LS7A2000 DC device\n"); + return 0; + } + + return -1; +} + +/** + * loongson_vga_load - setup chip and create an initial config + * @dev: DRM device + * @flags: startup flags + * + * The driver load routine has to do several things: + * - initialize the memory manager + * - allocate initial config memory + * - setup the DRM framebuffer with the allocated memory + */ +static int loongson_drm_load(struct drm_device *dev, unsigned long flags) +{ + int r, ret, irq; + struct loongson_drm_device *ldev; + + dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)); + + ldev = devm_kzalloc(dev->dev, sizeof(struct loongson_drm_device), GFP_KERNEL); + if (ldev == NULL) + return -ENOMEM; + dev->dev_private = (void *)ldev; + ldev->dev = dev; + + ret = loongson_detect_chip(ldev); + if (ret) + dev_err(dev->dev, "Fatal error during detect chip: %d\n", ret); + + ret = loongson_drm_device_init(dev, flags); + DRM_DEBUG("end loongson drm device init.\n"); + + drm_mode_config_init(dev); + dev->mode_config.funcs = (void *)&loongson_mode_funcs; + dev->mode_config.preferred_depth = 24; + dev->mode_config.prefer_shadow = 1; + + irq = dev->pdev->irq; + dev_set_drvdata(dev->dev, dev); + pci_set_drvdata(dev->pdev, dev); + + r = loongson_modeset_init(ldev); + if (r) + dev_err(dev->dev, "Fatal error during modeset init: %d\n", r); + + r = drm_irq_install(dev, irq); + if (r) + dev_err(dev->dev, "Fatal error during irq install: %d\n", r); + + ldev->inited = true; + drm_mode_config_reset(dev); + + r = drm_vblank_init(dev, ldev->num_crtc); + if (r) + dev_err(dev->dev, "Fatal error during vblank init: %d\n", r); + + /* Make small buffers to store a hardware cursor (double buffered icon updates) */ + ldev->cursor = drm_gem_cma_create(dev, roundup(32*32*4, PAGE_SIZE)); + + drm_kms_helper_poll_init(dev); + + drm_fb_helper_remove_conflicting_framebuffers(NULL, "loongson-drmfb", false); + + return 0; +} + +/** + * loongson_drm_unload--release drm resource + * + * @dev: pointer to drm_device + * + */ +static void loongson_drm_unload(struct drm_device *dev) +{ + struct loongson_drm_device *ldev = dev->dev_private; + + if (ldev == NULL) + return; + + loongson_modeset_fini(ldev); + drm_mode_config_cleanup(dev); + dev->dev_private = NULL; + dev_set_drvdata(dev->dev, NULL); + ldev->inited = false; + + return; +} + +/** + * loongson_drm_open -Driver callback when a new struct drm_file is opened. + * Useful for setting up driver-private data structures like buffer allocators, + * execution contexts or similar things. + * + * @dev DRM device + * @file DRM file private date + * + * RETURN + * 0 on success, a negative error code on failure, which will be promoted to + * userspace as the result of the open() system call. + */ +static int loongson_drm_open(struct drm_device *dev, struct drm_file *file) +{ + file->driver_priv = NULL; + + DRM_DEBUG("open: dev=%p, file=%p", dev, file); + + return 0; +} + +DEFINE_DRM_GEM_CMA_FOPS(fops); + +/** + * loongson_drm_driver - DRM device structure + * + * .load: driver callback to complete initialization steps after the driver is registered + * .unload:Reverse the effects of the driver load callback + * .open:Driver callback when a new struct drm_file is opened + * .fops:File operations for the DRM device node. + * .gem_free_object:deconstructor for drm_gem_objects + * .dumb_create:This creates a new dumb buffer in the driver’s backing storage manager + * (GEM, TTM or something else entirely) and returns the resulting buffer handle. 
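+ * (in this driver, a CMA-backed GEM object created by drm_gem_cma_dumb_create).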
+ * This handle can then be wrapped up into a framebuffer modeset object + * .dumb_map_offset:Allocate an offset in the drm device node’s address space + * to be able to memory map a dumb buffer + * .dump_destory:This destroys the userspace handle for the given dumb backing storage buffer + */ +static struct drm_driver loongson_drm_driver = { + .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_ATOMIC, + .open = loongson_drm_open, + .fops = &fops, + + .dumb_create = drm_gem_cma_dumb_create, + .gem_free_object_unlocked = drm_gem_cma_free_object, + .gem_vm_ops = &drm_gem_cma_vm_ops, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + + .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, + .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_vmap = drm_gem_cma_prime_vmap, + .gem_prime_vunmap = drm_gem_cma_prime_vunmap, + .gem_prime_mmap = drm_gem_cma_prime_mmap, + + .irq_handler = loongson_irq_handler, + .irq_preinstall = loongson_irq_preinstall, + .irq_postinstall = loongson_irq_postinstall, + .irq_uninstall = loongson_irq_uninstall, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +/** + * loongson_drm_pci_devices -- pci device id info + * + * __u32 vendor, device Vendor and device ID or PCI_ANY_ID + * __u32 subvendor, subdevice Subsystem ID's or PCI_ANY_ID + * __u32 class, class_mask (class,subclass,prog-if) triplet + * kernel_ulong_t driver_data Data private to the driver + */ +static struct pci_device_id loongson_drm_pci_devices[] = { + {PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1)}, + {PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2)}, + {0, 0, 0, 0, 0, 0, 0} +}; + +MODULE_DEVICE_TABLE(pci, loongson_drm_pci_devices); + +/** + * loongson_drm_pci_register -- add pci device + * + * @pdev PCI device + * @ent pci device id + */ +static int loongson_drm_pci_register(struct pci_dev *pdev, + const struct pci_device_id *ent) + +{ + int ret; + struct drm_device *dev; + + dev = drm_dev_alloc(&loongson_drm_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; + + loongson_drm_load(dev, 0x0); + + ret = drm_dev_register(dev, 0); + if (ret) + goto err_pdev; + + drm_fbdev_generic_setup(dev, 32); + + return 0; + +err_pdev: + pci_disable_device(pdev); +err_free: + drm_dev_put(dev); + return ret; +} + +/** + * loongson_drm_pci_unregister -- release drm device + * + * @pdev PCI device + */ +static void loongson_drm_pci_unregister(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + loongson_drm_unload(dev); + drm_dev_put(dev); +} + +/* + * Suspend & resume. + */ +/* + * loongson_drm_suspend - initiate device suspend + * + * @pdev: drm dev pointer + * @state: suspend state + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. 
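+ * PCI configuration space is saved here so that resume can restore it.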
+ */
+int loongson_drm_suspend(struct drm_device *dev, bool suspend)
+{
+	struct loongson_drm_device *ldev;
+
+	if (dev == NULL || dev->dev_private == NULL)
+		return -ENODEV;
+
+	ldev = dev->dev_private;
+
+	drm_kms_helper_poll_disable(dev);
+	ldev->state = drm_atomic_helper_suspend(dev);
+
+	pci_save_state(dev->pdev);
+	if (suspend) {
+		/* Shut down the device */
+		pci_disable_device(dev->pdev);
+		pci_set_power_state(dev->pdev, PCI_D3hot);
+	}
+
+	console_lock();
+	drm_fb_helper_set_suspend(ldev->dev->fb_helper, 1);
+	console_unlock();
+
+	return 0;
+}
+
+/*
+ * loongson_drm_resume - initiate device resume
+ *
+ * @pdev: drm dev pointer
+ * @state: suspend state
+ *
+ * Brings the hw out of the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver resume.
+ */
+
+int loongson_drm_resume(struct drm_device *dev, bool resume)
+{
+	struct loongson_drm_device *ldev = dev->dev_private;
+
+	console_lock();
+
+	if (resume) {
+		pci_set_power_state(dev->pdev, PCI_D0);
+		pci_restore_state(dev->pdev);
+		if (pci_enable_device(dev->pdev)) {
+			console_unlock();
+			return -1;
+		}
+	}
+
+	/* blat the mode back in */
+	drm_atomic_helper_resume(dev, ldev->state);
+
+	drm_kms_helper_poll_enable(dev);
+
+	drm_fb_helper_set_suspend(ldev->dev->fb_helper, 0);
+
+	console_unlock();
+
+	return 0;
+}
+
+/**
+ * loongson_drm_pm_suspend
+ *
+ * @dev pointer to the device
+ *
+ * Executed before putting the system into a sleep state in which the
+ * contents of main memory are preserved.
+ */
+static int loongson_drm_pm_suspend(struct device *dev)
+{
+	struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+	return loongson_drm_suspend(drm_dev, true);
+}
+
+/**
+ * loongson_drm_pm_resume
+ *
+ * @dev pointer to the device
+ *
+ * Executed after waking the system up from a sleep state in which the
+ * contents of main memory were preserved.
+ */
+static int loongson_drm_pm_resume(struct device *dev)
+{
+	struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+	return loongson_drm_resume(drm_dev, true);
+}
+
+/**
+ * loongson_drm_pm_freeze
+ *
+ * @dev pointer to device
+ *
+ * Hibernation-specific, executed before creating a hibernation image.
+ * Analogous to @suspend(), but it should not enable the device to signal
+ * wakeup events or change its power state.
+ */
+static int loongson_drm_pm_freeze(struct device *dev)
+{
+	struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+	return loongson_drm_suspend(drm_dev, false);
+}
+
+/**
+ * loongson_drm_pm_thaw
+ *
+ * @dev pointer to device
+ *
+ * Hibernation-specific, executed after creating a hibernation image OR
+ * if the creation of an image has failed. Also executed after a failing
+ * attempt to restore the contents of main memory from such an image.
+ * Undo the changes made by the preceding @freeze(), so the device can be
+ * operated in the same way as immediately before the call to @freeze().
+ */
+static int loongson_drm_pm_thaw(struct device *dev)
+{
+	struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+	return loongson_drm_resume(drm_dev, false);
+}
+
+#define loongson_drm_pm_poweroff loongson_drm_pm_freeze
+#define loongson_drm_pm_restore loongson_drm_pm_resume
+
+/*
+ * struct dev_pm_ops - device PM callbacks
+ *
+ *@suspend: Executed before putting the system into a sleep state in which the
+ *	contents of main memory are preserved.
+ *@resume: Executed after waking the system up from a sleep state in which the
+ *	contents of main memory were preserved.
+ *@freeze: Hibernation-specific, executed before creating a hibernation image.
+ *	Analogous to @suspend(), but it should not enable the device to signal
+ *	wakeup events or change its power state. The majority of subsystems
+ *	(with the notable exception of the PCI bus type) expect the driver-level
+ *	@freeze() to save the device settings in memory to be used by @restore()
+ *	during the subsequent resume from hibernation.
+ *@thaw: Hibernation-specific, executed after creating a hibernation image OR
+ *	if the creation of an image has failed. Also executed after a failing
+ *	attempt to restore the contents of main memory from such an image.
+ *	Undo the changes made by the preceding @freeze(), so the device can be
+ *	operated in the same way as immediately before the call to @freeze().
+ *@poweroff: Hibernation-specific, executed after saving a hibernation image.
+ *	Analogous to @suspend(), but it need not save the device's settings in
+ *	memory.
+ *@restore: Hibernation-specific, executed after restoring the contents of main
+ *	memory from a hibernation image, analogous to @resume().
+ */
+static const struct dev_pm_ops loongson_drm_pm_ops = {
+	.suspend = loongson_drm_pm_suspend,
+	.resume = loongson_drm_pm_resume,
+	.freeze = loongson_drm_pm_freeze,
+	.thaw = loongson_drm_pm_thaw,
+	.poweroff = loongson_drm_pm_poweroff,
+	.restore = loongson_drm_pm_restore,
+};
+
+/**
+ * loongson_drm_pci_driver -- pci driver structure
+ *
+ * .id_table : must be non-NULL for probe to be called
+ * .probe: New device inserted
+ * .remove: Device removed
+ * .suspend: Device suspended
+ * .resume: Device woken up
+ */
+static struct pci_driver loongson_drm_pci_driver = {
+	.name = DRIVER_NAME,
+	.id_table = loongson_drm_pci_devices,
+	.probe = loongson_drm_pci_register,
+	.remove = loongson_drm_pci_unregister,
+	.driver.pm = &loongson_drm_pm_ops,
+};
+
+/**
+ * loongson_drm_init() -- kernel module init function
+ */
+static int __init loongson_drm_init(void)
+{
+	int ret;
+	struct pci_dev *pdev = NULL;
+
+	/* If an external graphics card exists, use it as default */
+	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) {
+		if (pdev->vendor == PCI_VENDOR_ID_ATI)
+			return 0;
+		if (pdev->vendor == 0x1a03) /* ASpeed */
+			return 0;
+	}
+
+	ret = pci_register_driver(&loongson_drm_pci_driver);
+
+	return ret;
+}
+
+/**
+ * loongson_drm_exit() -- kernel module exit function
+ */
+static void __exit loongson_drm_exit(void)
+{
+	pci_unregister_driver(&loongson_drm_pci_driver);
+}
+
+module_init(loongson_drm_init);
+module_exit(loongson_drm_exit);
+
+MODULE_AUTHOR("Chen Zhu ");
+MODULE_AUTHOR("Huacai Chen ");
+MODULE_DESCRIPTION("Loongson LS7A DRM Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/loongson/loongson_drv.h b/drivers/gpu/drm/loongson/loongson_drv.h
new file mode 100644
index 000000000000..dbe3ebf951cd
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_drv.h
@@ -0,0 +1,241 @@
+#ifndef __LOONGSON_DRV_H__
+#define __LOONGSON_DRV_H__
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "loongson_i2c.h"
+#include "loongson_vbios.h"
+
+#define to_loongson_crtc(x) container_of(x, struct loongson_crtc, base)
+#define to_loongson_encoder(x) container_of(x, struct loongson_encoder, base)
+#define to_loongson_connector(x) container_of(x, struct loongson_connector, base)
+
+#define LOONGSON_MAX_FB_HEIGHT	4096
+#define LOONGSON_MAX_FB_WIDTH	4096
+
+#define CUR_WIDTH_SIZE	32
+#define CUR_HEIGHT_SIZE
32 + +#define LO_OFF 0 +#define HI_OFF 8 + +#define LS7A_PIX0_PLL (void *)TO_UNCACHE(LS7A_CHIPCFG_REG_BASE + 0x04b0) +#define LS7A_PIX1_PLL (void *)TO_UNCACHE(LS7A_CHIPCFG_REG_BASE + 0x04c0) + +#define CURIOSET_CORLOR 0x4607 +#define CURIOSET_POSITION 0x4608 +#define CURIOLOAD_ARGB 0x4609 +#define CURIOLOAD_IMAGE 0x460A +#define CURIOHIDE_SHOW 0x460B +#define FBEDID_GET 0X860C + +#define CRTC_REG_OFFSET 0x10 + +#define CFG_FMT GENMASK(2,0) +#define CFG_FBSWITCH BIT(7) +#define CFG_ENABLE BIT(8) +#define CFG_PANELSWITCH BIT(9) +#define CFG_FBNUM_BIT 11 +#define CFG_FBNUM BIT(11) +#define CFG_GAMMAR BIT(12) +#define CFG_RESET BIT(20) + +#define FB_CFG_DVO_REG (0x1240) +#define FB_ADDR0_DVO_REG (0x1260) +#define FB_ADDR1_DVO_REG (0x1580) +#define FB_STRI_DVO_REG (0x1280) +#define FB_DITCFG_DVO_REG (0x1360) +#define FB_DITTAB_LO_DVO_REG (0x1380) +#define FB_DITTAB_HI_DVO_REG (0x13a0) +#define FB_PANCFG_DVO_REG (0x13c0) +#define FB_PANTIM_DVO_REG (0x13e0) +#define FB_HDISPLAY_DVO_REG (0x1400) +#define FB_HSYNC_DVO_REG (0x1420) +#define FB_VDISPLAY_DVO_REG (0x1480) +#define FB_VSYNC_DVO_REG (0x14a0) +#define FB_GAMINDEX_DVO_REG (0x14e0) +#define FB_GAMDATA_DVO_REG (0x1500) + +#define HDMI_ZONEIDLE_REG (0x1700) +#define HDMI_CTRL_REG (0x1720) + +#define HDMI_AUDIO_BUF_REG (0x1740) +#define HDMI_AUDIO_NCFG_REG (0x1760) +#define HDMI_AUDIO_CTSCFG_REG (0x1780) +#define HDMI_AUDIO_CTSCALCFG_REG (0x17a0) +#define HDMI_AUDIO_INFOFRAME_REG (0x17c0) +#define HDMI_AUDIO_SAMPLE_REG (0x17e0) + +#define HDMI_PHY_CTRL_REG (0x1800) +#define HDMI_PHY_PLLCFG_REG (0x1820) + +#define FB_CUR_CFG_REG (0x1520) +#define FB_CUR_ADDR_REG (0x1530) +#define FB_CUR_LOC_ADDR_REG (0x1540) +#define FB_CUR_BACK_REG (0x1550) +#define FB_CUR_FORE_REG (0x1560) +#define FB_INT_REG (0x1570) + +#define INT_DVO1_VSYNC 0 +#define INT_DVO1_HSYNC 1 +#define INT_DVO0_VSYNC 2 +#define INT_DVO0_HSYNC 3 +#define INT_CURSOR_FB_END 4 +#define INT_DVO1_FB_END 5 +#define INT_DVO0_FB_END 6 + +#define MAX_CRTC 2 + +enum loongson_chip { + dc_7a1000, + dc_7a2000 +}; + +struct pix_pll { + unsigned int l2_div; + unsigned int l1_loopc; + unsigned int l1_frefc; +}; + +struct config_reg { + u8 dev_addr; + u8 reg; + u8 value; +} __packed; + +struct cfg_encoder { + u8 reg_num; + u32 hdisplay; + u32 vdisplay; + struct config_reg config_regs[256]; +}; + +struct loongson_crtc { + struct drm_crtc base; + unsigned int crtc_id; + uint32_t cfg_reg; + struct drm_plane *primary; /* Primary panel belongs to this crtc */ + struct drm_pending_vblank_event *event; + struct loongson_drm_device *ldev; +}; + +struct loongson_encoder { + struct drm_encoder base; + u32 type; + u32 i2c_id; + int encoder_id; + struct loongson_i2c *i2c; + struct loongson_crtc *lcrtc; /* Binding crtc, not actual one */ +}; + +struct loongson_connector { + struct drm_connector base; + u16 id; + u32 type; + u16 i2c_id; + u16 hotplug; + u16 edid_method; + u8 *vbios_edid; + struct loongson_i2c *i2c; + struct loongson_drm_device *ldev; +}; + +struct loongson_mode_info { + bool mode_config_initialized; + struct loongson_crtc *crtc; + struct loongson_encoder *encoder; + struct loongson_connector *connector; +}; + +struct loongson_drm_device { + struct drm_device *dev; + struct drm_atomic_state *state; + struct pci_dev *gpu_pdev; /* LS7A gpu device info */ + + resource_size_t mmio_base; + resource_size_t mmio_size; + void __iomem *mmio; + uint32_t int_reg; + + struct drm_display_mode mode; + struct loongson_mode_info mode_info[2]; + struct drm_gem_cma_object *cursor; + + int num_crtc; + struct 
loongson_crtc lcrtc[MAX_CRTC]; + struct loongson_i2c i2c_bus[DC_I2C_BUS_MAX]; + + void *vbios; + struct list_head desc_list; + + bool inited; + bool suspended; + bool cursor_showed; + int cursor_crtc_id; + + int connector_active0; + int connector_active1; + + enum loongson_chip chip; +}; + +extern spinlock_t loongson_reglock; + +/* FIXME: LS7A2000's switch_panel is not available, just return false. */ +static inline bool clone_mode(struct loongson_drm_device *ldev) +{ + return false; + + if (ldev->num_crtc < 2) + return true; + if (ldev->mode_info[0].connector->base.status != connector_status_connected) + return true; + if (ldev->mode_info[1].connector->base.status != connector_status_connected) + return true; + if (ldev->lcrtc[0].base.x || ldev->lcrtc[0].base.y) + return false; + if (ldev->lcrtc[1].base.x || ldev->lcrtc[1].base.y) + return false; + + return true; +} + +int loongson_irq_enable_vblank(struct drm_device *dev,unsigned int crtc_id); +void loongson_irq_disable_vblank(struct drm_device *dev,unsigned int crtc_id); +irqreturn_t loongson_irq_handler(int irq,void *arg); +void loongson_irq_preinstall(struct drm_device *dev); +int loongson_irq_postinstall(struct drm_device *dev); +void loongson_irq_uninstall(struct drm_device *dev); + +u32 crtc_read(struct loongson_crtc *lcrtc, u32 offset); +void crtc_write(struct loongson_crtc *lcrtc, u32 offset, u32 val); + +int loongson_gpio_init(struct loongson_drm_device *ldev); +int loongson_crtc_init(struct loongson_drm_device *ldev); +struct drm_encoder *loongson_encoder_init(struct drm_device *dev, unsigned int index); +struct drm_connector *loongson_connector_init(struct drm_device *dev, unsigned int index); + +int loongson_fbdev_init(struct loongson_drm_device *ldev); +void loongson_fbdev_fini(struct loongson_drm_device *ldev); +void loongson_fbdev_restore_mode(struct loongson_drm_device *ldev); + +int loongson_drm_drm_suspend(struct drm_device *dev, bool suspend, + bool fbcon, bool freeze); +int loongson_drm_drm_resume(struct drm_device *dev, bool resume, bool fbcon); + /* loongson_cursor.c */ +int loongson_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, + uint32_t handle, uint32_t width, uint32_t height, int32_t hot_x, int32_t hot_y); +int loongson_crtc_cursor_move(struct drm_crtc *crtc, int x, int y); + +#endif diff --git a/drivers/gpu/drm/loongson/loongson_encoder.c b/drivers/gpu/drm/loongson/loongson_encoder.c new file mode 100644 index 000000000000..f2d4c49f5319 --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_encoder.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 Loongson Technology Co., Ltd. + * Copyright (C) 2019 Lemote Inc. 
+ * Authors: + * Chen Zhu + * Yaling Fang + * Dandan Zhang + * Huacai Chen + * Jiaxun Yang + */ + +#include +#include "loongson_drv.h" + +/** + * loongson_encoder_destroy + * + * @encoder: encoder object + * + * Clean up encoder resources + */ +static void loongson_encoder_destroy(struct drm_encoder *encoder) +{ + struct loongson_encoder *loongson_encoder = to_loongson_encoder(encoder); + drm_encoder_cleanup(encoder); + kfree(loongson_encoder); +} + +static int loongson_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + return 0; +} + +static void loongson_encoder_atomic_mode_set(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + unsigned long flags; + struct loongson_encoder *lenc = to_loongson_encoder(encoder); + struct loongson_crtc *lcrtc_origin = lenc->lcrtc; + struct loongson_crtc *lcrtc_current = to_loongson_crtc(crtc_state->crtc); + + if (lcrtc_origin->crtc_id != lcrtc_current->crtc_id) + lcrtc_origin->cfg_reg |= CFG_PANELSWITCH; + else + lcrtc_origin->cfg_reg &= ~CFG_PANELSWITCH; + + spin_lock_irqsave(&loongson_reglock, flags); + crtc_write(lcrtc_origin, FB_CFG_DVO_REG, lcrtc_origin->cfg_reg); + spin_unlock_irqrestore(&loongson_reglock, flags); +} + +/** + * These provide the minimum set of functions required to handle a encoder + * + * Helper operations for encoders + */ +static const struct drm_encoder_helper_funcs loongson_encoder_helper_funcs = { + .atomic_check = loongson_encoder_atomic_check, + .atomic_mode_set = loongson_encoder_atomic_mode_set, +}; + +/** + * These provide the minimum set of functions required to handle a encoder + * + * Encoder controls,encoder sit between CRTCs and connectors + */ +static const struct drm_encoder_funcs loongson_encoder_encoder_funcs = { + .destroy = loongson_encoder_destroy, +}; + +static void loongson_hdmi_init(struct loongson_drm_device *ldev, int index) +{ + u32 val; + int offset = index * 0x10; + volatile void __iomem *base = ldev->mmio; + + spin_lock(&loongson_reglock); + writel(0x287, base + HDMI_CTRL_REG + offset); + + writel(0x00400040, base + HDMI_ZONEIDLE_REG + offset); + + writel(6272, base + HDMI_AUDIO_NCFG_REG + offset); + writel(0x80000000, base + HDMI_AUDIO_CTSCFG_REG + offset); + + writel(0x11, base + HDMI_AUDIO_INFOFRAME_REG + offset); + val = readl(base + HDMI_AUDIO_INFOFRAME_REG + offset) | 0x4; + writel(val, base + HDMI_AUDIO_INFOFRAME_REG + offset); + + writel(0x1, base + HDMI_AUDIO_SAMPLE_REG + offset); + spin_unlock(&loongson_reglock); + + DRM_DEBUG("Loongson HDMI init finish.\n"); +} + +/** + * loongson_encoder_init + * + * @dev: point to the drm_device structure + * + * Init encoder + */ +struct drm_encoder *loongson_encoder_init(struct drm_device *dev, unsigned int index) +{ + struct drm_encoder *encoder; + struct loongson_encoder *loongson_encoder; + struct loongson_drm_device *ldev = dev->dev_private; + + loongson_encoder = kzalloc(sizeof(struct loongson_encoder), GFP_KERNEL); + if (!loongson_encoder) + return NULL; + + loongson_encoder->encoder_id = index; + loongson_encoder->i2c = &ldev->i2c_bus[index]; + loongson_encoder->lcrtc = &ldev->lcrtc[index]; + loongson_encoder->type = get_encoder_type(ldev, index); + encoder = &loongson_encoder->base; + + if (loongson_encoder->type == DRM_MODE_ENCODER_TMDS) + loongson_hdmi_init(ldev, index); + + encoder->possible_crtcs = BIT(index); + encoder->possible_clones = BIT(1) | BIT(0); + /* encoder->possible_crtcs = BIT(1) | 
BIT(0); */ + + drm_encoder_helper_add(encoder, &loongson_encoder_helper_funcs); + drm_encoder_init(dev, encoder, &loongson_encoder_encoder_funcs, + loongson_encoder->type, NULL); + + return encoder; +} diff --git a/drivers/gpu/drm/loongson/loongson_i2c.c b/drivers/gpu/drm/loongson/loongson_i2c.c new file mode 100644 index 000000000000..55a59ccfd78e --- /dev/null +++ b/drivers/gpu/drm/loongson/loongson_i2c.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "loongson_i2c.h" +#include "loongson_drv.h" +#include "loongson_vbios.h" + +u32 ls7a_mm_rreg(struct loongson_drm_device *ldev, u32 offset) +{ + return readl(ldev->mmio + offset); +} + +void ls7a_mm_wreg(struct loongson_drm_device *ldev, u32 offset, u32 val) +{ + writel(val, ldev->mmio + offset); +} + +static inline void __dc_gpio_set_dir(struct loongson_drm_device *ldev, + unsigned int pin, int input) +{ + u32 temp; + + temp = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_CFG_OFFSET); + if (input) + temp |= 1UL << pin; + else + temp &= ~(1UL << pin); + ls7a_mm_wreg(ldev, LS7A_DC_GPIO_CFG_OFFSET, temp); +} + +static void __dc_gpio_set_val(struct loongson_drm_device *ldev, unsigned int pin, + int high) +{ + u32 temp; + + temp = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_OUT_OFFSET); + if (high) + temp |= 1UL << pin; + else + temp &= ~(1UL << pin); + ls7a_mm_wreg(ldev, LS7A_DC_GPIO_OUT_OFFSET, temp); +} + +static void loongson_i2c_set_data(void *i2c, int value) +{ + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->data; + + if (value) + __dc_gpio_set_dir(ldev, pin, 1); + else { + __dc_gpio_set_val(ldev, pin, 0); + __dc_gpio_set_dir(ldev, pin, 0); + } +} + +static void loongson_i2c_set_clock(void *i2c, int value) +{ + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->clock; + + if (value) + __dc_gpio_set_dir(ldev, pin, 1); + else { + __dc_gpio_set_val(ldev, pin, 0); + __dc_gpio_set_dir(ldev, pin, 0); + } +} + +static int loongson_i2c_get_data(void *i2c) +{ + int val; + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->data; + + val = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_IN_OFFSET); + + return (val >> pin) & 1; +} + +static int loongson_i2c_get_clock(void *i2c) +{ + int val; + struct loongson_i2c *li2c = i2c; + struct loongson_drm_device *ldev = li2c->ldev; + unsigned int pin = li2c->clock; + + val = ls7a_mm_rreg(ldev, LS7A_DC_GPIO_IN_OFFSET); + + return (val >> pin) & 1; +} + +static int loongson_i2c_create(struct loongson_drm_device *ldev, + struct loongson_i2c *li2c, const char *name) +{ + int ret; + unsigned int i2c_num; + struct i2c_adapter *i2c_adapter; + struct i2c_algo_bit_data *i2c_algo_data; + + i2c_num = li2c->i2c_id; + i2c_adapter = kzalloc(sizeof(struct i2c_adapter), GFP_KERNEL); + if (!i2c_adapter) + return -ENOMEM; + + i2c_algo_data = kzalloc(sizeof(struct i2c_algo_bit_data), GFP_KERNEL); + if (!i2c_algo_data) { + ret = -ENOMEM; + goto free_adapter; + } + + i2c_adapter->owner = THIS_MODULE; + i2c_adapter->class = I2C_CLASS_DDC; + i2c_adapter->algo_data = i2c_algo_data; + i2c_adapter->dev.parent = ldev->dev->dev; + i2c_adapter->nr = -1; + snprintf(i2c_adapter->name, sizeof(i2c_adapter->name), "%s%d", name, + i2c_num); + + li2c->data = i2c_num * 2; + li2c->clock = i2c_num * 2 + 1; + DRM_INFO("Created dc-i2c%d, sda=%d, scl=%d\n", i2c_num, li2c->data, + li2c->clock); + + i2c_algo_data->setsda = loongson_i2c_set_data; + i2c_algo_data->setscl = loongson_i2c_set_clock; + 
i2c_algo_data->getsda = loongson_i2c_get_data;
+	i2c_algo_data->getscl = loongson_i2c_get_clock;
+	i2c_algo_data->udelay = DC_I2C_TON;
+	i2c_algo_data->timeout = usecs_to_jiffies(2200); /* from VESA */
+
+	ret = i2c_bit_add_numbered_bus(i2c_adapter);
+	if (ret)
+		goto free_algo_data;
+
+	li2c->adapter = i2c_adapter;
+	i2c_algo_data->data = li2c;
+	i2c_set_adapdata(li2c->adapter, li2c);
+	li2c->init = true;
+	li2c->ldev = ldev;
+	DRM_INFO("Register i2c algo-bit adapter [%s]\n", i2c_adapter->name);
+
+	return 0;
+
+free_algo_data:
+	DRM_ERROR("Failed to register i2c adapter %s\n", i2c_adapter->name);
+	kfree(i2c_algo_data);
+free_adapter:
+	kfree(i2c_adapter);
+
+	return ret;
+}
+
+int loongson_gpio_init(struct loongson_drm_device *ldev)
+{
+	int pin;
+
+	/* set gpio dir output 0-3 */
+	for (pin = 0; pin < 4; pin++) {
+		__dc_gpio_set_val(ldev, pin, 0);
+		__dc_gpio_set_dir(ldev, pin, 0);
+	}
+
+	return 0;
+}
+
+int loongson_i2c_init(struct loongson_drm_device *ldev)
+{
+	int ret, i;
+
+	ret = get_loongson_i2c(ldev);
+	if (!ret) {
+		DRM_ERROR("Failed to get i2c_id from vbios\n");
+		return -ENODEV;
+	}
+
+	for (i = 0; i < DC_I2C_BUS_MAX; i++) {
+		if (!ldev->i2c_bus[i].use)
+			continue;
+		ldev->i2c_bus[i].i2c_id = i;
+		ret = loongson_i2c_create(ldev, &ldev->i2c_bus[i], DC_I2C_NAME);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/loongson/loongson_i2c.h b/drivers/gpu/drm/loongson/loongson_i2c.h
new file mode 100644
index 000000000000..97d3d139ca73
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_i2c.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2020 Loongson Technology Co., Ltd.
+ * Authors:
+ *	sunhao
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __LOONGSON_I2C_H__
+#define __LOONGSON_I2C_H__
+
+#include
+#include
+#include
+
+/* Modify this macro to configure the i2c bus speed: bus_freq = 500 / T (kHz) */
+/* e.g. T = 5 gives i2c_bus_freq = 100 kHz */
+#define DC_I2C_TON	5
+#define DC_I2C_NAME	"ls7a_dc_i2c"
+#define DC_I2C_BUS_MAX	2
+
+/* Loongson 7A display controller proprietary GPIOs */
+#define LS7A_DC_GPIO_CFG_OFFSET	(0x1660)
+#define LS7A_DC_GPIO_IN_OFFSET	(0x1650)
+#define LS7A_DC_GPIO_OUT_OFFSET	(0x1650)
+
+struct loongson_drm_device;
+
+struct loongson_i2c {
+	struct loongson_drm_device *ldev;
+	struct i2c_client *ddc_client;
+	struct i2c_adapter *adapter;
+	u32 data, clock;
+	bool use, init;
+	u32 i2c_id;
+};
+
+int loongson_i2c_init(struct loongson_drm_device *ldev);
+
+#endif
diff --git a/drivers/gpu/drm/loongson/loongson_irq.c b/drivers/gpu/drm/loongson/loongson_irq.c
new file mode 100644
index 000000000000..aeed283a1a54
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_irq.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 Loongson Technology Co., Ltd.
+ * Authors:
+ *	Chen Zhu
+ *	Yaling Fang
+ *	Dandan Zhang
+ *	Huacai Chen
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include "loongson_drv.h"
+
+irqreturn_t loongson_irq_handler(int irq, void *arg)
+{
+	unsigned int val;
+	struct drm_device *dev = (struct drm_device *) arg;
+	struct loongson_drm_device *ldev = dev->dev_private;
+	volatile void __iomem *base = ldev->mmio;
+
+	val = readl(base + FB_INT_REG);
+	spin_lock(&loongson_reglock);
+	writel(val, base + FB_INT_REG);
+	spin_unlock(&loongson_reglock);
+
+	if (val & BIT(INT_DVO0_FB_END)) {
+		drm_crtc_handle_vblank(&ldev->lcrtc[0].base);
+	}
+
+	if (val & BIT(INT_DVO1_FB_END)) {
+		drm_crtc_handle_vblank(&ldev->lcrtc[1].base);
+	}
+
+	spin_lock(&loongson_reglock);
+	writel(ldev->int_reg, base + FB_INT_REG);
+	spin_unlock(&loongson_reglock);
+
+	return IRQ_HANDLED;
+}
+
+void loongson_irq_preinstall(struct drm_device *dev)
+{
+	unsigned long flags;
+	struct loongson_drm_device *ldev = dev->dev_private;
+	volatile void __iomem *base = ldev->mmio;
+
+	/* disable interrupt */
+	spin_lock_irqsave(&loongson_reglock, flags);
+	writel(0x0000 << 16, base + FB_INT_REG);
+	spin_unlock_irqrestore(&loongson_reglock, flags);
+}
+
+int loongson_irq_postinstall(struct drm_device *dev)
+{
+	return 0;
+}
+
+void loongson_irq_uninstall(struct drm_device *dev)
+{
+	unsigned long flags;
+	struct loongson_drm_device *ldev = dev->dev_private;
+	volatile void __iomem *base = ldev->mmio;
+
+	/* disable interrupt */
+	spin_lock_irqsave(&loongson_reglock, flags);
+	writel(0x0000 << 16, base + FB_INT_REG);
+	spin_unlock_irqrestore(&loongson_reglock, flags);
+}
diff --git a/drivers/gpu/drm/loongson/loongson_vbios.c b/drivers/gpu/drm/loongson/loongson_vbios.c
new file mode 100644
index 000000000000..38bf164f7b16
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_vbios.c
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2018 Loongson Technology Co., Ltd.
+ * Authors:
+ *	Chen Zhu
+ *	Yaling Fang
+ *	Dandan Zhang
+ *	Huacai Chen
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */ + +#include "loongson_drv.h" +#include "loongson_vbios.h" + +#define VBIOS_START 0x1000 +#define VBIOS_SIZE 0x40000 +#define VBIOS_OFFSET 0x100000 +#define VBIOS_DESC_OFFSET 0x6000 +#define VBIOS_TITLE "Loongson-VBIOS" + +/* VBIOS INFO ADDRESS TABLE */ +struct acpi_viat_table { + struct acpi_table_header header; + unsigned long vbios_addr; +} __packed; + +static u32 get_vbios_version(struct loongson_vbios *vbios) +{ + u32 minor, major, version; + + minor = vbios->version_minor; + major = vbios->version_major; + version = major * 10 + minor; + + return version; +} + +static bool parse_vbios_i2c(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + int i, num, size; + struct vbios_i2c *i2c; + struct vbios_i2c *vbios_i2c = NULL; + struct loongson_i2c *val = (struct loongson_i2c *)cmd->res; + + size = this->desc->size; + vbios_i2c = kzalloc(size, GFP_KERNEL); + if (!vbios_i2c) + return false; + + memset(vbios_i2c, 0xff, size); + memcpy(vbios_i2c, this->data, size); + num = size / sizeof(*vbios_i2c); + + i2c = vbios_i2c; + for (i = 0; (i < num && i < DC_I2C_BUS_MAX); i++) { + val->i2c_id = (u32)i2c->id; + val->use = true; + val++; + i2c++; + } + + kfree(vbios_i2c); + return ret; +} + +static bool parse_vbios_crtc(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_crtc crtc; + + memset(&crtc, 0xff, sizeof(crtc)); + memcpy(&crtc, this->data, min_t(u32, this->desc->size, sizeof(crtc))); + + switch (request) { + case VBIOS_CRTC_ID: + *val = crtc.crtc_id; + break; + case VBIOS_CRTC_ENCODER_ID: + *val = crtc.encoder_id; + break; + case VBIOS_CRTC_MAX_FREQ: + *val = crtc.max_freq; + break; + case VBIOS_CRTC_MAX_WIDTH: + *val = crtc.max_width; + break; + case VBIOS_CRTC_MAX_HEIGHT: + *val = crtc.max_height; + break; + case VBIOS_CRTC_IS_VB_TIMING: + *val = crtc.is_vb_timing; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_encoder(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_encoder encoder; + + memset(&encoder, 0xff, sizeof(encoder)); + memcpy(&encoder, this->data, + min_t(u32, this->desc->size, sizeof(encoder))); + + switch (request) { + case VBIOS_ENCODER_I2C_ID: + *val = encoder.i2c_id; + break; + case VBIOS_ENCODER_CONNECTOR_ID: + *val = encoder.connector_id; + break; + case VBIOS_ENCODER_TYPE: + *val = encoder.type; + break; + case VBIOS_ENCODER_CONFIG_TYPE: + *val = encoder.config_type; + break; + case VBIOS_ENCODER_CHIP: + *val = encoder.chip; + break; + case VBIOS_ENCODER_CHIP_ADDR: + *val = encoder.chip_addr; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_cfg_encoder(struct desc_node *this, + struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct cfg_encoder *cfg_encoder; + struct cfg_encoder *cfg; + struct vbios_cfg_encoder *vbios_cfg_encoder; + u32 num, size, i = 0; + + vbios_cfg_encoder = (struct vbios_cfg_encoder *)this->data; + size = sizeof(struct vbios_cfg_encoder); + num = this->desc->size / size; + + switch (request) { + case VBIOS_ENCODER_CONFIG_PARAM: + cfg_encoder = (struct cfg_encoder *)kzalloc( + sizeof(struct cfg_encoder) * num, GFP_KERNEL); + cfg = cfg_encoder; + for (i = 0; i < num; i++) { + cfg->reg_num = vbios_cfg_encoder->reg_num; + cfg->hdisplay = vbios_cfg_encoder->hdisplay; + cfg->vdisplay = 
vbios_cfg_encoder->vdisplay; + memcpy(&cfg->config_regs, + &vbios_cfg_encoder->config_regs, + sizeof(struct vbios_conf_reg) * 256); + + cfg++; + vbios_cfg_encoder++; + } + cmd->res = (void *)cfg_encoder; + break; + case VBIOS_ENCODER_CONFIG_NUM: + *val = num; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_connector(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_connector connector; + + memset(&connector, 0xff, sizeof(connector)); + memcpy(&connector, this->data, + min_t(u32, this->desc->size, sizeof(connector))); + + switch (request) { + case VBIOS_CONNECTOR_I2C_ID: + *val = connector.i2c_id; + break; + case VBIOS_CONNECTOR_INTERNAL_EDID: + memcpy((u8 *)(ulong)val, connector.internal_edid, + EDID_LENGTH * 2); + break; + case VBIOS_CONNECTOR_TYPE: + *val = connector.type; + break; + case VBIOS_CONNECTOR_HOTPLUG: + *val = connector.hotplug; + break; + case VBIOS_CONNECTOR_EDID_METHOD: + *val = connector.edid_method; + break; + case VBIOS_CONNECTOR_IRQ_GPIO: + *val = connector.irq_gpio; + break; + case VBIOS_CONNECTOR_IRQ_PLACEMENT: + *val = connector.gpio_placement; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_backlight(struct desc_node *this, struct vbios_cmd *cmd) +{ + return 0; +} + +static bool parse_vbios_pwm(struct desc_node *this, struct vbios_cmd *cmd) +{ + bool ret = true; + u64 request = (u64)cmd->req; + u32 *val = (u32 *)cmd->res; + struct vbios_pwm *pwm = (struct vbios_pwm *)this->data; + + switch (request) { + case VBIOS_PWM_ID: + *val = pwm->pwm; + break; + case VBIOS_PWM_PERIOD: + *val = pwm->peroid; + break; + case VBIOS_PWM_POLARITY: + *val = pwm->polarity; + break; + default: + ret = false; + break; + } + + return ret; +} + +static bool parse_vbios_header(struct desc_node *this, struct vbios_cmd *cmd) +{ + return true; +} + +static bool parse_vbios_default(struct desc_node *this, struct vbios_cmd *cmd) +{ + struct vbios_desc *vb_desc; + + vb_desc = this->desc; + DRM_WARN("Current descriptor[T-%d][V-%d] cannot be interprete.\n", + vb_desc->type, vb_desc->ver); + return false; +} + +#define FUNC(t, v, f) \ + { \ + .type = t, .ver = v, .func = f, \ + } + +static struct desc_func tables[] = { + FUNC(desc_header, ver_v1, parse_vbios_header), + FUNC(desc_i2c, ver_v1, parse_vbios_i2c), + FUNC(desc_crtc, ver_v1, parse_vbios_crtc), + FUNC(desc_encoder, ver_v1, parse_vbios_encoder), + FUNC(desc_connector, ver_v1, parse_vbios_connector), + FUNC(desc_cfg_encoder, ver_v1, parse_vbios_cfg_encoder), + FUNC(desc_backlight, ver_v1, parse_vbios_backlight), + FUNC(desc_pwm, ver_v1, parse_vbios_pwm), +}; + +static inline parse_func *get_parse_func(struct vbios_desc *vb_desc) +{ + int i; + u32 type = vb_desc->type; + u32 ver = vb_desc->ver; + parse_func *func = parse_vbios_default; + u32 tt_num = ARRAY_SIZE(tables); + + for (i = 0; i < tt_num; i++) { + if ((tables[i].ver == ver) && (tables[i].type == type)) { + func = tables[i].func; + break; + } + } + + return func; +} + +static inline u32 insert_desc_list(struct loongson_drm_device *ldev, + struct vbios_desc *vb_desc) +{ + struct desc_node *node; + parse_func *func = NULL; + + WARN_ON(!ldev || !vb_desc); + node = (struct desc_node *)kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + func = get_parse_func(vb_desc); + node->parse = func; + node->desc = (void *)vb_desc; + node->data = ((u8 *)ldev->vbios + vb_desc->offset); + 
list_add_tail(&node->head, &ldev->desc_list); + + return 0; +} + +static inline void free_desc_list(struct loongson_drm_device *ldev) +{ + struct desc_node *node, *tmp; + + list_for_each_entry_safe (node, tmp, &ldev->desc_list, head) { + list_del(&node->head); + kfree(node); + } +} + +#define DESC_LIST_MAX 1024 + +static u32 parse_vbios_desc(struct loongson_drm_device *ldev) +{ + u32 i, ret = 0; + struct vbios_desc *desc; + enum desc_type type = 0; + u8 *vbios = (u8 *)ldev->vbios; + + WARN_ON(!vbios); + + desc = (struct vbios_desc *)(vbios + VBIOS_DESC_OFFSET); + for (i = 0; i < DESC_LIST_MAX; i++) { + type = desc->type; + if (type == desc_max) + break; + + ret = insert_desc_list(ldev, desc); + if (ret) + DRM_DEBUG_KMS("Parse T-%d V-%d failed[%d]\n", desc->ver, + desc->type, ret); + + desc++; + } + + return ret; +} + +static inline struct desc_node *get_desc_node(struct loongson_drm_device *ldev, + u16 type, u8 index) +{ + struct desc_node *node, *tmp; + struct vbios_desc *vb_desc; + + list_for_each_entry_safe (node, tmp, &ldev->desc_list, head) { + vb_desc = node->desc; + if (vb_desc->type == type && vb_desc->index == index) + return node; + } + + return NULL; +} + +static bool vbios_get_data(struct loongson_drm_device *ldev, struct vbios_cmd *cmd) +{ + struct desc_node *node; + + WARN_ON(!cmd); + + node = get_desc_node(ldev, cmd->type, cmd->index); + if (node && node->parse) + return node->parse(node, cmd); + + DRM_DEBUG_DRIVER("Failed to get node(%d,%d)\n", cmd->type, cmd->index); + + return false; +} + +u32 get_connector_type(struct loongson_drm_device *ldev, u32 index) +{ + u32 type = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_TYPE; + vbt_cmd.res = (void *)(ulong)&type; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + type = -1; + + return type; +} + +u16 get_connector_i2cid(struct loongson_drm_device *ldev, u32 index) +{ + u16 i2c_id = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_I2C_ID; + vbt_cmd.res = (void *)(ulong)&i2c_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + i2c_id = -1; + + return i2c_id; +} + +u32 get_connector_irq_gpio(struct loongson_drm_device *ldev, u32 index) +{ + int ret; + u32 irq_gpio; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_IRQ_GPIO; + vbt_cmd.res = (void *)(ulong)&irq_gpio; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return -1; + + return irq_gpio; +} + +enum gpio_placement get_connector_gpio_placement(struct loongson_drm_device *ldev, + u32 index) +{ + int ret; + enum gpio_placement irq_placement; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_IRQ_PLACEMENT; + vbt_cmd.res = (void *)(ulong)&irq_placement; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return -1; + + return irq_placement; +} + +u16 get_hotplug_mode(struct loongson_drm_device *ldev, u32 index) +{ + u16 mode = -1; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_connector; + vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_HOTPLUG; + vbt_cmd.res = (void *)(ulong)&mode; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + mode = -1; + + return mode; +} + +u16 get_edid_method(struct loongson_drm_device *ldev, u32 
+{
+	bool ret = false;
+	u16 method = via_null;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_connector;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_EDID_METHOD;
+	vbt_cmd.res = (void *)(ulong)&method;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		method = via_null;
+
+	return method;
+}
+
+u8 *get_vbios_edid(struct loongson_drm_device *ldev, u32 index)
+{
+	u8 *edid = NULL;
+	bool ret = false;
+	struct vbios_cmd vbt_cmd;
+
+	edid = kzalloc(sizeof(u8) * EDID_LENGTH * 2, GFP_KERNEL);
+	if (!edid)
+		return edid;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_connector;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CONNECTOR_INTERNAL_EDID;
+	vbt_cmd.res = (void *)(ulong)edid;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret) {
+		kfree(edid);
+		return NULL;
+	}
+
+	return edid;
+}
+
+u32 get_vbios_pwm(struct loongson_drm_device *ldev, u32 index, u16 request)
+{
+	u32 value = -1;
+	bool ret = false;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_pwm;
+	vbt_cmd.req = (void *)(ulong)request;
+	vbt_cmd.res = (void *)(ulong)&value;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		value = 0xffffffff;
+
+	return value;
+}
+
+u32 get_crtc_id(struct loongson_drm_device *ldev, u32 index)
+{
+	u32 crtc_id = 0;
+	bool ret = false;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_crtc;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_ID;
+	vbt_cmd.res = (void *)(ulong)&crtc_id;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		crtc_id = 0;
+
+	return crtc_id;
+}
+
+u32 get_crtc_max_freq(struct loongson_drm_device *ldev, u32 index)
+{
+	bool ret = false;
+	u32 max_freq = 0;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_crtc;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_MAX_FREQ;
+	vbt_cmd.res = (void *)(ulong)&max_freq;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		max_freq = 0;
+
+	return max_freq;
+}
+
+u32 get_crtc_max_width(struct loongson_drm_device *ldev, u32 index)
+{
+	bool ret = false;
+	u32 max_width = 0;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_crtc;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_MAX_WIDTH;
+	vbt_cmd.res = (void *)(ulong)&max_width;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		max_width = LOONGSON_MAX_FB_WIDTH;
+
+	return max_width;
+}
+
+u32 get_crtc_max_height(struct loongson_drm_device *ldev, u32 index)
+{
+	bool ret = false;
+	u32 max_height = 0;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_crtc;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_MAX_HEIGHT;
+	vbt_cmd.res = (void *)(ulong)&max_height;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		max_height = LOONGSON_MAX_FB_HEIGHT;
+
+	return max_height;
+}
+
+u32 get_crtc_encoder_id(struct loongson_drm_device *ldev, u32 index)
+{
+	bool ret = false;
+	u32 encoder_id = 0;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_crtc;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_ENCODER_ID;
+	vbt_cmd.res = (void *)(ulong)&encoder_id;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		encoder_id = 0;
+
+	return encoder_id;
+}
+
+bool get_crtc_is_vb_timing(struct loongson_drm_device *ldev, u32 index)
+{
+	bool ret = false;
+	bool vb_timing = false;
+	struct vbios_cmd vbt_cmd;
+
+	vbt_cmd.index = index;
+	vbt_cmd.type = desc_crtc;
+	vbt_cmd.req = (void *)(ulong)VBIOS_CRTC_IS_VB_TIMING;
+	vbt_cmd.res = (void *)(ulong)&vb_timing;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+	if (!ret)
+		vb_timing = false;
+
+	return vb_timing;
+} + +struct crtc_timing *get_crtc_timing(struct loongson_drm_device *ldev, u32 index) +{ + return NULL; +} + +u32 get_encoder_connector_id(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + u32 connector_id = 0; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONNECTOR_ID; + vbt_cmd.res = (void *)(ulong)&connector_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + connector_id = 0; + + return connector_id; +} + +u32 get_encoder_i2c_id(struct loongson_drm_device *ldev, u32 index) +{ + u32 i2c_id = 0; + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_I2C_ID; + vbt_cmd.res = (void *)(ulong)&i2c_id; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + i2c_id = 0; + + return i2c_id; +} + +struct cfg_encoder *get_encoder_config(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + struct vbios_cmd vbt_cmd; + struct cfg_encoder *encoder_config = NULL; + + vbt_cmd.index = index; + vbt_cmd.type = desc_cfg_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONFIG_PARAM; + ret = vbios_get_data(ldev, &vbt_cmd); + if (ret) + encoder_config = (struct cfg_encoder *)vbt_cmd.res; + + return encoder_config; +} + +u32 get_encoder_cfg_num(struct loongson_drm_device *ldev, u32 index) +{ + struct vbios_cmd vbt_cmd; + bool ret = false; + u32 cfg_num = 0; + + vbt_cmd.index = index; + vbt_cmd.type = desc_cfg_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONFIG_NUM; + vbt_cmd.res = (void *)(ulong)&cfg_num; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + cfg_num = 0; + + return cfg_num; +} + +enum encoder_config get_encoder_config_type(struct loongson_drm_device *ldev, + u32 index) +{ + bool ret = false; + enum encoder_config config_type = encoder_bios_config; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CONFIG_TYPE; + vbt_cmd.res = (void *)(ulong)&config_type; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + config_type = encoder_bios_config; + + return config_type; +} + +enum encoder_object get_encoder_chip(struct loongson_drm_device *ldev, u32 index) +{ + int ret; + enum encoder_object chip; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CHIP; + vbt_cmd.res = (void *)(ulong)&chip; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return Unknown; + + return chip; +} + +u8 get_encoder_chip_addr(struct loongson_drm_device *ldev, u32 index) +{ + int ret; + u8 chip_addr; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_CHIP_ADDR; + vbt_cmd.res = (void *)(ulong)&chip_addr; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + return Unknown; + + return chip_addr; +} + +enum encoder_type get_encoder_type(struct loongson_drm_device *ldev, u32 index) +{ + bool ret = false; + enum encoder_type type = encoder_dac; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = index; + vbt_cmd.type = desc_encoder; + vbt_cmd.req = (void *)(ulong)VBIOS_ENCODER_TYPE; + vbt_cmd.res = (void *)(ulong)&type; + ret = vbios_get_data(ldev, &vbt_cmd); + if (!ret) + type = encoder_dac; + + return type; +} + +bool get_loongson_i2c(struct loongson_drm_device *ldev) +{ + bool ret = false; + struct vbios_cmd vbt_cmd; + + vbt_cmd.index = 0; + vbt_cmd.type = 
desc_i2c;
+	vbt_cmd.res = (void *)&ldev->i2c_bus;
+	ret = vbios_get_data(ldev, &vbt_cmd);
+
+	if (!ret) {
+		ldev->i2c_bus[0].use = true;
+		ldev->i2c_bus[1].use = true;
+	}
+
+	return true;
+}
+
+static void *get_vbios_from_acpi(struct loongson_drm_device *ldev)
+{
+	void *vbios = NULL;
+#ifdef CONFIG_ACPI
+	struct acpi_viat_table *viat;
+	struct acpi_table_header *hdr;
+
+	if (!ACPI_SUCCESS(acpi_get_table("VIAT", 1, &hdr)))
+		return NULL;
+
+	if (hdr->length != sizeof(struct acpi_viat_table)) {
+		DRM_WARN("ACPI VIAT table present but broken (length error)\n");
+		return NULL;
+	}
+
+	vbios = kmalloc(VBIOS_SIZE, GFP_KERNEL);
+	if (!vbios)
+		return NULL;
+
+	viat = (struct acpi_viat_table *)hdr;
+	memcpy(vbios, phys_to_virt(viat->vbios_addr), VBIOS_SIZE);
+
+	DRM_INFO("Get VBIOS from ACPI success!\n");
+#endif
+	return vbios;
+}
+
+static void *get_vbios_from_vram(struct loongson_drm_device *ldev)
+{
+	void *bios, *vbios = NULL;
+	u64 vram_base = pci_resource_start(ldev->gpu_pdev, 2);
+	u64 vram_size = pci_resource_len(ldev->gpu_pdev, 2);
+	u64 vbios_addr = vram_base + vram_size - VBIOS_OFFSET;
+
+	bios = ioremap(vbios_addr, VBIOS_SIZE);
+	if (!bios)
+		return NULL;
+
+	vbios = kmalloc(VBIOS_SIZE, GFP_KERNEL);
+	if (!vbios) {
+		iounmap(bios);
+		return NULL;
+	}
+
+	memcpy(vbios, bios, VBIOS_SIZE);
+
+	iounmap(bios);
+
+	if (memcmp(vbios, VBIOS_TITLE, strlen(VBIOS_TITLE))) {
+		kfree(vbios);
+		return NULL;
+	}
+
+	DRM_INFO("Get VBIOS from VRAM success!\n");
+
+	return vbios;
+}
+
+bool loongson_vbios_init(struct loongson_drm_device *ldev)
+{
+	void *vbios = NULL;
+	struct loongson_vbios *header;
+
+	vbios = get_vbios_from_vram(ldev);
+	if (vbios)
+		goto success;
+
+	vbios = get_vbios_from_acpi(ldev);
+	if (vbios)
+		goto success;
+
+	vbios = kzalloc(256 * 1024, GFP_KERNEL);
+	if (!vbios)
+		return false;
+
+	header = vbios;
+	header->crtc_num = 2;
+
+success:
+	header = ldev->vbios = vbios;
+	ldev->num_crtc = header->crtc_num;
+
+	DRM_INFO("Loongson VBIOS version %d.%d\n", header->version_major,
+		 header->version_minor);
+
+	INIT_LIST_HEAD(&ldev->desc_list);
+	parse_vbios_desc(ldev);
+
+	return true;
+}
+
+void loongson_vbios_exit(struct loongson_drm_device *ldev)
+{
+	free_desc_list(ldev);
+	kfree(ldev->vbios);
+}
diff --git a/drivers/gpu/drm/loongson/loongson_vbios.h b/drivers/gpu/drm/loongson/loongson_vbios.h
new file mode 100644
index 000000000000..25ae943a6c95
--- /dev/null
+++ b/drivers/gpu/drm/loongson/loongson_vbios.h
@@ -0,0 +1,283 @@
+#ifndef __LOONGSON_VBIOS_H__
+#define __LOONGSON_VBIOS_H__
+
+#define VBIOS_PWM_ID 0x0
+#define VBIOS_PWM_PERIOD 0x1
+#define VBIOS_PWM_POLARITY 0x2
+
+#define VBIOS_CRTC_ID 0x1
+#define VBIOS_CRTC_ENCODER_ID 0x2
+#define VBIOS_CRTC_MAX_FREQ 0x3
+#define VBIOS_CRTC_MAX_WIDTH 0x4
+#define VBIOS_CRTC_MAX_HEIGHT 0x5
+#define VBIOS_CRTC_IS_VB_TIMING 0x6
+
+#define VBIOS_ENCODER_I2C_ID 0x1
+#define VBIOS_ENCODER_CONNECTOR_ID 0x2
+#define VBIOS_ENCODER_TYPE 0x3
+#define VBIOS_ENCODER_CONFIG_TYPE 0x4
+#define VBIOS_ENCODER_CONFIG_PARAM 0x1
+#define VBIOS_ENCODER_CONFIG_NUM 0x2
+#define VBIOS_ENCODER_CHIP 0x05
+#define VBIOS_ENCODER_CHIP_ADDR 0x06
+
+#define VBIOS_CONNECTOR_I2C_ID 0x1
+#define VBIOS_CONNECTOR_INTERNAL_EDID 0x2
+#define VBIOS_CONNECTOR_TYPE 0x3
+#define VBIOS_CONNECTOR_HOTPLUG 0x4
+#define VBIOS_CONNECTOR_EDID_METHOD 0x5
+#define VBIOS_CONNECTOR_IRQ_PLACEMENT 0x06
+#define VBIOS_CONNECTOR_IRQ_GPIO 0x07
+
+struct desc_node;
+struct vbios_cmd;
+typedef bool(parse_func)(struct desc_node *, struct vbios_cmd *);
+
+enum desc_type {
+	desc_header = 0,
+	desc_crtc,
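+	/*
+	 * Note: these enumerator values are matched against the @type field
+	 * of struct vbios_desc as emitted by the firmware, so they
+	 * presumably must keep their current numbering once VBIOS images
+	 * ship.
+	 */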
desc_encoder, + desc_connector, + desc_i2c, + desc_pwm, + desc_gpio, + desc_backlight, + desc_fan, + desc_irq_vblank, + desc_cfg_encoder, + desc_max = 0xffff +}; + +enum desc_ver { + ver_v1, +}; + +enum hotplug { + disable = 0, + polling, + irq, + hotplug_max = 0xffffffff, +}; + +enum loongson_edid_method { + via_null = 0, + via_i2c, + via_vbios, + via_encoder, + via_max = 0xffffffff, +}; + +enum i2c_type { i2c_cpu, i2c_gpio, i2c_max = -1 }; + +enum vbios_backlight_type { bl_unuse, bl_ec, bl_pwm }; + +enum encoder_config { + encoder_transparent = 0, + encoder_os_config, + encoder_bios_config, + encoder_timing_filling, + encoder_kernel_driver, + encoder_type_max = 0xffffffff, +}; + +enum encoder_type { + encoder_none, + encoder_dac, + encoder_tmds, + encoder_lvds, + encoder_tvdac, + encoder_virtual, + encoder_dsi, + encoder_dpmst, + encoder_dpi +}; + +enum connector_type { + connector_unknown, + connector_vga, + connector_dvii, + connector_dvid, + connector_dvia, + connector_composite, + connector_svideo, + connector_lvds, + connector_component, + connector_9pindin, + connector_displayport, + connector_hdmia, + connector_hdmib, + connector_tv, + connector_edp, + connector_virtual, + connector_dsi, + connector_dpi +}; + +enum gpio_placement { + GPIO_PLACEMENT_LS3A = 0, + GPIO_PLACEMENT_LS7A, +}; + +enum encoder_object { + Unknown = 0x00, + INTERNAL_DVO = 0x01, + INTERNAL_HDMI = 0x02, + VGA_CH7055 = 0x10, + VGA_ADV7125 = 0x11, + DVI_TFP410 = 0x20, + HDMI_IT66121 = 0x30, + HDMI_SIL9022 = 0x31, + HDMI_LT8618 = 0x32, + HDMI_MS7210 = 0x33, + EDP_NCS8805 = 0x40 +}; + +struct loongson_vbios { + char title[16]; + uint32_t version_major; + uint32_t version_minor; + char information[20]; + uint32_t crtc_num; + uint32_t crtc_offset; + uint32_t connector_num; + uint32_t connector_offset; + uint32_t encoder_num; + uint32_t encoder_offset; +} __packed; + +struct vbios_header { + u32 feature; + u8 oem_vendor[32]; + u8 oem_product[32]; + u32 legacy_offset; + u32 legacy_size; + u32 desc_offset; + u32 desc_size; + u32 data_offset; + u32 data_size; +} __packed; + +struct vbios_backlight { + u32 feature; + u8 used; + enum vbios_backlight_type type; +} __packed; + +struct vbios_i2c { + u32 feature; + u16 id; + enum i2c_type type; +} __packed; + +struct vbios_pwm { + u32 feature; + u8 pwm; + u8 polarity; + u32 peroid; +} __packed; + +struct vbios_desc { + u16 type; + u8 ver; + u8 index; + u32 offset; + u32 size; + u64 ext[2]; +} __packed; + +struct vbios_cmd { + u8 index; + enum desc_type type; + u64 *req; + void *res; +}; + +struct desc_func { + enum desc_type type; + u16 ver; + s8 *name; + u8 index; + parse_func *func; +}; + +struct desc_node { + struct list_head head; + u8 *data; + struct vbios_desc *desc; + parse_func *parse; +}; + +struct vbios_crtc { + u32 feature; + u32 crtc_id; + u32 encoder_id; + u32 max_freq; + u32 max_width; + u32 max_height; + bool is_vb_timing; +} __packed; + +struct vbios_encoder { + u32 feature; + u32 i2c_id; + u32 connector_id; + enum encoder_type type; + enum encoder_config config_type; + enum encoder_object chip; + u8 chip_addr; +} __packed; + +struct vbios_connector { + u32 feature; + u32 i2c_id; + u8 internal_edid[256]; + enum connector_type type; + enum hotplug hotplug; + enum loongson_edid_method edid_method; + u32 irq_gpio; + enum gpio_placement gpio_placement; +} __packed; + +struct vbios_conf_reg { + u8 dev_addr; + u8 reg; + u8 value; +} __packed; + +struct vbios_cfg_encoder { + u32 hdisplay; + u32 vdisplay; + u8 reg_num; + struct vbios_conf_reg config_regs[256]; +} 
__packed; + +bool loongson_vbios_init(struct loongson_drm_device *ldev); +void loongson_vbios_exit(struct loongson_drm_device *ldev); +u32 get_connector_type(struct loongson_drm_device *ldev, u32 index); +u16 get_connector_i2cid(struct loongson_drm_device *ldev, u32 index); +u16 get_hotplug_mode(struct loongson_drm_device *ldev, u32 index); +u8 *get_vbios_edid(struct loongson_drm_device *ldev, u32 index); +u16 get_edid_method(struct loongson_drm_device *ldev, u32 index); +u32 get_vbios_pwm(struct loongson_drm_device *ldev, u32 index, u16 request); +u32 get_crtc_id(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_max_freq(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_max_width(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_max_height(struct loongson_drm_device *ldev, u32 index); +u32 get_crtc_encoder_id(struct loongson_drm_device *ldev, u32 index); +bool get_crtc_is_vb_timing(struct loongson_drm_device *ldev, u32 index); + +struct crtc_timing *get_crtc_timing(struct loongson_drm_device *ldev, u32 index); + +bool get_loongson_i2c(struct loongson_drm_device *ldev); +u32 get_encoder_i2c_id(struct loongson_drm_device *ldev, u32 index); +u32 get_encoder_connector_id(struct loongson_drm_device *ldev, u32 index); +enum encoder_config get_encoder_config_type(struct loongson_drm_device *ldev, u32 index); +enum encoder_type get_encoder_type(struct loongson_drm_device *ldev, u32 index); +struct cfg_encoder *get_encoder_config(struct loongson_drm_device *ldev, u32 index); +u32 get_encoder_cfg_num(struct loongson_drm_device *ldev, u32 index); +enum encoder_object get_encoder_chip(struct loongson_drm_device *ldev, u32 index); +u8 get_encoder_chip_addr(struct loongson_drm_device *ldev, u32 index); +u32 get_connector_irq_gpio(struct loongson_drm_device *ldev, u32 index); +enum gpio_placement get_connector_gpio_placement(struct loongson_drm_device *ldev, + u32 index); +#endif -- Gitee From 02dd903c76d0b94d98a403d4b2d4e36be874884e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 1 Dec 2016 09:51:35 +0800 Subject: [PATCH 232/241] alsa: Add Loongson LS7A HD-Audio support Change-Id: Ibbb3d319ab5d6dbe42c89df99982514a68788748 Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 1 + sound/hda/hdac_bus.c | 6 +- sound/hda/hdac_controller.c | 59 +- sound/hda/hdac_device.c | 1 + sound/hda/hdac_stream.c | 64 +- sound/pci/hda/Kconfig | 11 + sound/pci/hda/Makefile | 2 + sound/pci/hda/hda_controller.c | 44 +- sound/pci/hda/hda_controller.h | 4 + sound/pci/hda/hda_loongson.c | 850 +++++++++++++++++++++ sound/pci/hda/patch_hdmi.c | 1 + sound/pci/hda/patch_realtek.c | 12 +- 12 files changed, 1026 insertions(+), 29 deletions(-) create mode 100644 sound/pci/hda/hda_loongson.c diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 17462c17f978..5e13d898fc10 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -598,6 +598,7 @@ CONFIG_SND_SEQ_DUMMY=m CONFIG_SND_BT87X=m CONFIG_SND_BT87X_OVERCLOCK=y CONFIG_SND_HDA_INTEL=y +CONFIG_SND_HDA_LOONGSON=y CONFIG_SND_HDA_HWDEP=y CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_PATCH_LOADER=y diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c index 9766f6af8743..6d4c780cf818 100644 --- a/sound/hda/hdac_bus.c +++ b/sound/hda/hdac_bus.c @@ -11,6 +11,7 @@ #include #include "local.h" #include "trace.h" +#include "../pci/hda/hda_controller.h" static void snd_hdac_bus_process_unsol_events(struct work_struct *work); @@ -108,6 
+109,7 @@ int snd_hdac_bus_exec_verb_unlocked(struct hdac_bus *bus, unsigned int addr, { unsigned int tmp; int err; + struct azx *chip = bus_to_azx(bus); if (cmd == ~0) return -EINVAL; @@ -116,7 +118,9 @@ int snd_hdac_bus_exec_verb_unlocked(struct hdac_bus *bus, unsigned int addr, *res = -1; else if (bus->sync_write) res = &tmp; - for (;;) { + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + err = bus->ops->command(bus, cmd); + else for (;;) { trace_hda_send_cmd(bus, cmd); err = bus->ops->command(bus, cmd); if (err != -EAGAIN) diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 522d1897659c..da047c8bb239 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -10,6 +10,7 @@ #include #include #include "local.h" +#include "../pci/hda/hda_controller.h" /* clear CORB read pointer properly */ static void azx_clear_corbrp(struct hdac_bus *bus) @@ -42,6 +43,8 @@ static void azx_clear_corbrp(struct hdac_bus *bus) */ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) { + struct azx *chip = bus_to_azx(bus); + WARN_ON_ONCE(!bus->rb.area); spin_lock_irq(&bus->reg_lock); @@ -58,11 +61,15 @@ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) /* reset the corb hw read pointer */ snd_hdac_chip_writew(bus, CORBRP, AZX_CORBRP_RST); - if (!bus->corbrp_self_clear) + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_chip_writew(bus, CORBRP, 0); + else if (!bus->corbrp_self_clear) azx_clear_corbrp(bus); /* enable corb dma */ snd_hdac_chip_writeb(bus, CORBCTL, AZX_CORBCTL_RUN); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_chip_readb(bus, CORBCTL); /* RIRB set up */ bus->rirb.addr = bus->rb.addr + 2048; @@ -79,7 +86,12 @@ void snd_hdac_bus_init_cmd_io(struct hdac_bus *bus) /* set N=1, get RIRB response interrupt for new entry */ snd_hdac_chip_writew(bus, RINTCNT, 1); /* enable rirb dma and response irq */ - snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN | AZX_RBCTL_IRQ_EN); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN); + snd_hdac_chip_readb(bus, RIRBCTL); + } + else + snd_hdac_chip_writeb(bus, RIRBCTL, AZX_RBCTL_DMA_EN | AZX_RBCTL_IRQ_EN); /* Accept unsolicited responses */ snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, AZX_GCTL_UNSOL); spin_unlock_irq(&bus->reg_lock); @@ -132,6 +144,18 @@ static unsigned int azx_command_addr(u32 cmd) return addr; } +static unsigned int azx_response_addr(u32 res) +{ + unsigned int addr = res & 0xf; + + if (addr >= AZX_MAX_CODECS) { + snd_BUG(); + addr = 0; + } + + return addr; +} + /** * snd_hdac_bus_send_cmd - send a command verb via CORB * @bus: HD-audio core bus @@ -207,13 +231,8 @@ void snd_hdac_bus_update_rirb(struct hdac_bus *bus) rp = bus->rirb.rp << 1; /* an RIRB entry is 8-bytes */ res_ex = le32_to_cpu(bus->rirb.buf[rp + 1]); res = le32_to_cpu(bus->rirb.buf[rp]); - addr = res_ex & 0xf; - if (addr >= HDA_MAX_CODECS) { - dev_err(bus->dev, - "spurious response %#x:%#x, rp = %d, wp = %d", - res, res_ex, bus->rirb.rp, wp); - snd_BUG(); - } else if (res_ex & AZX_RIRB_EX_UNSOL_EV) + addr = azx_response_addr(res_ex); + if (res_ex & AZX_RIRB_EX_UNSOL_EV) snd_hdac_bus_queue_event(bus, res, res_ex); else if (bus->rirb.cmds[addr]) { bus->rirb.res[addr] = res; @@ -485,16 +504,24 @@ static void azx_int_disable(struct hdac_bus *bus) static void azx_int_clear(struct hdac_bus *bus) { struct hdac_stream *azx_dev; + struct azx *chip = bus_to_azx(bus); /* clear stream status */ - list_for_each_entry(azx_dev, &bus->stream_list, list) - 
snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); + list_for_each_entry(azx_dev, &bus->stream_list, list) { + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_stream_updateb(azx_dev, SD_STS, 0, 0); + else + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); + } /* clear STATESTS */ snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); /* clear rirb status */ - snd_hdac_chip_writeb(bus, RIRBSTS, RIRB_INT_MASK); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_chip_updateb(bus, RIRBSTS, ~RIRB_INT_MASK, 0); + else + snd_hdac_chip_writeb(bus, RIRBSTS, RIRB_INT_MASK); /* clear int status */ snd_hdac_chip_writel(bus, INTSTS, AZX_INT_CTRL_EN | AZX_INT_ALL_STREAM); @@ -575,11 +602,17 @@ int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status, struct hdac_stream *azx_dev; u8 sd_status; int handled = 0; + struct azx *chip = bus_to_azx(bus); list_for_each_entry(azx_dev, &bus->stream_list, list) { if (status & azx_dev->sd_int_sta_mask) { sd_status = snd_hdac_stream_readb(azx_dev, SD_STS); - snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + snd_hdac_stream_writeb(azx_dev, SD_STS, sd_status); + snd_hdac_stream_readb(azx_dev, SD_STS); + } + else + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); handled |= 1 << azx_dev->index; if (!azx_dev->substream || !azx_dev->running || !(sd_status & SD_INT_COMPLETE)) diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c index bfd858577676..5315a3eb114e 100644 --- a/sound/hda/hdac_device.c +++ b/sound/hda/hdac_device.c @@ -645,6 +645,7 @@ struct hda_vendor_id { }; static const struct hda_vendor_id hda_vendor_ids[] = { + { 0x0014, "Loongson" }, { 0x1002, "ATI" }, { 0x1013, "Cirrus Logic" }, { 0x1057, "Motorola" }, diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index 1e0f61affd97..4c32aaeceb80 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -12,6 +12,7 @@ #include #include #include "trace.h" +#include "../pci/hda/hda_controller.h" /** * snd_hdac_get_stream_stripe_ctl - get stripe control value @@ -83,12 +84,13 @@ EXPORT_SYMBOL_GPL(snd_hdac_stream_init); void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start) { struct hdac_bus *bus = azx_dev->bus; + struct azx *chip = bus_to_azx(bus); int stripe_ctl; trace_snd_hdac_stream_start(bus, azx_dev); azx_dev->start_wallclk = snd_hdac_chip_readl(bus, WALLCLK); - if (!fresh_start) + if (!fresh_start && !(chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND)) azx_dev->start_wallclk -= azx_dev->period_wallclk; /* enable SIE */ @@ -101,11 +103,19 @@ void snd_hdac_stream_start(struct hdac_stream *azx_dev, bool fresh_start) stripe_ctl = snd_hdac_get_stream_stripe_ctl(bus, azx_dev->substream); else stripe_ctl = 0; - snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_stream_updatel(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, + stripe_ctl); + else + snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, stripe_ctl); } /* set DMA start and interrupt mask */ - snd_hdac_stream_updateb(azx_dev, SD_CTL, + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + snd_hdac_stream_updatel(azx_dev, SD_CTL, + 0, SD_CTL_DMA_START | SD_INT_MASK); + else + snd_hdac_stream_updateb(azx_dev, SD_CTL, 0, SD_CTL_DMA_START | SD_INT_MASK); azx_dev->running = true; } @@ -117,11 +127,31 @@ EXPORT_SYMBOL_GPL(snd_hdac_stream_start); */ void snd_hdac_stream_clear(struct hdac_stream *azx_dev) { - 
snd_hdac_stream_updateb(azx_dev, SD_CTL, - SD_CTL_DMA_START | SD_INT_MASK, 0); - snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ - if (azx_dev->stripe) - snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); + int stream; + struct azx *chip = bus_to_azx(azx_dev->bus); + struct snd_pcm_substream *substream = azx_dev->substream; + + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + snd_hdac_stream_updatel(azx_dev, SD_CTL, + SD_CTL_DMA_START | SD_INT_MASK, 0); + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ + if (azx_dev->stripe) + snd_hdac_stream_updatel(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); + + if (!substream) + stream_to_azx_dev(azx_dev)->fix_prvpos = 0; + else { + stream = substream->stream; + stream_to_azx_dev(azx_dev)->fix_prvpos = + chip->get_position[stream](chip, stream_to_azx_dev(azx_dev)); + } + } else { + snd_hdac_stream_updateb(azx_dev, SD_CTL, + SD_CTL_DMA_START | SD_INT_MASK, 0); + snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */ + if (azx_dev->stripe) + snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0); + } azx_dev->running = false; } EXPORT_SYMBOL_GPL(snd_hdac_stream_clear); @@ -178,6 +208,10 @@ void snd_hdac_stream_reset(struct hdac_stream *azx_dev) unsigned char val; int timeout; int dma_run_state; + struct azx *chip = bus_to_azx(azx_dev->bus); + + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + goto out; snd_hdac_stream_clear(azx_dev); @@ -209,6 +243,7 @@ void snd_hdac_stream_reset(struct hdac_stream *azx_dev) break; } while (--timeout); +out: /* reset first position - may not be synced with hw at this time */ if (azx_dev->posbuf) *azx_dev->posbuf = 0; @@ -224,6 +259,7 @@ int snd_hdac_stream_setup(struct hdac_stream *azx_dev) struct hdac_bus *bus = azx_dev->bus; struct snd_pcm_runtime *runtime; unsigned int val; + struct azx *chip = bus_to_azx(bus); if (azx_dev->substream) runtime = azx_dev->substream->runtime; @@ -240,7 +276,14 @@ int snd_hdac_stream_setup(struct hdac_stream *azx_dev) snd_hdac_stream_writel(azx_dev, SD_CTL, val); /* program the length of samples in cyclic buffer */ - snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + if(azx_dev->substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize - 64); + else + snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize - 16); + } + else + snd_hdac_stream_writel(azx_dev, SD_CBL, azx_dev->bufsize); /* program the stream format */ /* this value needs to be the same as the one programmed */ @@ -446,6 +489,7 @@ int snd_hdac_stream_setup_periods(struct hdac_stream *azx_dev) __le32 *bdl; int i, ofs, periods, period_bytes; int pos_adj, pos_align; + struct azx *chip = bus_to_azx(bus); /* reset BDL address */ snd_hdac_stream_writel(azx_dev, SD_BDLPL, 0); @@ -460,6 +504,8 @@ int snd_hdac_stream_setup_periods(struct hdac_stream *azx_dev) azx_dev->frags = 0; pos_adj = bus->bdl_pos_adj; + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + pos_adj = 0; if (!azx_dev->no_period_wakeup && pos_adj > 0) { pos_align = pos_adj; pos_adj = (pos_adj * runtime->rate + 47999) / 48000; diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 90759391cbac..77ee0b088050 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -42,6 +42,17 @@ config SND_HDA_TEGRA To compile this driver as a module, choose M here: the module will be called snd-hda-tegra. 
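+# Note: the entry below mirrors SND_HDA_INTEL/SND_HDA_TEGRA; when built
+# as a module the driver is called snd-hda-loongson and takes the usual
+# azx-style parameters (index, id, model, bdl_pos_adj, ...).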
+config SND_HDA_LOONGSON + tristate "Loongson HD Audio" + depends on MACH_LOONGSON64 + select SND_HDA + help + Say Y here to include support for Loongson's LS7A "High + Definition Audio" controller. + + To compile this driver as a module, choose M here: the module + will be called snd-hda-loongson. + if SND_HDA config SND_HDA_HWDEP diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile index b57432f00056..2b6d1b32409c 100644 --- a/sound/pci/hda/Makefile +++ b/sound/pci/hda/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 snd-hda-intel-objs := hda_intel.o snd-hda-tegra-objs := hda_tegra.o +snd-hda-loongson-objs := hda_loongson.o snd-hda-codec-y := hda_bind.o hda_codec.o hda_jack.o hda_auto_parser.o hda_sysfs.o snd-hda-codec-y += hda_controller.o @@ -48,3 +49,4 @@ obj-$(CONFIG_SND_HDA_CODEC_HDMI) += snd-hda-codec-hdmi.o # when built in kernel obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-intel.o obj-$(CONFIG_SND_HDA_TEGRA) += snd-hda-tegra.o +obj-$(CONFIG_SND_HDA_LOONGSON) += snd-hda-loongson.o diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index ea76395d71d3..02279958a74a 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -295,6 +295,23 @@ unsigned int azx_get_position(struct azx *chip, int stream = substream->stream; int delay = 0; + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + pos = chip->get_position[stream](chip, azx_dev); + + if (pos >= azx_dev->fix_prvpos) { + pos = pos - azx_dev->fix_prvpos; + pos %= azx_dev->core.bufsize; + } else { + if (azx_dev->fix_prvpos > azx_dev->core.bufsize) + pos = (0x100000000ULL + pos-azx_dev->fix_prvpos) + % azx_dev->core.bufsize; + else + pos = pos + azx_dev->core.bufsize - azx_dev->fix_prvpos; + } + + return pos; + } + if (chip->get_position[stream]) pos = chip->get_position[stream](chip, azx_dev); else /* use the position buffer as default */ @@ -920,6 +937,8 @@ static int azx_send_cmd(struct hdac_bus *bus, unsigned int val) if (chip->disabled) return 0; + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + udelay(500); if (chip->single_cmd) return azx_single_send_cmd(bus, val); else @@ -1076,8 +1095,9 @@ static void stream_update(struct hdac_bus *bus, struct hdac_stream *s) irqreturn_t azx_interrupt(int irq, void *dev_id) { struct azx *chip = dev_id; + struct hdac_stream *azx_dev; struct hdac_bus *bus = azx_bus(chip); - u32 status; + u32 i = 0, status = 0; bool active, handled = false; int repeat = 0; /* count for avoiding endless loop */ @@ -1093,8 +1113,21 @@ irqreturn_t azx_interrupt(int irq, void *dev_id) goto unlock; do { - status = azx_readl(chip, INTSTS); - if (status == 0 || status == 0xffffffff) + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) { + i = 0; + status = 0; + list_for_each_entry(azx_dev, &bus->stream_list, list) { + status |= (snd_hdac_stream_readb(azx_dev, SD_STS) & SD_INT_MASK) ? + (1 << i) : 0; + i++; + } + status |= (status & ~0) ? (1 << 31) : 0; + } + else + status = azx_readl(chip, INTSTS); + + if (status == 0 || + (status == 0xffffffff && !(chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND))) break; handled = true; @@ -1112,7 +1145,10 @@ irqreturn_t azx_interrupt(int irq, void *dev_id) * remain unserviced by IRQ, eventually falling back * to polling mode in azx_rirb_get_response. 
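 * Note: with AZX_DCAPS_LS2X_WORKAROUND only the status bits actually
 * observed are cleared below, which the Loongson controller appears
 * to require.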
*/ - azx_writeb(chip, RIRBSTS, RIRB_INT_MASK); + if (chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND) + azx_writeb(chip, RIRBSTS, status & RIRB_INT_MASK); + else + azx_writeb(chip, RIRBSTS, RIRB_INT_MASK); active = true; if (status & RIRB_INT_RESPONSE) { if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND) diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 324cba13c7ba..c5cb3e5fd89a 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -45,6 +45,7 @@ #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28) /* CORBRP clears itself after reset */ #define AZX_DCAPS_NO_MSI64 (1 << 29) /* Stick to 32-bit MSIs */ #define AZX_DCAPS_SEPARATE_STREAM_TAG (1 << 30) /* capture and playback use separate stream tag */ +#define AZX_DCAPS_LS2X_WORKAROUND (1 << 31) /* Loongson-2H workaround */ enum { AZX_SNOOP_TYPE_NONE, @@ -63,6 +64,9 @@ struct azx_dev { * when link position is not greater than FIFO size */ unsigned int insufficient:1; + + /* For Loongson */ + unsigned int fix_prvpos; }; #define azx_stream(dev) (&(dev)->core) diff --git a/sound/pci/hda/hda_loongson.c b/sound/pci/hda/hda_loongson.c new file mode 100644 index 000000000000..eafde6cc74ec --- /dev/null +++ b/sound/pci/hda/hda_loongson.c @@ -0,0 +1,850 @@ +/* + * + * hda_loongson.c - Implementation of primary alsa driver code base + * for Intel HD Audio. + * + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * + * Copyright (c) 2004 Takashi Iwai + * PeiSen Hou + * + * Copyright (c) 2014 Huacai Chen + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * CONTACTS: + * + * Matt Jared matt.jared@intel.com + * Andy Kopp andy.kopp@intel.com + * Dan Kogan dan.d.kogan@intel.com + * + * CHANGES: + * + * 2004.12.01 Major rewrite by tiwai, merged the work of pshou + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "hda_controller.h" + +#define LS7A_NUM_CAPTURE 4 +#define LS7A_NUM_PLAYBACK 4 + +static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; +static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; +static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; +static char *model[SNDRV_CARDS]; +static int bdl_pos_adj[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = -1}; +static int probe_only[SNDRV_CARDS]; +static int jackpoll_ms[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS-1)] = 1200}; +static bool single_cmd; +static int enable_msi = -1; +#ifdef CONFIG_SND_HDA_INPUT_BEEP +static bool beep_mode[SNDRV_CARDS] = {[0 ... 
(SNDRV_CARDS-1)] =
+					CONFIG_SND_HDA_INPUT_BEEP_MODE};
+#endif
+
+module_param_array(index, int, NULL, 0444);
+MODULE_PARM_DESC(index, "Index value for Loongson HD audio interface.");
+module_param_array(id, charp, NULL, 0444);
+MODULE_PARM_DESC(id, "ID string for Loongson HD audio interface.");
+module_param_array(enable, bool, NULL, 0444);
+MODULE_PARM_DESC(enable, "Enable Loongson HD audio interface.");
+module_param_array(model, charp, NULL, 0444);
+MODULE_PARM_DESC(model, "Use the given board model.");
+module_param_array(bdl_pos_adj, int, NULL, 0644);
+MODULE_PARM_DESC(bdl_pos_adj, "BDL position adjustment offset.");
+module_param_array(probe_only, int, NULL, 0444);
+MODULE_PARM_DESC(probe_only, "Only probing and no codec initialization.");
+module_param_array(jackpoll_ms, int, NULL, 0444);
+MODULE_PARM_DESC(jackpoll_ms, "Ms between polling for jack events (default = 1200, 0 = use unsol events only)");
+module_param(single_cmd, bool, 0444);
+MODULE_PARM_DESC(single_cmd, "Use single command to communicate with codecs "
+		 "(for debugging only).");
+module_param(enable_msi, bint, 0444);
+MODULE_PARM_DESC(enable_msi, "Enable Message Signaled Interrupt (MSI)");
+#ifdef CONFIG_SND_HDA_INPUT_BEEP
+module_param_array(beep_mode, bool, NULL, 0444);
+MODULE_PARM_DESC(beep_mode, "Select HDA Beep registration mode "
+		 "(0=off, 1=on) (default=1).");
+#endif
+
+#ifdef CONFIG_PM
+static int param_set_xint(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops param_ops_xint = {
+	.set = param_set_xint,
+	.get = param_get_int,
+};
+#define param_check_xint param_check_int
+
+static int power_save = CONFIG_SND_HDA_POWER_SAVE_DEFAULT;
+module_param(power_save, xint, 0644);
+MODULE_PARM_DESC(power_save, "Automatic power-saving timeout "
+		 "(in seconds, 0 = disable).");
+
+/* reset the HD-audio controller in power save mode.
+ * this may give more power saving, but will take a longer time to
+ * wake up.
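+ * Note: this driver defaults the option to on; azx_runtime_idle()
+ * additionally refuses to idle while any codec is still powered.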
+ */ +static bool power_save_controller = 1; +module_param(power_save_controller, bool, 0644); +MODULE_PARM_DESC(power_save_controller, "Reset controller in power save mode."); +#else +#define power_save 0 +#endif /* CONFIG_PM */ + +static int align_buffer_size = -1; +module_param(align_buffer_size, bint, 0644); +MODULE_PARM_DESC(align_buffer_size, + "Force buffer and period sizes to be multiple of 128 bytes."); + +MODULE_LICENSE("GPL"); +MODULE_SUPPORTED_DEVICE("{Loongson, LS7A}"); +MODULE_DESCRIPTION("Loongson LS7A HDA driver"); + +/* driver types */ +enum { + AZX_DRIVER_LS7A, + AZX_DRIVER_HDMI, + AZX_NUM_DRIVERS, /* keep this as last entry */ +}; + +static char *driver_short_names[] = { + [AZX_DRIVER_LS7A] = "HD-Audio Loongson", + [AZX_DRIVER_HDMI] = "HD-Audio Loongson HDMI", +}; + +struct hda_loongson { + struct azx chip; + + /* for pending irqs */ + struct work_struct irq_pending_work; + + /* sync probing */ + struct completion probe_wait; + struct work_struct probe_work; + + /* card list (for power_save trigger) */ + struct list_head list; + + /* extra flags */ + unsigned int irq_pending_warned:1; + unsigned int probe_continued:1; +}; + +static int azx_acquire_irq(struct azx *chip, int do_disconnect); + +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev); + +/* called from IRQ */ +static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + int ok; + + ok = azx_position_ok(chip, azx_dev); + if (ok == 1) { + azx_dev->irq_pending = 0; + return ok; + } else if (ok == 0) { + /* bogus IRQ, process it later */ + azx_dev->irq_pending = 1; + schedule_work(&hda->irq_pending_work); + } + return 0; +} + +/* + * Check whether the current DMA position is acceptable for updating + * periods. Returns non-zero if it's OK. + * + * Many HD-audio controllers appear pretty inaccurate about + * the update-IRQ timing. The IRQ is issued before actually the + * data is processed. So, we need to process it afterwords in a + * workqueue. + */ +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) +{ + return 1; /* OK, it's fine */ +} + +/* + * The work for pending PCM period updates. + */ +static void azx_irq_pending_work(struct work_struct *work) +{ + struct hda_loongson *hda = container_of(work, struct hda_loongson, irq_pending_work); + struct azx *chip = &hda->chip; + struct hdac_bus *bus = azx_bus(chip); + struct hdac_stream *s; + int pending, ok; + + if (!hda->irq_pending_warned) { + dev_info(chip->card->dev, + "IRQ timing workaround is activated for card #%d. 
Suggest a bigger bdl_pos_adj.\n", + chip->card->number); + hda->irq_pending_warned = 1; + } + + for (;;) { + pending = 0; + spin_lock_irq(&bus->reg_lock); + list_for_each_entry(s, &bus->stream_list, list) { + struct azx_dev *azx_dev = stream_to_azx_dev(s); + if (!azx_dev->irq_pending || + !s->substream || + !s->running) + continue; + ok = azx_position_ok(chip, azx_dev); + if (ok > 0) { + azx_dev->irq_pending = 0; + spin_unlock(&bus->reg_lock); + snd_pcm_period_elapsed(s->substream); + spin_lock(&bus->reg_lock); + } else if (ok < 0) { + pending = 0; /* too early */ + } else + pending++; + } + spin_unlock_irq(&bus->reg_lock); + if (!pending) + return; + msleep(1); + } +} + +/* clear irq_pending flags and assure no on-going workq */ +static void azx_clear_irq_pending(struct azx *chip) +{ + struct hdac_bus *bus = azx_bus(chip); + struct hdac_stream *s; + + spin_lock_irq(&bus->reg_lock); + list_for_each_entry(s, &bus->stream_list, list) { + struct azx_dev *azx_dev = stream_to_azx_dev(s); + azx_dev->irq_pending = 0; + } + spin_unlock_irq(&bus->reg_lock); +} + +static int azx_acquire_irq(struct azx *chip, int do_disconnect) +{ + struct hdac_bus *bus = azx_bus(chip); + int irq; + + if (chip->pci) + irq = chip->pci->irq; + else + irq = platform_get_irq(to_platform_device(chip->card->dev), 0); + + if (request_irq(irq, azx_interrupt, chip->msi ? 0 : IRQF_SHARED, + KBUILD_MODNAME, chip)) { + dev_err(chip->card->dev, + "unable to grab IRQ %d, disabling device\n", irq); + if (do_disconnect) + snd_card_disconnect(chip->card); + return -1; + } + bus->irq = irq; + return 0; +} + +#ifdef CONFIG_PM +static DEFINE_MUTEX(card_list_lock); +static LIST_HEAD(card_list); + +static void azx_add_card_list(struct azx *chip) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + mutex_lock(&card_list_lock); + list_add(&hda->list, &card_list); + mutex_unlock(&card_list_lock); +} + +static void azx_del_card_list(struct azx *chip) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + mutex_lock(&card_list_lock); + list_del_init(&hda->list); + mutex_unlock(&card_list_lock); +} + +/* trigger power-save check at writing parameter */ +static int param_set_xint(const char *val, const struct kernel_param *kp) +{ + struct hda_loongson *hda; + struct azx *chip; + int prev = power_save; + int ret = param_set_int(val, kp); + + if (ret || prev == power_save) + return ret; + + mutex_lock(&card_list_lock); + list_for_each_entry(hda, &card_list, list) { + chip = &hda->chip; + if (!hda->probe_continued || chip->disabled) + continue; + snd_hda_set_power_save(&chip->bus, power_save * 1000); + } + mutex_unlock(&card_list_lock); + return 0; +} +#else +#define azx_add_card_list(chip) /* NOP */ +#define azx_del_card_list(chip) /* NOP */ +#endif /* CONFIG_PM */ + +#if defined(CONFIG_PM_SLEEP) +/* + * power management + */ +static int azx_suspend(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + struct hdac_bus *bus; + + if (chip->disabled) + return 0; + + bus = azx_bus(chip); + snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); + azx_clear_irq_pending(chip); + azx_stop_chip(chip); + azx_enter_link_reset(chip); + if (bus->irq >= 0) { + free_irq(bus->irq, chip); + bus->irq = -1; + } + return 0; +} + +static int azx_resume(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + + if (chip->disabled) + return 0; + + chip->msi = 0; + if (azx_acquire_irq(chip, 1) < 0) + 
return -EIO; + + azx_init_chip(chip, true); + + snd_power_change_state(card, SNDRV_CTL_POWER_D0); + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int azx_runtime_suspend(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + + if (chip->disabled) + return 0; + + if (!azx_has_pm_runtime(chip)) + return 0; + + /* enable controller wake up event */ + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) | + STATESTS_INT_MASK); + + azx_stop_chip(chip); + azx_enter_link_reset(chip); + azx_clear_irq_pending(chip); + return 0; +} + +static int azx_runtime_resume(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + struct hda_codec *codec; + int status; + + if (chip->disabled) + return 0; + + if (!azx_has_pm_runtime(chip)) + return 0; + + /* Read STATESTS before controller reset */ + status = azx_readw(chip, STATESTS); + + azx_init_chip(chip, true); + + if (status) { + list_for_each_codec(codec, &chip->bus) + if (status & (1 << codec->addr)) + schedule_delayed_work(&codec->jackpoll_work, + codec->jackpoll_interval); + } + + /* disable controller Wake Up event*/ + azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) & + ~STATESTS_INT_MASK); + + return 0; +} + +static int azx_runtime_idle(struct device *dev) +{ + struct snd_card *card = dev_get_drvdata(dev); + struct azx *chip = card->private_data; + + if (chip->disabled) + return 0; + + if (!power_save_controller || !azx_has_pm_runtime(chip) || + azx_bus(chip)->codec_powered) + return -EBUSY; + + return 0; +} + +#endif /* CONFIG_PM */ + +#ifdef CONFIG_PM +static const struct dev_pm_ops azx_pm = { + SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume) + SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle) +}; + +#define AZX_PM_OPS &azx_pm +#else +#define AZX_PM_OPS NULL +#endif /* CONFIG_PM */ + +static int azx_probe_continue(struct azx *chip); + +/* + * destructor + */ +static int azx_free(struct azx *chip) +{ + struct device *snddev = chip->card->dev; + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + struct hdac_bus *bus = azx_bus(chip); + + if (azx_has_pm_runtime(chip) && chip->running) + pm_runtime_get_noresume(snddev); + + azx_del_card_list(chip); + + complete_all(&hda->probe_wait); + + if (bus->chip_init) { + azx_clear_irq_pending(chip); + azx_stop_all_streams(chip); + azx_stop_chip(chip); + } + + if (bus->irq >= 0) + free_irq(bus->irq, (void*)chip); + if (bus->remap_addr) + iounmap(bus->remap_addr); + + azx_free_stream_pages(chip); + azx_free_streams(chip); + snd_hdac_bus_exit(bus); + kfree(hda); + + return 0; +} + +static int azx_dev_disconnect(struct snd_device *device) +{ + struct azx *chip = device->device_data; + + chip->bus.shutdown = 1; + return 0; +} + +static int azx_dev_free(struct snd_device *device) +{ + return azx_free(device->device_data); +} + +/* + * constructor + */ +static const struct hda_controller_ops loongson_hda_ops; + +static int azx_create(struct snd_card *card, struct pci_dev *pcidev, + struct platform_device *platdev, int dev, + unsigned int driver_caps, struct azx **rchip) +{ + static struct snd_device_ops ops = { + .dev_disconnect = azx_dev_disconnect, + .dev_free = azx_dev_free, + }; + struct hda_loongson *hda; + struct azx *chip; + int err; + + *rchip = NULL; + + hda = kzalloc(sizeof(*hda), GFP_KERNEL); + if (!hda) { + dev_err(card->dev, "Cannot allocate hda\n"); + return -ENOMEM; + } + + chip = &hda->chip; + 
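+	/*
+	 * Note: @chip is embedded in @hda, so the initialization below acts
+	 * on the shared allocation; azx_free() later recovers the container
+	 * via container_of() and frees @hda as a whole.
+	 */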
mutex_init(&chip->open_mutex); + chip->card = card; + chip->pci = pcidev; + chip->ops = &loongson_hda_ops; + chip->driver_caps = driver_caps; + chip->driver_type = driver_caps & 0xff; + chip->dev_index = dev; + if (jackpoll_ms[dev] >= 50 && jackpoll_ms[dev] <= 60000) + chip->jackpoll_interval = msecs_to_jiffies(jackpoll_ms[dev]); + INIT_LIST_HEAD(&chip->pcm_list); + INIT_WORK(&hda->irq_pending_work, azx_irq_pending_work); + INIT_LIST_HEAD(&hda->list); + init_completion(&hda->probe_wait); + + chip->get_position[0] = chip->get_position[1] = azx_get_pos_lpib; + + chip->snoop = false; + chip->single_cmd = single_cmd; + azx_bus(chip)->codec_mask = chip->codec_probe_mask = 0xf; + + if (bdl_pos_adj[dev] < 0) { + switch (chip->driver_type) { + case AZX_DRIVER_LS7A: + bdl_pos_adj[dev] = 1; + break; + default: + bdl_pos_adj[dev] = 32; + break; + } + } + chip->bdl_pos_adj = bdl_pos_adj[dev]; + + err = azx_bus_init(chip, model[dev]); + if (err < 0) { + kfree(hda); + return err; + } + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) { + dev_err(card->dev, "Error creating device [card]!\n"); + azx_free(chip); + return err; + } + + *rchip = chip; + + return 0; +} + +static int azx_first_init(struct azx *chip) +{ + int dev = chip->dev_index; + struct snd_card *card = chip->card; + struct hdac_bus *bus = azx_bus(chip); + int err; + unsigned short gcap; + + bus->addr = pci_resource_start(chip->pci, 0); + bus->remap_addr = ioremap(bus->addr, + pci_resource_end(chip->pci, 0) - pci_resource_start(chip->pci, 0) + 1); + if (bus->remap_addr == NULL) { + dev_err(card->dev, "ioremap error\n"); + return -ENXIO; + } + + chip->msi = 0; + + if (azx_acquire_irq(chip, 0) < 0) + return -EBUSY; + + synchronize_irq(bus->irq); + + gcap = azx_readw(chip, GCAP); + dev_dbg(card->dev, "chipset global capabilities = 0x%x\n", gcap); + + /* disable 64bit DMA address on some devices */ + if (chip->driver_caps & AZX_DCAPS_NO_64BIT) { + dev_dbg(card->dev, "Disabling 64bit DMA\n"); + gcap &= ~AZX_GCAP_64OK; + } + + /* disable buffer size rounding to 128-byte multiples if supported */ + if (align_buffer_size >= 0) + chip->align_buffer_size = !!align_buffer_size; + else { + if (chip->driver_caps & AZX_DCAPS_NO_ALIGN_BUFSIZE) + chip->align_buffer_size = 0; + else + chip->align_buffer_size = 1; + } + + /* allow 64bit DMA address if supported by H/W */ + if ((gcap & AZX_GCAP_64OK) && !dma_set_mask(chip->card->dev, DMA_BIT_MASK(64))) + dma_set_coherent_mask(chip->card->dev, DMA_BIT_MASK(64)); + else { + dma_set_mask(chip->card->dev, DMA_BIT_MASK(32)); + dma_set_coherent_mask(chip->card->dev, DMA_BIT_MASK(32)); + } + + /* read number of streams from GCAP register instead of using + * hardcoded value + */ + chip->capture_streams = (gcap >> 8) & 0x0f; + chip->playback_streams = (gcap >> 12) & 0x0f; + if (!chip->playback_streams && !chip->capture_streams) { + /* gcap didn't give any info, switching to old method */ + chip->capture_streams = LS7A_NUM_CAPTURE; + chip->playback_streams = LS7A_NUM_PLAYBACK; + } + chip->capture_index_offset = 0; + chip->playback_index_offset = chip->capture_streams; + chip->num_streams = chip->playback_streams + chip->capture_streams; + + /* initialize streams */ + err = azx_init_streams(chip); + if (err < 0) + return err; + chip->playback_streams = chip->capture_streams = 1; /* Loongson */ + + err = azx_alloc_stream_pages(chip); + if (err < 0) + return err; + + /* initialize chip */ + azx_init_chip(chip, (probe_only[dev] & 2) == 0); + + /* codec detection */ + if 
(!azx_bus(chip)->codec_mask) { + dev_err(card->dev, "no codecs found!\n"); + return -ENODEV; + } + + strcpy(card->driver, "HDA-Loongson"); + strlcpy(card->shortname, driver_short_names[chip->driver_type], + sizeof(card->shortname)); + snprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx irq %i", + card->shortname, bus->addr, bus->irq); + + return 0; +} + +static const struct hda_controller_ops loongson_hda_ops = { + .position_check = azx_position_check, +}; + +/* number of codec slots for each chipset: 0 = default slots (i.e. 4) */ +static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {}; + +static int azx_probe_continue(struct azx *chip) +{ + struct hda_loongson *hda = container_of(chip, struct hda_loongson, chip); + int dev = chip->dev_index; + int err; + struct device *snddev = chip->card->dev; + + hda->probe_continued = 1; + + err = azx_first_init(chip); + if (err < 0) + goto out_free; + +#ifdef CONFIG_SND_HDA_INPUT_BEEP + chip->beep_mode = beep_mode[dev]; +#endif + + /* create codec instances */ + err = azx_probe_codecs(chip, azx_max_codecs[chip->driver_type]); + if (err < 0) + goto out_free; + + if ((probe_only[dev] & 1) == 0) { + err = azx_codec_configure(chip); + if (err < 0) + goto out_free; + } + + err = snd_card_register(chip->card); + if (err < 0) + goto out_free; + + chip->running = 1; + azx_add_card_list(chip); +#ifdef CONFIG_PM + pm_runtime_forbid(snddev); + pm_runtime_set_active(snddev); +#endif + snd_hda_set_power_save(&chip->bus, power_save * 1000); + if (azx_has_pm_runtime(chip)) + pm_runtime_put_noidle(snddev); + +out_free: + complete_all(&hda->probe_wait); + return err; +} + +static const struct pci_device_id azx_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_HDA), + .driver_data = AZX_DRIVER_LS7A | AZX_DCAPS_LS2X_WORKAROUND}, + {PCI_DEVICE(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_HDMI), + .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_LS2X_WORKAROUND}, + {} +}; + +MODULE_DEVICE_TABLE(pci, azx_ids); + +static int azx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) +{ + int ret; + bool probe_now; + static int dev; + struct snd_card *card; + struct azx *chip; + struct hda_loongson *hda; + + /* Enable device in PCI config */ + ret = pci_enable_device(pdev); + if (ret < 0) { + printk(KERN_ERR "Loongson HDA (%s): Cannot enable PCI device\n", + pci_name(pdev)); + goto out; + } + + /* request the mem regions */ + ret = pci_request_region(pdev, 0, "Loongson HDA"); + if (ret < 0) { + printk( KERN_ERR "Loongson HDA (%s): cannot request region 0.\n", + pci_name(pdev)); + goto out; + } + + if (dev >= SNDRV_CARDS) + return -ENODEV; + if (!enable[dev]) { + dev++; + return -ENOENT; + } + + ret = snd_card_new(&pdev->dev, index[dev], id[dev], THIS_MODULE, + 0, &card); + if (ret < 0) { + dev_err(&pdev->dev, "Error creating card!\n"); + return ret; + } + + ret = azx_create(card, pdev, NULL, dev, pid->driver_data, &chip); + if (ret < 0) + goto out_free; + card->private_data = chip; + hda = container_of(chip, struct hda_loongson, chip); + + dev_set_drvdata(&pdev->dev, card); + + probe_now = !chip->disabled; + + if (probe_now) { + ret = azx_probe_continue(chip); + if (ret < 0) + goto out_free; + } + + dev++; + if (chip->disabled) + complete_all(&hda->probe_wait); + return 0; + +out_free: + snd_card_free(card); +out: + return ret; +} + +static void azx_pci_remove(struct pci_dev *pdev) +{ + snd_card_free(dev_get_drvdata(&pdev->dev)); +} + +static void azx_pci_shutdown(struct pci_dev *pdev) +{ + struct snd_card *card = 
dev_get_drvdata(&pdev->dev); + struct azx *chip; + + if (!card) + return; + chip = card->private_data; + if (chip && chip->running) + azx_stop_chip(chip); +} + +/* pci_driver definition */ +static struct pci_driver azx_pci_driver = { + .name = "loongson-audio", + .id_table = azx_ids, + .probe = azx_pci_probe, + .remove = azx_pci_remove, + .shutdown = azx_pci_shutdown, + .driver.pm = AZX_PM_OPS, +}; + +static int __init alsa_card_azx_init(void) +{ + int ret; + + ret = pci_register_driver(&azx_pci_driver); + if (ret) + pr_err("hda azx pci driver register\n"); + + return ret; +} + +static void __exit alsa_card_azx_exit(void) +{ + pci_unregister_driver(&azx_pci_driver); +} + +module_init(alsa_card_azx_init) +module_exit(alsa_card_azx_exit) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c19afe486194..5ff7e9302415 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4307,6 +4307,7 @@ static int patch_gf_hdmi(struct hda_codec *codec) * patch entries */ static const struct hda_device_id snd_hda_id_hdmi[] = { +HDA_CODEC_ENTRY(0x00147a47, "Loongson HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x1002793c, "RS600 HDMI", patch_atihdmi), HDA_CODEC_ENTRY(0x10027919, "RS600 HDMI", patch_atihdmi), HDA_CODEC_ENTRY(0x1002791a, "RS690/780 HDMI", patch_atihdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index adfab80b8189..a1a8441cc4bd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -25,6 +25,7 @@ #include "hda_auto_parser.h" #include "hda_jack.h" #include "hda_generic.h" +#include "hda_controller.h" /* keep halting ALC5505 DSP, for power saving */ #define HALT_REALTEK_ALC5505 @@ -344,6 +345,13 @@ static void alc_fixup_micmute_led(struct hda_codec *codec, snd_hda_gen_add_micmute_led_cdev(codec, NULL); } +int has_loongson_workaround(struct hda_codec *codec) +{ + struct azx *chip = bus_to_azx(&codec->bus->core); + + return chip->driver_caps & AZX_DCAPS_LS2X_WORKAROUND; +} + /* * Fix hardware PLL issue * On some codecs, the analog PLL gating control must be off while @@ -685,10 +693,10 @@ static int alc_auto_parse_customize_define(struct hda_codec *codec) goto do_sku; } - if (!codec->bus->pci) + if (!codec->bus->pci && !has_loongson_workaround(codec)) return -1; ass = codec->core.subsystem_id & 0xffff; - if (ass != codec->bus->pci->subsystem_device && (ass & 1)) + if (codec->bus->pci && ass != codec->bus->pci->subsystem_device && (ass & 1)) goto do_sku; nid = 0x1d; -- Gitee From bbba9de0b9f28c350d398771c5b4a681a29b2134 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 22 Nov 2021 11:13:42 +0800 Subject: [PATCH 233/241] stmmac: pci: Add dwmac support for Loongson Change-Id: Iba6a8d0555d757861a4808032befc1b9c42f9948 Signed-off-by: Qing Zhang Signed-off-by: Huacai Chen --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 9 + drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../ethernet/stmicro/stmmac/dwmac-loongson.c | 222 ++++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 53f14c5a9e02..13eda0c37f7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -230,6 +230,15 @@ config DWMAC_INTEL This selects the Intel platform specific bus support for the stmmac driver. This driver is used for Intel Quark/EHL/TGL. 
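+# Note: as written, this driver binds by PCI ID but still requires a
+# matching device-tree node (compatible "loongson, pci-gmac") for its
+# interrupts and PHY mode; see loongson_dwmac_probe() in the new file.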
+config DWMAC_LOONGSON + tristate "Loongson PCI DWMAC support" + default MACH_LOONGSON64 + depends on STMMAC_ETH && PCI + depends on COMMON_CLK + help + This selects the LOONGSON PCI bus support for the stmmac driver, + Support for ethernet controller on Loongson-2K1000 SoC and LS7A1000 bridge. + config STMMAC_PCI tristate "STMMAC PCI bus support" depends on STMMAC_ETH && PCI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 24e6145d4eae..11ea4569c43d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -34,4 +34,5 @@ dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o +obj-$(CONFIG_DWMAC_LOONGSON) += dwmac-loongson.o stmmac-pci-objs:= stmmac_pci.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c new file mode 100644 index 000000000000..ecf759ee1c9f --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Loongson Corporation + */ + +#include +#include +#include +#include +#include +#include "stmmac.h" + +static int loongson_default_data(struct plat_stmmacenet_data *plat) +{ + plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->has_gmac = 1; + plat->force_sf_dma_mode = 1; + + /* Set default value for multicast hash bins */ + plat->multicast_filter_bins = HASH_TABLE_SIZE; + + /* Set default value for unicast filter entries */ + plat->unicast_filter_entries = 1; + + /* Set the maxmtu to a default of JUMBO_LEN */ + plat->maxmtu = JUMBO_LEN; + + /* Set default number of RX and TX queues to use */ + plat->tx_queues_to_use = 1; + plat->rx_queues_to_use = 1; + + /* Disable Priority config by default */ + plat->tx_queues_cfg[0].use_prio = false; + plat->rx_queues_cfg[0].use_prio = false; + + /* Disable RX queues routing by default */ + plat->rx_queues_cfg[0].pkt_route = 0x0; + + /* Default to phy auto-detection */ + plat->phy_addr = -1; + + plat->dma_cfg->pbl = 32; + plat->dma_cfg->pblx8 = true; + + plat->multicast_filter_bins = 256; + return 0; +} + +static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct plat_stmmacenet_data *plat; + struct stmmac_resources res; + struct device_node *np; + int ret, i, phy_mode; + bool mdio = false; + + np = dev_of_node(&pdev->dev); + + if (!np) { + pr_info("dwmac_loongson_pci: No OF node\n"); + return -ENODEV; + } + + if (!of_device_is_compatible(np, "loongson, pci-gmac")) { + pr_info("dwmac_loongson_pci: Incompatible OF node\n"); + return -ENODEV; + } + + plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL); + if (!plat) + return -ENOMEM; + + if (plat->mdio_node) { + dev_err(&pdev->dev, "Found MDIO subnode\n"); + mdio = true; + } + + if (mdio) { + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, + sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) + return -ENOMEM; + plat->mdio_bus_data->needs_reset = true; + } + + plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL); + if (!plat->dma_cfg) + return -ENOMEM; + + /* Enable pci device */ + ret = pci_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); + return ret; + } + + /* Get the base address of device */ + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if 
(pci_resource_len(pdev, i) == 0) + continue; + ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (ret) + return ret; + break; + } + + plat->bus_id = of_alias_get_id(np, "ethernet"); + if (plat->bus_id < 0) + plat->bus_id = pci_dev_id(pdev); + + phy_mode = device_get_phy_mode(&pdev->dev); + if (phy_mode < 0) { + dev_err(&pdev->dev, "phy_mode not found\n"); + return phy_mode; + } + + plat->phy_interface = phy_mode; + plat->interface = PHY_INTERFACE_MODE_GMII; + + pci_set_master(pdev); + + loongson_default_data(plat); + pci_enable_msi(pdev); + memset(&res, 0, sizeof(res)); + res.addr = pcim_iomap_table(pdev)[0]; + + res.irq = of_irq_get_byname(np, "macirq"); + if (res.irq < 0) { + dev_err(&pdev->dev, "IRQ macirq not found\n"); + ret = -ENODEV; + } + + res.wol_irq = of_irq_get_byname(np, "eth_wake_irq"); + if (res.wol_irq < 0) { + dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n"); + res.wol_irq = res.irq; + } + + res.lpi_irq = of_irq_get_byname(np, "eth_lpi"); + if (res.lpi_irq < 0) { + dev_err(&pdev->dev, "IRQ eth_lpi not found\n"); + ret = -ENODEV; + } + + return stmmac_dvr_probe(&pdev->dev, plat, &res); +} + +static void loongson_dwmac_remove(struct pci_dev *pdev) +{ + int i; + + stmmac_dvr_remove(&pdev->dev); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (pci_resource_len(pdev, i) == 0) + continue; + pcim_iounmap_regions(pdev, BIT(i)); + break; + } + + pci_disable_device(pdev); +} + +static int __maybe_unused loongson_dwmac_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = stmmac_suspend(dev); + if (ret) + return ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int __maybe_unused loongson_dwmac_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return stmmac_resume(dev); +} + +static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, + loongson_dwmac_resume); + +static const struct pci_device_id loongson_dwmac_id_table[] = { + { PCI_VDEVICE(LOONGSON, 0x7a03) }, + {} +}; +MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table); + +struct pci_driver loongson_dwmac_driver = { + .name = "dwmac-loongson-pci", + .id_table = loongson_dwmac_id_table, + .probe = loongson_dwmac_probe, + .remove = loongson_dwmac_remove, + .driver = { + .pm = &loongson_dwmac_pm_ops, + }, +}; + +module_pci_driver(loongson_dwmac_driver); + +MODULE_DESCRIPTION("Loongson DWMAC PCI driver"); +MODULE_AUTHOR("Qing Zhang "); +MODULE_LICENSE("GPL v2"); -- Gitee From 43ff7ac530ac66c7d5437f176d5b359b4a787165 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 14 Jan 2022 15:14:30 +0800 Subject: [PATCH 234/241] net: stmmac: Fix "Unbalanced pm_runtime_enable!" warning If the device is PCI based like intel-eth-pci, pm_runtime_enable() is already called by pci_pm_init(). So only pm_runtime_enable() when it's not already enabled. Change-Id: I177735933c2ed323b7f269bb2bc0128d85078774 Signed-off-by: Kai-Heng Feng Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 05de2b50a797..edef2407097d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5185,7 +5185,8 @@ int stmmac_dvr_probe(struct device *device, pm_runtime_get_noresume(device); pm_runtime_set_active(device); - pm_runtime_enable(device); + if (!pm_runtime_enabled(device)) + pm_runtime_enable(device); if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { -- Gitee From cf1ab99de1c5b0f575b73c9f3ef55c4a28bfedf9 Mon Sep 17 00:00:00 2001 From: Feiyang Chen Date: Tue, 12 Jul 2022 09:16:18 +0800 Subject: [PATCH 235/241] stmmac: pci: Add LS7A support for dwmac-loongson Current dwmac-loongson only support LS2K in the "probed with PCI and configured with DT" manner. We add LS7A support on which the devices are fully PCI (non-DT). Change-Id: I12af537f147bf9cff60873584d41423922bd60cd Signed-off-by: Huacai Chen Signed-off-by: Feiyang Chen --- .../ethernet/stmicro/stmmac/dwmac-loongson.c | 175 ++++++++++++------ .../net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 2 files changed, 123 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index ecf759ee1c9f..f3f3682eae78 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -9,14 +9,22 @@ #include #include "stmmac.h" -static int loongson_default_data(struct plat_stmmacenet_data *plat) +struct stmmac_pci_info { + int (*setup)(struct pci_dev *pdev, struct plat_stmmacenet_data *plat); +}; + +static void common_default_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) { + plat->bus_id = (pci_domain_nr(pdev->bus) << 16) | PCI_DEVID(pdev->bus->number, pdev->devfn); + plat->interface = PHY_INTERFACE_MODE_GMII; + plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ plat->has_gmac = 1; plat->force_sf_dma_mode = 1; /* Set default value for multicast hash bins */ - plat->multicast_filter_bins = HASH_TABLE_SIZE; + plat->multicast_filter_bins = 256; /* Set default value for unicast filter entries */ plat->unicast_filter_entries = 1; @@ -35,32 +43,79 @@ static int loongson_default_data(struct plat_stmmacenet_data *plat) /* Disable RX queues routing by default */ plat->rx_queues_cfg[0].pkt_route = 0x0; - /* Default to phy auto-detection */ - plat->phy_addr = -1; - plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; - plat->multicast_filter_bins = 256; + plat->clk_ref_rate = 125000000; + plat->clk_ptp_rate = 125000000; +} + +static int loongson_gmac_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + common_default_data(pdev, plat); + + plat->mdio_bus_data->phy_mask = 0; + + plat->phy_addr = -1; + plat->phy_interface = PHY_INTERFACE_MODE_RGMII_ID; + + return 0; +} + +static struct stmmac_pci_info loongson_gmac_pci_info = { + .setup = loongson_gmac_data, +}; + +static void loongson_gnet_fix_speed(void *priv, unsigned int speed) +{ + struct net_device *ndev = (struct net_device *)(*(unsigned long *)priv); + struct stmmac_priv *ptr = netdev_priv(ndev); + + if (speed == SPEED_1000) { + if (readl(ptr->ioaddr + MAC_CTRL_REG) & (1 << 15) /* PS */) { + /* reset phy */ + phy_set_bits(ndev->phydev, 0 /*MII_BMCR*/, + 0x200 /*BMCR_ANRESTART*/); + } 
+	}
+}
+
+static int loongson_gnet_data(struct pci_dev *pdev,
+			      struct plat_stmmacenet_data *plat)
+{
+	common_default_data(pdev, plat);
+
+	plat->mdio_bus_data->phy_mask = 0xfffffffb;
+
+	plat->phy_addr = 2;
+	plat->phy_interface = PHY_INTERFACE_MODE_GMII;
+
+	/* The GNET 1000M speed needs a workaround */
+	plat->fix_mac_speed = loongson_gnet_fix_speed;
+
+	/* Get netdev pointer address */
+	plat->bsp_priv = &(pdev->dev.driver_data);
+	return 0;
 }
-static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static struct stmmac_pci_info loongson_gnet_pci_info = {
+	.setup = loongson_gnet_data,
+};
+
+static int loongson_dwmac_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
 {
 	struct plat_stmmacenet_data *plat;
+	struct stmmac_pci_info *info;
 	struct stmmac_resources res;
 	struct device_node *np;
-	int ret, i, phy_mode;
+	int ret, i, bus_id, phy_mode;
 	bool mdio = false;
 
 	np = dev_of_node(&pdev->dev);
-
-	if (!np) {
-		pr_info("dwmac_loongson_pci: No OF node\n");
-		return -ENODEV;
-	}
-
-	if (!of_device_is_compatible(np, "loongson, pci-gmac")) {
+	np = dev_of_node(&pdev->dev);
+	if (np && !of_device_is_compatible(np, "loongson, pci-gmac")) {
 		pr_info("dwmac_loongson_pci: Incompatible OF node\n");
 		return -ENODEV;
 	}
@@ -74,14 +129,14 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
 		mdio = true;
 	}
 
-	if (mdio) {
-		plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
-						   sizeof(*plat->mdio_bus_data),
-						   GFP_KERNEL);
-		if (!plat->mdio_bus_data)
-			return -ENOMEM;
+	plat->mdio_bus_data = devm_kzalloc(&pdev->dev,
+					   sizeof(*plat->mdio_bus_data),
+					   GFP_KERNEL);
+	if (!plat->mdio_bus_data)
+		return -ENOMEM;
+
+	if (mdio)
 		plat->mdio_bus_data->needs_reset = true;
-	}
 
 	plat->dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->dma_cfg), GFP_KERNEL);
 	if (!plat->dma_cfg)
@@ -104,42 +159,52 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id
 		break;
 	}
 
-	plat->bus_id = of_alias_get_id(np, "ethernet");
-	if (plat->bus_id < 0)
-		plat->bus_id = pci_dev_id(pdev);
-
-	phy_mode = device_get_phy_mode(&pdev->dev);
-	if (phy_mode < 0) {
-		dev_err(&pdev->dev, "phy_mode not found\n");
-		return phy_mode;
-	}
-
-	plat->phy_interface = phy_mode;
-	plat->interface = PHY_INTERFACE_MODE_GMII;
-
 	pci_set_master(pdev);
 
-	loongson_default_data(plat);
-	pci_enable_msi(pdev);
-	memset(&res, 0, sizeof(res));
-	res.addr = pcim_iomap_table(pdev)[0];
+	info = (struct stmmac_pci_info *)id->driver_data;
+	ret = info->setup(pdev, plat);
+	if (ret)
+		return ret;
 
-	res.irq = of_irq_get_byname(np, "macirq");
-	if (res.irq < 0) {
-		dev_err(&pdev->dev, "IRQ macirq not found\n");
-		ret = -ENODEV;
+	if (np) {
+		bus_id = of_alias_get_id(np, "ethernet");
+		if (bus_id >= 0)
+			plat->bus_id = bus_id;
+
+		phy_mode = device_get_phy_mode(&pdev->dev);
+		if (phy_mode < 0) {
+			dev_err(&pdev->dev, "phy_mode not found\n");
+			return phy_mode;
+		}
+		plat->phy_interface = phy_mode;
 	}
 
-	res.wol_irq = of_irq_get_byname(np, "eth_wake_irq");
-	if (res.wol_irq < 0) {
-		dev_info(&pdev->dev, "IRQ eth_wake_irq not found, using macirq\n");
-		res.wol_irq = res.irq;
-	}
+	pci_enable_msi(pdev);
 
-	res.lpi_irq = of_irq_get_byname(np, "eth_lpi");
-	if (res.lpi_irq < 0) {
-		dev_err(&pdev->dev, "IRQ eth_lpi not found\n");
-		ret = -ENODEV;
+	memset(&res, 0, sizeof(res));
+	res.addr = pcim_iomap_table(pdev)[0];
+	if (np) {
+		res.irq = of_irq_get_byname(np, "macirq");
+		if (res.irq < 0) {
+			dev_err(&pdev->dev, "IRQ macirq not found\n");
+			ret = -ENODEV;
+		}
+
+		res.wol_irq = of_irq_get_byname(np, "eth_wake_irq");
+		if (res.wol_irq < 0) {
+			dev_info(&pdev->dev,
+				 "IRQ eth_wake_irq not found, using macirq\n");
+			res.wol_irq = res.irq;
+		}
+
+		res.lpi_irq = of_irq_get_byname(np, "eth_lpi");
+		if (res.lpi_irq < 0) {
+			dev_err(&pdev->dev, "IRQ eth_lpi not found\n");
+			ret = -ENODEV;
+		}
+	} else {
+		res.irq = pdev->irq;
+		res.wol_irq = pdev->irq;
 	}
 
 	return stmmac_dvr_probe(&pdev->dev, plat, &res);
@@ -199,8 +264,12 @@ static int __maybe_unused loongson_dwmac_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend,
 			 loongson_dwmac_resume);
 
+#define PCI_DEVICE_ID_LOONGSON_GMAC	0x7a03
+#define PCI_DEVICE_ID_LOONGSON_GNET	0x7a13
+
 static const struct pci_device_id loongson_dwmac_id_table[] = {
-	{ PCI_VDEVICE(LOONGSON, 0x7a03) },
+	{ PCI_DEVICE_DATA(LOONGSON, GMAC, &loongson_gmac_pci_info) },
+	{ PCI_DEVICE_DATA(LOONGSON, GNET, &loongson_gnet_pci_info) },
 	{}
 };
 MODULE_DEVICE_TABLE(pci, loongson_dwmac_id_table);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index edef2407097d..36952bb2f49e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -74,7 +74,7 @@ MODULE_PARM_DESC(phyaddr, "Physical device address");
 #define	STMMAC_TX_THRESH(x)	((x)->dma_tx_size / 4)
 #define	STMMAC_RX_THRESH(x)	((x)->dma_rx_size / 4)
 
-static int flow_ctrl = FLOW_AUTO;
+static int flow_ctrl = FLOW_OFF;
 module_param(flow_ctrl, int, 0644);
 MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]");
-- 
Gitee

From 809ad455259bc5479ae1dcc48f11e750150f75a6 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Wed, 10 Aug 2022 15:03:08 +0800
Subject: [PATCH 236/241] LoongArch: Update Loongson-3 default config file

1, Add more partition types;
2, Add compressed firmware support;
3, Add NVME related options;
4, Add some USB Type-C options;
5, Add some common network options;
6, Add some Bluetooth device drivers;
7, Add LoongArch CRC32(c) Acceleration;
8, Add jump label (the patching mechanism behind static keys; see the
   sketch after this list);
9, Add virtio drivers in order to run in QEMU;
10, Remove obsolete config options (for some detailed information, see
    Link).
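For item 8, a minimal sketch of what CONFIG_JUMP_LABEL enables (an
illustrative example only, not code from this series; the key and the
do_fast_path() helper are hypothetical):

	#include <linux/jump_label.h>

	static DEFINE_STATIC_KEY_FALSE(use_fast_path);

	void hot_path(void)
	{
		/*
		 * With CONFIG_JUMP_LABEL this compiles to a NOP that is
		 * patched into a jump once the key is enabled.
		 */
		if (static_branch_unlikely(&use_fast_path))
			do_fast_path();	/* hypothetical helper */
	}

	/* e.g. from an init routine, once the fast path is known to work */
	static_branch_enable(&use_fast_path);

Without CONFIG_JUMP_LABEL the same code falls back to testing a counter
at run time, so enabling the option removes a load and compare from such
hot paths.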
Change-Id: Ieef8740a9e3fe3794676dcb6797fb406466eddfb Link: https://lore.kernel.org/kernel-janitors/20220929090645.1389-1-lukas.bulwahn@gmail.com/ Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 159 ++++++++++++++++++++- 1 file changed, 156 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 5e13d898fc10..aa3b77f59c63 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -23,6 +23,7 @@ CONFIG_BLK_CGROUP=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_RDMA=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y @@ -48,7 +49,7 @@ CONFIG_CPU_HAS_LASX=y CONFIG_NUMA=y CONFIG_EFI=y CONFIG_SMP=y -CONFIG_NR_CPUS=64 +CONFIG_NR_CPUS=256 CONFIG_HZ_250=y CONFIG_KEXEC=y CONFIG_HIBERNATION=y @@ -66,15 +67,28 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y +CONFIG_EFI_BOOTLOADER_CONTROL=m CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EFI_TEST=m +CONFIG_VHOST_NET=m +CONFIG_VHOST_SCSI=m +CONFIG_VHOST_VSOCK=m +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_DEV_THROTTLING_LOW=y +CONFIG_BLK_WBT=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_CGROUP_IOCOST=y CONFIG_PARTITION_ADVANCED=y +CONFIG_BSD_DISKLABEL=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CMDLINE_PARTITION=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y CONFIG_BINFMT_MISC=m @@ -84,6 +98,7 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_FRONTSWAP=y +CONFIG_CMA=y CONFIG_ZSWAP=y CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y CONFIG_ZPOOL=y @@ -93,8 +108,11 @@ CONFIG_ZSMALLOC=m CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y +CONFIG_TLS=m +CONFIG_TLS_DEVICE=y CONFIG_XFRM_USER=y CONFIG_NET_KEY=y +CONFIG_XDP_SOCKETS=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -106,14 +124,22 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_INET_ESP=m CONFIG_INET_UDP_DIAG=y CONFIG_TCP_CONG_ADVANCED=y CONFIG_TCP_CONG_BBR=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y +CONFIG_INET6_ESP=m CONFIG_IPV6_MROUTE=y +CONFIG_MPTCP=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_BRIDGE_NETFILTER=m @@ -123,6 +149,8 @@ CONFIG_NF_LOG_NETDEV=m CONFIG_NF_CONNTRACK_AMANDA=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_TABLES=m @@ -205,7 +233,11 @@ CONFIG_IP_VS=m CONFIG_IP_VS_IPV6=y CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m CONFIG_IP_VS_NFCT=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_DUP_IPV4=m @@ -265,10 +297,14 @@ CONFIG_BPFILTER=y CONFIG_IP_SCTP=m CONFIG_RDS=y CONFIG_L2TP=m +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y CONFIG_VLAN_8021Q_MVRP=y +CONFIG_LLC2=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_PRIO=m @@ -289,18 +325,44 @@ 
CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_BPF=m CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=y CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y +CONFIG_BPF_STREAM_PARSER=y CONFIG_BT=m +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +CONFIG_BT_BNEP_MC_FILTER=y +CONFIG_BT_BNEP_PROTO_FILTER=y +CONFIG_BT_HIDP=m +CONFIG_BT_HS=y CONFIG_BT_HCIBTUSB=m -# CONFIG_BT_HCIBTUSB_BCM is not set +CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y +CONFIG_BT_HCIBTUSB_MTK=y +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_ATH3K=y +CONFIG_BT_HCIUART_INTEL=y +CONFIG_BT_HCIUART_AG6XX=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBPA10X=m +CONFIG_BT_HCIBFUSB=m +CONFIG_BT_HCIDTL1=m +CONFIG_BT_HCIBT3C=m +CONFIG_BT_HCIBLUECARD=m +CONFIG_BT_HCIVHCI=m +CONFIG_BT_MRVL=m +CONFIG_BT_ATH3K=m CONFIG_CFG80211=m CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m CONFIG_RFKILL=m CONFIG_RFKILL_INPUT=y CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_CEPH_LIB=m CONFIG_PCIEPORTBUS=y CONFIG_HOTPLUG_PCI_PCIE=y @@ -309,6 +371,7 @@ CONFIG_PCIEAER=y CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_SHPC=y +CONFIG_PCI_HOST_GENERIC=y CONFIG_PCCARD=m CONFIG_YENTA=m CONFIG_RAPIDIO=y @@ -320,6 +383,7 @@ CONFIG_RAPIDIO_MPORT_CDEV=m CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_FW_LOADER_COMPRESS=y CONFIG_MTD=m CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=m @@ -329,6 +393,8 @@ CONFIG_MTD_CFI_AMDSTD=m CONFIG_MTD_CFI_STAA=m CONFIG_MTD_RAM=m CONFIG_MTD_ROM=m +CONFIG_MTD_UBI=m +CONFIG_MTD_UBI_BLOCK=y CONFIG_PARPORT=y CONFIG_PARPORT_PC=y CONFIG_PARPORT_SERIAL=y @@ -336,11 +402,23 @@ CONFIG_PARPORT_PC_FIFO=y CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_RBD=m CONFIG_BLK_DEV_NVME=y +CONFIG_NVME_MULTIPATH=y +CONFIG_NVME_RDMA=m +CONFIG_NVME_FC=m +CONFIG_NVME_TCP=m +CONFIG_NVME_TARGET=m +CONFIG_NVME_TARGET_PASSTHRU=y +CONFIG_NVME_TARGET_LOOP=m +CONFIG_NVME_TARGET_RDMA=m +CONFIG_NVME_TARGET_FC=m +CONFIG_NVME_TARGET_TCP=m CONFIG_EEPROM_AT24=m CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y @@ -370,6 +448,7 @@ CONFIG_SCSI_QLA_FC=m CONFIG_TCM_QLA2XXX=m CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_LPFC=m +CONFIG_SCSI_VIRTIO=m CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y @@ -414,6 +493,7 @@ CONFIG_VXLAN=y CONFIG_RIONET=m CONFIG_TUN=m CONFIG_VETH=m +CONFIG_VIRTIO_NET=m # CONFIG_NET_VENDOR_3COM is not set # CONFIG_NET_VENDOR_ADAPTEC is not set # CONFIG_NET_VENDOR_AGERE is not set @@ -479,6 +559,7 @@ CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m +CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m @@ -514,8 +595,14 @@ CONFIG_RTL8821AE=m CONFIG_RTL8192CU=m # CONFIG_RTLWIFI_DEBUG is not set CONFIG_RTL8XXXU=m +CONFIG_RTW88=m +CONFIG_RTW88_8822BE=m +CONFIG_RTW88_8822CE=m +CONFIG_RTW88_8723DE=m +CONFIG_RTW88_8821CE=m CONFIG_ZD1211RW=m CONFIG_USB_NET_RNDIS_WLAN=m +CONFIG_USB4_NET=m CONFIG_INPUT_POLLDEV=m CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_MOUSEDEV_PSAUX=y @@ -540,10 +627,12 @@ CONFIG_SERIAL_8250_RSA=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_NONSTANDARD=y CONFIG_PRINTER=m +CONFIG_VIRTIO_CONSOLE=y CONFIG_IPMI_HANDLER=m CONFIG_IPMI_DEVICE_INTERFACE=m CONFIG_IPMI_SI=m CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_PIIX4=y @@ -583,6 +672,8 @@ CONFIG_DRM_AMDGPU_CIK=y CONFIG_DRM_AMDGPU_USERPTR=y CONFIG_DRM_AST=y CONFIG_DRM_LOONGSON=y +CONFIG_DRM_QXL=m 
+CONFIG_DRM_VIRTIO_GPU=m CONFIG_FB_EFI=y CONFIG_FB_RADEON=y CONFIG_LCD_PLATFORM=m @@ -644,15 +735,29 @@ CONFIG_USB_SERIAL_FTDI_SIO=m CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_OPTION=m CONFIG_USB_GADGET=y +CONFIG_TYPEC=m +CONFIG_TYPEC_TCPM=m +CONFIG_TYPEC_TCPCI=m +CONFIG_TYPEC_UCSI=m +CONFIG_UCSI_ACPI=m CONFIG_INFINIBAND=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_EFI=y CONFIG_DMADEVICES=y +CONFIG_UDMABUF=y +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y +CONFIG_DMABUF_HEAPS_CMA=y CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m CONFIG_UIO_DMEM_GENIRQ=m CONFIG_UIO_PCI_GENERIC=m -# CONFIG_VIRTIO_MENU is not set +CONFIG_VFIO=m +CONFIG_VFIO_PCI=m +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=m +CONFIG_VIRTIO_MMIO=m CONFIG_STAGING=y CONFIG_COMEDI=m CONFIG_COMEDI_PCI_DRIVERS=m @@ -673,12 +778,20 @@ CONFIG_COMEDI_NI_PCIDIO=m CONFIG_COMEDI_NI_PCIMIO=m CONFIG_R8188EU=m # CONFIG_88EU_AP_MODE is not set +CONFIG_LOONGSON_IOMMU=y CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y CONFIG_DEVFREQ_GOV_POWERSAVE=y CONFIG_DEVFREQ_GOV_USERSPACE=y +CONFIG_NTB=m +CONFIG_NTB_MSI=y +CONFIG_NTB_IDT=m +CONFIG_NTB_SWITCHTEC=m +CONFIG_NTB_PERF=m +CONFIG_NTB_TRANSPORT=m CONFIG_PWM=y +CONFIG_USB4=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -686,10 +799,17 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y +CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y @@ -698,11 +818,14 @@ CONFIG_QFMT_V1=m CONFIG_QFMT_V2=m CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=m +CONFIG_CUSE=m +CONFIG_VIRTIO_FS=m CONFIG_OVERLAY_FS=y CONFIG_OVERLAY_FS_INDEX=y CONFIG_OVERLAY_FS_XINO_AUTO=y CONFIG_OVERLAY_FS_METACOPY=y CONFIG_FSCACHE=y +CONFIG_CACHEFILES=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -711,19 +834,39 @@ CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_FAT_DEFAULT_CODEPAGE=936 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312" +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_CONFIGFS_FS=y +CONFIG_ORANGEFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_ECRYPT_FS_MESSAGING=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m +CONFIG_UBIFS_FS=m +CONFIG_UBIFS_FS_ADVANCED_COMPR=y CONFIG_CRAMFS=m CONFIG_SQUASHFS=y CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y +CONFIG_MINIX_FS=m +CONFIG_ROMFS_FS=m +CONFIG_PSTORE=m +CONFIG_PSTORE_LZO_COMPRESS=m +CONFIG_PSTORE_LZ4_COMPRESS=m +CONFIG_PSTORE_LZ4HC_COMPRESS=m +CONFIG_PSTORE_842_COMPRESS=y +CONFIG_PSTORE_ZSTD_COMPRESS=y +CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +CONFIG_EROFS_FS=m +CONFIG_EROFS_FS_ZIP_LZMA=y +CONFIG_EROFS_FS_PCPU_KTHREAD=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y @@ -734,6 +877,10 @@ CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_NFSD_BLOCKLAYOUT=y +CONFIG_CEPH_FS=m +CONFIG_CEPH_FSCACHE=y +CONFIG_CEPH_FS_POSIX_ACL=y +CONFIG_CEPH_FS_SECURITY_LABEL=y CONFIG_CIFS=m # CONFIG_CIFS_DEBUG is not set CONFIG_9P_FS=y @@ -741,6 +888,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_936=y CONFIG_NLS_ASCII=y CONFIG_NLS_UTF8=y +CONFIG_DLM=m CONFIG_KEY_DH_OPERATIONS=y CONFIG_SECURITY=y CONFIG_SECURITY_SELINUX=y @@ -756,6 +904,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_HMAC=y 
 CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
@@ -777,6 +926,10 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_DMA_CMA=y
+CONFIG_DMA_PERNUMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
-- 
Gitee

From 2c12ddfc99b9ed32fbe28dd481e5d2efa205abc9 Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 237/241] LoongArch: Add Loongson Binary Translation support

Change-Id: I73924787ed5ad44f2ac938500ddf64b35079a75b
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig                        |   3 +
 arch/loongarch/include/asm/asmmacro.h         |  39 ++
 arch/loongarch/include/asm/errno.h            |  19 +
 arch/loongarch/include/asm/lbt.h              | 148 ++++++
 arch/loongarch/include/asm/loongarchregs.h    |   4 +
 arch/loongarch/include/asm/processor.h        |  21 +-
 arch/loongarch/include/asm/switch_to.h        |   2 +
 arch/loongarch/include/asm/thread_info.h      |   4 +
 .../include/asm/trans_mips_syscalls.h         | 110 ++++
 arch/loongarch/include/asm/trans_syscalls.h   |  13 +
 arch/loongarch/include/asm/unistd.h           |  16 +
 arch/loongarch/include/uapi/asm/ptrace.h      |  14 +
 arch/loongarch/include/uapi/asm/sigcontext.h  |   8 +
 arch/loongarch/kernel/Makefile                |   2 +
 arch/loongarch/kernel/asm-offsets.c           |  19 +-
 arch/loongarch/kernel/cpu-probe.c             |  14 +
 arch/loongarch/kernel/fpu.S                   |  82 +++
 arch/loongarch/kernel/process.c               |   5 +
 arch/loongarch/kernel/ptrace.c                |  46 ++
 arch/loongarch/kernel/scall-trans-i386.S      | 495 ++++++++++++++++++
 arch/loongarch/kernel/scall-trans-mips64.S    | 437 ++++++++++++++++
 arch/loongarch/kernel/signal-common.h         |   7 +
 arch/loongarch/kernel/signal.c                | 138 +++++
 arch/loongarch/kernel/trans_i386_syscalls.c   |  70 +++
 arch/loongarch/kernel/trans_mips_syscalls.c   | 125 +++++
 arch/loongarch/kernel/traps.c                 |  33 +-
 26 files changed, 1854 insertions(+), 20 deletions(-)
 create mode 100644 arch/loongarch/include/asm/errno.h
 create mode 100644 arch/loongarch/include/asm/lbt.h
 create mode 100644 arch/loongarch/include/asm/trans_mips_syscalls.h
 create mode 100644 arch/loongarch/include/asm/trans_syscalls.h
 create mode 100644 arch/loongarch/kernel/scall-trans-i386.S
 create mode 100644 arch/loongarch/kernel/scall-trans-mips64.S
 create mode 100644 arch/loongarch/kernel/trans_i386_syscalls.c
 create mode 100644 arch/loongarch/kernel/trans_mips_syscalls.c

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index d06cb2d54dbd..e806bfe338df 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -424,6 +424,9 @@ config CPU_HAS_LASX
 
 	  If unsure, say Y.
 
+config CPU_HAS_LBT
+	bool "Support for the Loongson Binary Translation"
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
# - We use SYS_SUPPORTS_HIGHMEM to offer highmem only for systems where we diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index 4ef3b9dfa80f..952c770afca5 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -330,11 +330,50 @@ .macro fpu_save_csr thread tmp movfcsr2gr \tmp, fcsr0 stptr.w \tmp, \thread, THREAD_FCSR +#ifdef CONFIG_CPU_HAS_LBT + /* TM bit is always 0 if LBT not supported */ + andi \tmp, \tmp, FPU_CSR_TM + beqz \tmp, 1f + bstrins.d \tmp, zero, FPU_CSR_TM_SHIFT, FPU_CSR_TM_SHIFT + movgr2fcsr fcsr0, \tmp + x86mftop \tmp + stptr.d \tmp, \thread, THREAD_FTOP +1: +#endif .endm .macro fpu_restore_csr thread tmp ldptr.w \tmp, \thread, THREAD_FCSR movgr2fcsr fcsr0, \tmp +#ifdef CONFIG_CPU_HAS_LBT + /* TM bit is always 0 if LBT not supported */ + andi \tmp, \tmp, FPU_CSR_TM + beqz \tmp, 1f + ldptr.d \tmp, \thread, THREAD_FTOP + x86mttop 0x0 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x1 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x2 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x3 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x4 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x5 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x6 + beq \tmp, zero, 1f + addi.d \tmp, \tmp, -1 + x86mttop 0x7 +1: +#endif .endm .macro fpu_save_cc thread tmp0 tmp1 diff --git a/arch/loongarch/include/asm/errno.h b/arch/loongarch/include/asm/errno.h new file mode 100644 index 000000000000..2039ecb21cef --- /dev/null +++ b/arch/loongarch/include/asm/errno.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#ifndef _ASM_ERRNO_H +#define _ASM_ERRNO_H + +#include + + +/* The biggest error number defined here or in . */ +#define EMAXERRNO 1133 + +#endif /* _ASM_ERRNO_H */ diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h new file mode 100644 index 000000000000..86e4ec9c745b --- /dev/null +++ b/arch/loongarch/include/asm/lbt.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2020 Loongson Technology Corporation Limited + * + */ +#ifndef _ASM_LBT_H +#define _ASM_LBT_H + +#include +#include +#include + +#ifdef CONFIG_CPU_HAS_LBT + +#define STR(x) __STR(x) +#define __STR(x) #x + +extern void _init_lbt(void); + +static inline void save_lbt_registers(struct loongarch_lbt *prev) +{ + unsigned long tmp = 0; + + __asm__ __volatile__ ( + "movscr2gr %[tmp], $scr0 \n" + "stptr.d %[tmp], %[prev], " STR(THREAD_SCR0) " \n" + "movscr2gr %[tmp], $scr1 \n" + "stptr.d %[tmp], %[prev], " STR(THREAD_SCR1) " \n" + "movscr2gr %[tmp], $scr2 \n" + "stptr.d %[tmp], %[prev], " STR(THREAD_SCR2) " \n" + "movscr2gr %[tmp], $scr3 \n" + "stptr.d %[tmp], %[prev], " STR(THREAD_SCR3) " \n" + "x86mfflag %[tmp], 0x3f \n" + "stptr.d %[tmp], %[prev], " STR(THREAD_EFLAGS) " \n" + : + : [prev] "r" (prev), [tmp] "r" (tmp) + : "memory" + ); +} + +static inline void restore_lbt_registers(struct loongarch_lbt *next) +{ + unsigned long tmp = 0; + + __asm__ __volatile__ ( + "ldptr.d %[tmp], %[next], " STR(THREAD_SCR0) " \n" + "movgr2scr $scr0, %[tmp] \n" + "ldptr.d %[tmp], %[next], " STR(THREAD_SCR1) " \n" + "movgr2scr $scr1, %[tmp] \n" + "ldptr.d %[tmp], %[next], " STR(THREAD_SCR2) " \n" + "movgr2scr $scr2, %[tmp] \n" + "ldptr.d %[tmp], %[next], " STR(THREAD_SCR3) " \n" + "movgr2scr $scr3, %[tmp] \n" + "ldptr.d %[tmp], %[next], " STR(THREAD_EFLAGS) " \n" + "x86mtflag %[tmp], 0x3f \n" + : + : [next] "r" (next), [tmp] "r" (tmp) + : + ); +} + +static inline void enable_lbt(void) +{ + if (cpu_has_lbt) + csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); +} + +static inline void disable_lbt(void) +{ + if (cpu_has_lbt) + csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN); +} + +static inline int is_lbt_enabled(void) +{ + if (!cpu_has_lbt) + return 0; + + return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ? 
+ 1 : 0; +} + +static inline int is_lbt_owner(void) +{ + return test_thread_flag(TIF_USEDLBT); +} + +static inline void __own_lbt(void) +{ + enable_lbt(); + set_thread_flag(TIF_USEDLBT); + KSTK_EUEN(current) |= CSR_EUEN_LBTEN; +} + +static inline void init_lbt(void) +{ + __own_lbt(); + _init_lbt(); +} + +static inline void own_lbt_inatomic(int restore) +{ + if (cpu_has_lbt && !is_lbt_owner()) { + __own_lbt(); + if (restore) + restore_lbt_registers(¤t->thread.lbt); + } +} + +static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) +{ + if (is_lbt_owner()) { + if (save) + save_lbt_registers(&tsk->thread.lbt); + + disable_lbt(); + clear_tsk_thread_flag(tsk, TIF_USEDLBT); + } + KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN); +} + +static inline void lose_lbt(int save) +{ + preempt_disable(); + lose_lbt_inatomic(save, current); + preempt_enable(); +} + +#else +static inline void own_lbt_inatomic(int restore) {} +static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {} +static inline void init_lbt(void) {} +static inline void lose_lbt(int save) {} +#endif + +static inline int thread_lbt_context_live(void) +{ + if (__builtin_constant_p(cpu_has_lbt) && !cpu_has_lbt) + return 0; + + return test_thread_flag(TIF_LBT_CTX_LIVE); +} + +#endif diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h index 72fbc7477aa8..1bd874ef2dcc 100644 --- a/arch/loongarch/include/asm/loongarchregs.h +++ b/arch/loongarch/include/asm/loongarchregs.h @@ -1445,6 +1445,10 @@ __BUILD_CSR_OP(tlbidx) #define FPU_CSR_RU 0x200 /* towards +Infinity */ #define FPU_CSR_RD 0x300 /* towards -Infinity */ +/* LBT extension */ +#define FPU_CSR_TM_SHIFT 0x6 +#define FPU_CSR_TM 0x40 /* float register in stack mode */ + #define write_fcsr(dest, val) \ do { \ __asm__ __volatile__( \ diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h index 23e563cc47ac..0e86c8a18989 100644 --- a/arch/loongarch/include/asm/processor.h +++ b/arch/loongarch/include/asm/processor.h @@ -81,9 +81,20 @@ BUILD_FPR_ACCESS(64) struct loongarch_fpu { unsigned int fcsr; uint64_t fcc; /* 8x8 */ + uint64_t ftop; union fpureg fpr[NUM_FPU_REGS]; }; +struct loongarch_lbt { + /* Scratch registers */ + unsigned long scr0; + unsigned long scr1; + unsigned long scr2; + unsigned long scr3; + /* Eflags register */ + unsigned long eflags; +}; + #define INIT_CPUMASK { \ {0,} \ } @@ -120,15 +131,6 @@ struct thread_struct { unsigned long csr_ecfg; unsigned long csr_badvaddr; /* Last user fault */ - /* Scratch registers */ - unsigned long scr0; - unsigned long scr1; - unsigned long scr2; - unsigned long scr3; - - /* Eflags register */ - unsigned long eflags; - /* Used by ptrace single_step */ unsigned long single_step; @@ -138,6 +140,7 @@ struct thread_struct { /* Other stuff associated with the thread. 
*/ unsigned long trap_nr; unsigned long error_code; + struct loongarch_lbt lbt; struct loongarch_vdso_info *vdso; /* diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h index 44793ed2ef3e..d22331460981 100644 --- a/arch/loongarch/include/asm/switch_to.h +++ b/arch/loongarch/include/asm/switch_to.h @@ -8,6 +8,7 @@ #include #include #include +#include struct task_struct; @@ -35,6 +36,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev, #define switch_to(prev, next, last) \ do { \ lose_fpu_inatomic(1, prev); \ + lose_lbt_inatomic(1, prev); \ __process_watch(prev, next); \ (last) = __switch_to(prev, next, task_thread_info(next), \ __builtin_return_address(0), __builtin_frame_address(0)); \ diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h index 9955141b8324..fb5e3670ef0d 100644 --- a/arch/loongarch/include/asm/thread_info.h +++ b/arch/loongarch/include/asm/thread_info.h @@ -88,6 +88,8 @@ register unsigned long current_stack_pointer __asm__("$r3"); #define TIF_LSX_CTX_LIVE 21 /* LSX context must be preserved */ #define TIF_LASX_CTX_LIVE 22 /* LASX context must be preserved */ #define TIF_PATCH_PENDING 23 /* pending live patching update */ +#define TIF_USEDLBT 24 /* LBT has been used */ +#define TIF_LBT_CTX_LIVE 25 /* LBT context */ #define _TIF_SIGPENDING (1< + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _TRANS_MIPS_SYSCALLS_H +#define _TRANS_MIPS_SYSCALLS_H + +#define M_ENOMSG 35 +#define M_EIDRM 36 +#define M_ECHRNG 37 +#define M_EL2NSYNC 38 +#define M_EL3HLT 39 +#define M_EL3RST 40 +#define M_ELNRNG 41 +#define M_EUNATCH 42 +#define M_ENOCSI 43 +#define M_EL2HLT 44 +#define M_EDEADLK 45 +#define M_ENOLCK 46 +#define M_EBADE 50 +#define M_EBADR 51 +#define M_EXFULL 52 +#define M_ENOANO 53 +#define M_EBADRQC 54 +#define M_EBADSLT 55 +#define M_EBFONT 59 +#define M_ENOSTR 60 +#define M_ENODATA 61 +#define M_ETIME 62 +#define M_ENOSR 63 +#define M_ENONET 64 +#define M_ENOPKG 65 +#define M_EREMOTE 66 +#define M_ENOLINK 67 +#define M_EADV 68 +#define M_ESRMNT 69 +#define M_ECOMM 70 +#define M_EPROTO 71 +#define M_EDOTDOT 73 +#define M_EMULTIHOP 74 +#define M_EBADMSG 77 +#define M_ENAMETOOLONG 78 +#define M_EOVERFLOW 79 +#define M_ENOTUNIQ 80 +#define M_EBADFD 81 +#define M_EREMCHG 82 +#define M_ELIBACC 83 +#define M_ELIBBAD 84 +#define M_ELIBSCN 85 +#define M_ELIBMAX 86 +#define M_ELIBEXEC 87 +#define M_EILSEQ 88 +#define M_ENOSYS 89 +#define M_ELOOP 90 +#define M_ERESTART 91 +#define M_ESTRPIPE 92 +#define M_ENOTEMPTY 93 +#define M_EUSERS 94 +#define M_ENOTSOCK 95 +#define M_EDESTADDRREQ 96 +#define M_EMSGSIZE 97 +#define M_EPROTOTYPE 98 +#define M_ENOPROTOOPT 99 +#define M_EPROTONOSUPPORT 120 +#define M_ESOCKTNOSUPPORT 121 +#define M_EOPNOTSUPP 122 +#define M_EPFNOSUPPORT 123 +#define M_EAFNOSUPPORT 124 +#define M_EADDRINUSE 125 +#define M_EADDRNOTAVAIL 126 +#define M_ENETDOWN 127 +#define M_ENETUNREACH 128 +#define M_ENETRESET 129 +#define M_ECONNABORTED 130 +#define M_ECONNRESET 131 +#define M_ENOBUFS 132 +#define M_EISCONN 133 +#define M_ENOTCONN 134 +#define M_EUCLEAN 135 +#define M_ENOTNAM 137 +#define M_ENAVAIL 138 +#define M_EISNAM 139 +#define M_EREMOTEIO 140 +#define M_ESHUTDOWN 143 +#define M_ETOOMANYREFS 144 +#define M_ETIMEDOUT 145 +#define M_ECONNREFUSED 146 +#define M_EHOSTDOWN 147 +#define M_EHOSTUNREACH 148 
+#define M_EALREADY 149 +#define M_EINPROGRESS 150 +#define M_ESTALE 151 +#define M_ECANCELED 158 +#define M_ENOMEDIUM 159 +#define M_EMEDIUMTYPE 160 +#define M_ENOKEY 161 +#define M_EKEYEXPIRED 162 +#define M_EKEYREVOKED 163 +#define M_EKEYREJECTED 164 +#define M_EOWNERDEAD 165 +#define M_ENOTRECOVERABLE 166 +#define M_ERFKILL 167 +#define M_EHWPOISON 168 +#define M_EDQUOT 1133 +#endif /* _TRANS_MIPS_SYSCALLS_H */ diff --git a/arch/loongarch/include/asm/trans_syscalls.h b/arch/loongarch/include/asm/trans_syscalls.h new file mode 100644 index 000000000000..f783c06b4517 --- /dev/null +++ b/arch/loongarch/include/asm/trans_syscalls.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Loongson Technology Corporation Limited + * Authors: Hanlu Li + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _TRANS_SYSCALLS_H_ +#define _TRANS_SYSCALLS_H_ +#include +#endif diff --git a/arch/loongarch/include/asm/unistd.h b/arch/loongarch/include/asm/unistd.h index b21b66cca8d9..db63d8a2905b 100644 --- a/arch/loongarch/include/asm/unistd.h +++ b/arch/loongarch/include/asm/unistd.h @@ -8,3 +8,19 @@ #include #define NR_syscalls (__NR_syscalls) + +/* for binary translation */ +#if defined(CONFIG_CPU_HAS_LBT) +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETPGRP + +#define __NR_MIPS64_Linux 5000 +#define __NR_MIPS64_Linux_syscalls 329 +#define __NR_i386_Linux_syscalls 386 + +#define TRANS_MIPS_N64 0x010000 +#define TRANS_I386 0x100000 +#define TRANS_X64 0x110000 +#define TRANS_ARCH_MASK 0xffff0000 +#define SYS_NUM_MASK 0xffff +#endif diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h index b48bf163f5a9..cbde1deb15eb 100644 --- a/arch/loongarch/include/uapi/asm/ptrace.h +++ b/arch/loongarch/include/uapi/asm/ptrace.h @@ -56,6 +56,20 @@ struct user_lasx_state { uint64_t vregs[32*4]; }; +/* + * This structure definition saves the LBT data structure, + * the data comes from the task_struct structure, format is as follows: + * regs[0]: thread.lbt.scr0 + * regs[1]: thread.lbt.scr1 + * regs[2]: thread.lbt.scr2 + * regs[3]: thread.lbt.scr3 + * regs[4]: thread.lbt.eflags + * regs[5]: thread.fpu.ftop + */ +struct user_lbt_state { + uint64_t regs[6]; +}; + /* Read and write watchpoint registers. 
*/ #define NUM_WATCH_REGS 16 diff --git a/arch/loongarch/include/uapi/asm/sigcontext.h b/arch/loongarch/include/uapi/asm/sigcontext.h index 0d11a600101c..67a459776784 100644 --- a/arch/loongarch/include/uapi/asm/sigcontext.h +++ b/arch/loongarch/include/uapi/asm/sigcontext.h @@ -58,4 +58,12 @@ struct lasx_context { __u32 fcsr; }; +/* LBT context */ +#define LBT_CTX_MAGIC 0x42540001 +#define LBT_CTX_ALIGN 8 +struct lbt_context { + __u64 scr[4]; + __u32 eflags; +}; + #endif /* _UAPI_ASM_SIGCONTEXT_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 7f17ccf4fc13..32218252319d 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -49,6 +49,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_CPU_HAS_LBT) += scall-trans-mips64.o trans_mips_syscalls.o scall-trans-i386.o trans_i386_syscalls.o + obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 197316bb9180..c64c67497a55 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -117,15 +117,7 @@ void output_thread_defines(void) OFFSET(THREAD_CSRECFG, task_struct, thread.csr_ecfg); - OFFSET(THREAD_SCR0, task_struct, thread.scr0); - OFFSET(THREAD_SCR1, task_struct, thread.scr1); - OFFSET(THREAD_SCR2, task_struct, thread.scr2); - OFFSET(THREAD_SCR3, task_struct, thread.scr3); - - OFFSET(THREAD_EFLAGS, task_struct, thread.eflags); - OFFSET(THREAD_FPU, task_struct, thread.fpu); - OFFSET(THREAD_BVADDR, task_struct, \ thread.csr_badvaddr); OFFSET(THREAD_ECODE, task_struct, \ @@ -171,6 +163,17 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FCSR, loongarch_fpu, fcsr); OFFSET(THREAD_FCC, loongarch_fpu, fcc); + OFFSET(THREAD_FTOP, loongarch_fpu, ftop); + BLANK(); +} + +void output_thread_lbt_defines(void) +{ + OFFSET(THREAD_SCR0, loongarch_lbt, scr0); + OFFSET(THREAD_SCR1, loongarch_lbt, scr1); + OFFSET(THREAD_SCR2, loongarch_lbt, scr2); + OFFSET(THREAD_SCR3, loongarch_lbt, scr3); + OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags); BLANK(); } diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index adb21c7e6783..a164ec63c81c 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -145,6 +145,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_LVZ; elf_hwcap |= HWCAP_LOONGARCH_LVZ; } +#ifdef CONFIG_CPU_HAS_LBT + if (config & CPUCFG2_X86BT) { + c->options |= LOONGARCH_CPU_LBT_X86; + elf_hwcap |= HWCAP_LOONGARCH_LBT_X86; + } + if (config & CPUCFG2_ARMBT) { + c->options |= LOONGARCH_CPU_LBT_ARM; + elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM; + } + if (config & CPUCFG2_MIPSBT) { + c->options |= LOONGARCH_CPU_LBT_MIPS; + elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS; + } +#endif config = read_cpucfg(LOONGARCH_CPUCFG6); if (config & CPUCFG6_PMP) diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 653a785caa60..3917b6accfc7 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -161,6 +161,14 @@ .macro sc_save_fcsr base, tmp0 movfcsr2gr \tmp0, fcsr0 EX st.w \tmp0, \base, 0 +#if defined(CONFIG_CPU_HAS_LBT) + /* TM bit is always 0 if LBT not supported */ + andi \tmp0, \tmp0, FPU_CSR_TM + beqz \tmp0, 1f + bstrins.d \tmp0, $r0, FPU_CSR_TM_SHIFT, FPU_CSR_TM_SHIFT + movgr2fcsr fcsr0, \tmp0 +1: +#endif .endm .macro sc_restore_fcsr base, tmp0 @@ -168,6 +176,29 @@ movgr2fcsr fcsr0, \tmp0 
 .endm
+
+#if defined(CONFIG_CPU_HAS_LBT)
+	.macro	sc_save_scr base, tmp0
+	movscr2gr	\tmp0, $scr0
+	EX	st.d \tmp0, \base, 0
+	movscr2gr	\tmp0, $scr1
+	EX	st.d \tmp0, \base, 8
+	movscr2gr	\tmp0, $scr2
+	EX	st.d \tmp0, \base, 16
+	movscr2gr	\tmp0, $scr3
+	EX	st.d \tmp0, \base, 24
+	.endm
+
+	.macro	sc_restore_scr base, tmp0
+	EX	ld.d \tmp0, \base, 0
+	movgr2scr	$scr0, \tmp0
+	EX	ld.d \tmp0, \base, 8
+	movgr2scr	$scr1, \tmp0
+	EX	ld.d \tmp0, \base, 16
+	movgr2scr	$scr2, \tmp0
+	EX	ld.d \tmp0, \base, 24
+	movgr2scr	$scr3, \tmp0
+	.endm
+#endif
 	.macro	sc_save_lsx base
 	EX_V 0xb1 $vr0, \base, (0 * LSX_REG_WIDTH)
 	EX_V 0xb1 $vr1, \base, (1 * LSX_REG_WIDTH)
@@ -312,6 +343,10 @@
 * Save a thread's fp context.
 */
 SYM_FUNC_START(_save_fp)
+	/*
+	 * Since the TM bit of FCSR may affect fpr0-fpr7,
+	 * the FCSR must be saved before the FPRs.
+	 */
 	fpu_save_csr	a0 t1
 	fpu_save_double a0 t1		# clobbers t1
 	fpu_save_cc	a0 t1 t2	# clobbers t1, t2
@@ -324,6 +359,10 @@ EXPORT_SYMBOL(_save_fp)
 */
 SYM_FUNC_START(_restore_fp)
 	fpu_restore_double a0 t1	# clobbers t1
+	/*
+	 * Since the TM bit of FCSR may affect fpr0-fpr7,
+	 * the FCSR must be restored after the FPRs.
+	 */
 	fpu_restore_csr	a0 t1
 	fpu_restore_cc	a0 t1 t2	# clobbers t1, t2
 	jirl zero, ra, 0
@@ -503,6 +542,49 @@ SYM_FUNC_START(_restore_lsx_context)
 	jirl	zero, ra, 0
 SYM_FUNC_END(_restore_lsx_context)
 
+#if defined(CONFIG_CPU_HAS_LBT)
+/*
+ * Load scr/eflag with zero.
+ */
+SYM_FUNC_START(_init_lbt)
+	movgr2scr	$scr0, zero
+	movgr2scr	$scr1, zero
+	movgr2scr	$scr2, zero
+	movgr2scr	$scr3, zero
+
+	x86mtflag	zero, 0x3f
+	jr	ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * a0: scr
+ */
+SYM_FUNC_START(_save_scr_context)
+	/* eflags */
+	x86mfflag	t0, 0x3f
+	EX	st.w t0, a1, 0
+
+	sc_save_scr a0, t0
+
+	li.w	a0, 0			# success
+	jirl	zero, ra, 0
+SYM_FUNC_END(_save_scr_context)
+
+/*
+ * a0: scr
+ */
+SYM_FUNC_START(_restore_scr_context)
+	/* eflags */
+	EX	ld.w t0, a1, 0
+	x86mtflag	t0, 0x3f
+
+	sc_restore_scr a0, t1
+
+	li.w	a0, 0			# success
+	jirl	zero, ra, 0
+SYM_FUNC_END(_restore_scr_context)
+#endif
+
 /*
 * a0: fpregs
 * a1: fcc
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 0cf89ce42749..878c0e09f984 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -38,6 +38,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -71,10 +72,12 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
 	euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
 	regs->csr_euen = euen;
 	lose_fpu(0);
+	lose_lbt(0);
 	current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
 
 	clear_thread_flag(TIF_LSX_CTX_LIVE);
 	clear_thread_flag(TIF_LASX_CTX_LIVE);
+	clear_thread_flag(TIF_LBT_CTX_LIVE);
 	clear_used_math();
 	regs->csr_era = pc;
 	regs->regs[3] = sp;
@@ -171,8 +174,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 out:
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 	clear_tsk_thread_flag(p, TIF_USEDSIMD);
+	clear_tsk_thread_flag(p, TIF_USEDLBT);
 	clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
 	clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+	clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
 
 	return 0;
 }
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index d41670fd5cf7..7b87a38ffbdd 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -244,6 +244,39 @@ static int cfg_set(struct task_struct *target,
 	return 0;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int lbt_get(struct task_struct *target,
+		   const struct user_regset *regset,
+		   struct membuf to)
+{
+	int r;
+
+	r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+	r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+	r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+	r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+	r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(target->thread.fpu.ftop));
+
+	return r;
+}
+
+static int lbt_set(struct task_struct *target,
+		   const struct user_regset *regset,
+		   unsigned int pos, unsigned int count,
+		   const void *kbuf, const void __user *ubuf)
+{
+	int err;
+	const int ftop_start = (regset->n - 1) * regset->size;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.lbt.scr0, 0, ftop_start);
+	err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.fpu.ftop, ftop_start,
+				  ftop_start + sizeof(u64));
+	return err;
+}
+#endif
+
 #ifdef CONFIG_CPU_HAS_LSX
 
 static void copy_pad_fprs(struct task_struct *target,
@@ -402,6 +435,9 @@ enum loongarch_regset {
 	REGSET_GPR,
 	REGSET_FPR,
 	REGSET_CPUCFG,
+#ifdef CONFIG_CPU_HAS_LBT
+	REGSET_LBT,
+#endif
#ifdef CONFIG_CPU_HAS_LSX
 	REGSET_LSX,
 #endif
@@ -435,6 +471,16 @@ static const struct user_regset loongarch64_regsets[] = {
 		.regset_get = cfg_get,
 		.set = cfg_set,
 	},
+#ifdef CONFIG_CPU_HAS_LBT
+	[REGSET_LBT] = {
+		.core_note_type = NT_LOONGARCH_LBT,
+		.n = 6,
+		.size = sizeof(u64),
+		.align = sizeof(u64),
+		.regset_get = lbt_get,
+		.set = lbt_set,
+	},
+#endif
#ifdef CONFIG_CPU_HAS_LSX
 	[REGSET_LSX] = {
 		.core_note_type = NT_LOONGARCH_LSX,
diff --git a/arch/loongarch/kernel/scall-trans-i386.S b/arch/loongarch/kernel/scall-trans-i386.S
new file mode 100644
index 000000000000..c3a5ce660c6c
--- /dev/null
+++ b/arch/loongarch/kernel/scall-trans-i386.S
@@ -0,0 +1,495 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995, 96, 97, 98, 99, 2000, 01, 02 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2001 MIPS Technologies, Inc.
+ * Copyright (C) 2020 Loongson Technology Co., Ltd.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+	.align	5
+SYM_FUNC_START(handle_sys_lat_i386)
+	UNWIND_HINT_REGS
+	li.d	t1, 0 #_TIF_WORK_SYSCALL_ENTRY
+	LONG_L	t0, tp, TI_FLAGS	# syscall tracing enabled?
+	and	t0, t1, t0
+	bnez	t0, syscall_trace_entry
+
+choose_abi:
+	li.d	t0, TRANS_ARCH_MASK
+	and	t1, t0, a7		# pick the ARCH specified tag
+	li.d	t0, SYS_NUM_MASK
+	and	a7, a7, t0		# pick the syscall num
+	li.d	t0, TRANS_I386
+	bne	t0, t1, illegal_syscall
+
+syscall_i386:
+
+	sltui	t0, a7, __NR_i386_Linux_syscalls + 1
+	beqz	t0, illegal_syscall
+
+	/* Syscall number held in a7 */
+	slli.d	t0, a7, 3		# offset into table
+	la	t2, i386_syscall_table
+	add.d	t0, t2, t0
+	ld.d	t2, t0, 0		# syscall routine
+	beqz	t2, illegal_syscall
+
+	jalr	t2			# Do The Real Thing (TM)
+
+	li.w	t0, -EMAXERRNO - 1	# error?
+ sltu t0, t0, a0 + st.d t0, sp, PT_R7 # set error flag + beqz t0, 1f + + ld.d t1, sp, PT_R11 # syscall number + addi.d t1, t1, 1 # +1 for handle_signal + st.d t1, sp, PT_R0 # save it for syscall restarting +1: st.d a0, sp, PT_R4 # result + + +trans_i386_syscall_exit: + NOT_SIBLING_CALL_HINT + RESTORE_TEMP + RESTORE_STATIC + RESTORE_SOME + RESTORE_SP_AND_RET + +/* ------------------------------------------------------------------------ */ + +syscall_trace_entry: + SAVE_STATIC + move a0, sp + move a1, a7 + move a0, zero #bl syscall_trace_enter + + blt a0, zero, 1f # seccomp failed? Skip syscall + + RESTORE_STATIC + ld.d a0, sp, PT_R4 # Restore argument registers + ld.d a1, sp, PT_R5 + ld.d a2, sp, PT_R6 + ld.d a3, sp, PT_R7 + ld.d a4, sp, PT_R8 + ld.d a5, sp, PT_R9 + ld.d a6, sp, PT_R10 + ld.d a7, sp, PT_R11 # Restore syscall (maybe modified) + b choose_abi + +1: b trans_i386_syscall_exit + + /* + * The system call does not exist in this kernel + */ + +illegal_syscall: + li.w a0, ENOSYS # error + st.d a0, sp, PT_R4 + li.w t0, 1 # set error flag + st.d t0, sp, PT_R7 + b trans_i386_syscall_exit +SYM_FUNC_END(handle_sys_lat_i386) + + .align 3 +SYM_DATA_START(i386_syscall_table) + PTR sys_restart_syscall /* 0 */ + PTR sys_exit /* 1 */ + PTR sys_ni_syscall /* 2 sys_fork */ + PTR sys_read /* 3 */ + PTR sys_write /* 4 */ + PTR sys_open /* 5 */ + PTR sys_close /* 6 */ + PTR sys_ni_syscall /* 7 sys_waitpid */ + PTR sys_creat /* 8 */ + PTR sys_link /* 9 */ + PTR sys_unlink /* 10 */ + PTR sys_execve /* 11 */ + PTR sys_chdir /* 12 */ + PTR sys_ni_syscall /* 13 sys_time */ + PTR sys_mknod /* 14 */ + PTR sys_chmod /* 15 */ + PTR sys_lchown16 /* 16 */ + PTR sys_ni_syscall /* 17 break */ + PTR sys_ni_syscall /* 18 sys_stat */ + PTR sys_lseek /* 19 */ + PTR sys_getpid /* 20 */ + PTR sys_mount /* 21 */ + PTR sys_ni_syscall /* 22 sys_oldumount */ + PTR sys_setuid16 /* 23 */ + PTR sys_getuid16 /* 24 */ + PTR sys_ni_syscall /* 25 sys_stime */ + PTR sys_ptrace /* 26 */ + PTR sys_alarm /* 27 */ + PTR sys_ni_syscall /* 28 sys_fstat */ + PTR sys_ni_syscall /* 29 sys_pause */ + PTR sys_ni_syscall /* 30 sys_utime */ + PTR sys_ni_syscall /* 31 stty */ + PTR sys_ni_syscall /* 32 gtty */ + PTR sys_access /* 33 */ + PTR sys_ni_syscall /* 34 sys_nice */ + PTR sys_ni_syscall /* 35 ftime */ + PTR sys_sync /* 36 */ + PTR sys_kill /* 37 */ + PTR sys_rename /* 38 */ + PTR sys_mkdir /* 39 */ + PTR sys_rmdir /* 40 */ + PTR sys_dup /* 41 */ + PTR sys_pipe /* 42 */ + PTR sys_times /* 43 */ + PTR sys_ni_syscall /* 44 prof */ + PTR sys_brk /* 45 */ + PTR sys_setgid16 /* 46 */ + PTR sys_getgid16 /* 47 */ + PTR sys_ni_syscall /* 48 sys_signal */ + PTR sys_geteuid16 /* 49 */ + PTR sys_getegid16 /* 50 */ + PTR sys_acct /* 51 */ + PTR sys_umount /* 52 */ + PTR sys_ni_syscall /* 53 lock */ + PTR sys_ioctl /* 54 */ + PTR sys_fcntl /* 55 */ + PTR sys_ni_syscall /* 56 mxp */ + PTR sys_setpgid /* 57 */ + PTR sys_ni_syscall /* 58 ulimit */ + PTR sys_ni_syscall /* 59 sys_olduname */ + PTR sys_umask /* 60 */ + PTR sys_chroot /* 61 */ + PTR sys_ustat /* 62 */ + PTR sys_dup2 /* 63 */ + PTR sys_getppid /* 64 */ + PTR sys_getpgrp /* 65 */ + PTR sys_setsid /* 66 */ + PTR sys_ni_syscall /* 67 sys_sigaction */ + PTR sys_sgetmask /* 68 */ + PTR sys_ssetmask /* 69 */ + PTR sys_setreuid16 /* 70 */ + PTR sys_setregid16 /* 71 */ + PTR sys_ni_syscall /* 72 sys_sigsuspend */ + PTR sys_ni_syscall /* 73 sys_sigpending */ + PTR sys_sethostname /* 74 */ + PTR sys_setrlimit /* 75 */ + PTR sys_ni_syscall /* 76 sys_old_getrlimit */ + PTR sys_getrusage /* 77 */ + PTR 
sys_gettimeofday		/* 78 */
+	PTR	sys_settimeofday		/* 79 */
+	PTR	sys_getgroups16			/* 80 */
+	PTR	sys_setgroups16			/* 81 */
+	PTR	sys_ni_syscall			/* 82 sys_old_select */
+	PTR	sys_symlink			/* 83 */
+	PTR	sys_ni_syscall			/* 84 sys_lstat */
+	PTR	sys_readlink			/* 85 */
+	PTR	sys_uselib			/* 86 */
+	PTR	sys_swapon			/* 87 */
+	PTR	sys_reboot			/* 88 */
+	PTR	sys_ni_syscall			/* 89 sys_old_readdir */
+	PTR	sys_ni_syscall			/* 90 sys_old_mmap */
+	PTR	sys_munmap			/* 91 */
+	PTR	sys_truncate			/* 92 */
+	PTR	sys_ftruncate			/* 93 */
+	PTR	sys_fchmod			/* 94 */
+	PTR	sys_fchown16			/* 95 */
+	PTR	sys_getpriority			/* 96 */
+	PTR	sys_setpriority			/* 97 */
+	PTR	sys_ni_syscall			/* 98 profil */
+	PTR	sys_statfs			/* 99 */
+	PTR	sys_fstatfs			/* 100 */
+	PTR	sys_ni_syscall			/* 101 sys_ioperm */
+	PTR	sys_socketcall			/* 102 */
+	PTR	sys_syslog			/* 103 */
+	PTR	sys_setitimer			/* 104 */
+	PTR	sys_getitimer			/* 105 */
+	PTR	sys_newstat			/* 106 */
+	PTR	sys_newlstat			/* 107 */
+	PTR	sys_newfstat			/* 108 */
+	PTR	sys_ni_syscall			/* 109 sys_uname */
+	PTR	sys_ni_syscall			/* 110 sys_iopl */
+	PTR	sys_vhangup			/* 111 */
+	PTR	sys_ni_syscall			/* 112 idle */
+	PTR	sys_vm86old			/* 113 */
+	PTR	sys_wait4			/* 114 */
+	PTR	sys_swapoff			/* 115 */
+	PTR	sys_sysinfo			/* 116 */
+	PTR	sys_ipc				/* 117 */
+	PTR	sys_fsync			/* 118 */
+	PTR	sys_ni_syscall			/* 119 sys_sigreturn */
+	PTR	sys_clone			/* 120 */
+	PTR	sys_setdomainname		/* 121 */
+	PTR	sys_newuname			/* 122 */
+	PTR	sys_modify_ldt			/* 123 */
+	PTR	sys_adjtimex			/* 124 */
+	PTR	sys_mprotect			/* 125 */
+	PTR	sys_ni_syscall			/* 126 sys_sigprocmask */
+	PTR	sys_ni_syscall			/* 127 create_module */
+	PTR	sys_init_module			/* 128 */
+	PTR	sys_delete_module		/* 129 */
+	PTR	sys_ni_syscall			/* 130 get_kernel_syms */
+	PTR	sys_quotactl			/* 131 */
+	PTR	sys_getpgid			/* 132 */
+	PTR	sys_fchdir			/* 133 */
+	PTR	sys_bdflush			/* 134 */
+	PTR	sys_sysfs			/* 135 */
+	PTR	sys_personality			/* 136 */
+	PTR	sys_ni_syscall			/* 137 afs_syscall */
+	PTR	sys_setfsuid16			/* 138 */
+	PTR	sys_setfsgid16			/* 139 */
+	PTR	sys_ni_syscall			/* 140 sys_llseek */
+	PTR	sys_getdents			/* 141 */
+	PTR	sys_select			/* 142 */
+	PTR	sys_flock			/* 143 */
+	PTR	sys_msync			/* 144 */
+	PTR	sys_readv			/* 145 */
+	PTR	sys_writev			/* 146 */
+	PTR	sys_getsid			/* 147 */
+	PTR	sys_fdatasync			/* 148 */
+	PTR	sys_ni_syscall	//sys_sysctl	/* 149 */
+	PTR	sys_mlock			/* 150 */
+	PTR	sys_munlock			/* 151 */
+	PTR	sys_mlockall			/* 152 */
+	PTR	sys_munlockall			/* 153 */
+	PTR	sys_sched_setparam		/* 154 */
+	PTR	sys_sched_getparam		/* 155 */
+	PTR	sys_sched_setscheduler		/* 156 */
+	PTR	sys_sched_getscheduler		/* 157 */
+	PTR	sys_sched_yield			/* 158 */
+	PTR	sys_sched_get_priority_max	/* 159 */
+	PTR	sys_sched_get_priority_min	/* 160 */
+	PTR	sys_sched_rr_get_interval	/* 161 */
+	PTR	sys_nanosleep			/* 162 */
+	PTR	sys_mremap			/* 163 */
+	PTR	sys_setresuid16			/* 164 */
+	PTR	sys_getresuid16			/* 165 */
+	PTR	sys_vm86			/* 166 */
+	PTR	sys_ni_syscall			/* 167 query_module */
+	PTR	sys_poll			/* 168 */
+	PTR	sys_ni_syscall			/* 169 nfsservctl */
+	PTR	sys_setresgid16			/* 170 */
+	PTR	sys_getresgid16			/* 171 */
+	PTR	sys_prctl			/* 172 */
+	PTR	sys_rt_sigreturn		/* 173 */
+	PTR	sys_rt_sigaction		/* 174 */
+	PTR	sys_latx_rt_sigprocmask		/* 175 */
+	PTR	sys_rt_sigpending		/* 176 */
+	PTR	sys_rt_sigtimedwait		/* 177 */
+	PTR	sys_rt_sigqueueinfo		/* 178 */
+	PTR	sys_rt_sigsuspend		/* 179 */
+	PTR	sys_pread64			/* 180 */
+	PTR	sys_pwrite64			/* 181 */
+	PTR	sys_chown16			/* 182 */
+	PTR	sys_getcwd			/* 183 */
+	PTR	sys_capget			/* 184 */
+	PTR	sys_capset			/* 185 */
+	PTR	sys_sigaltstack			/* 186 */
+	PTR	sys_sendfile			/* 187 */
+	PTR	sys_ni_syscall			/* 188 getpmsg
*/ + PTR sys_ni_syscall /* 189 putpmsg */ + PTR sys_ni_syscall /* 190 sys_vfork */ + PTR sys_getrlimit /* 191 */ + PTR sys_mmap_pgoff /* 192 */ + PTR sys_ni_syscall /* 193 sys_truncate64 */ + PTR sys_ni_syscall /* 194 sys_ftruncate64 */ + PTR sys_ni_syscall /* 195 sys_stat64 */ + PTR sys_ni_syscall /* 196 sys_lstat64 */ + PTR sys_ni_syscall /* 197 sys_fstat64 */ + PTR sys_lchown /* 198 */ + PTR sys_getuid /* 199 */ + PTR sys_getgid /* 200 */ + PTR sys_geteuid /* 201 */ + PTR sys_getegid /* 202 */ + PTR sys_setreuid /* 203 */ + PTR sys_setregid /* 204 */ + PTR sys_getgroups /* 205 */ + PTR sys_setgroups /* 206 */ + PTR sys_fchown /* 207 */ + PTR sys_setresuid /* 208 */ + PTR sys_getresuid /* 209 */ + PTR sys_setresgid /* 210 */ + PTR sys_getresgid /* 211 */ + PTR sys_chown /* 212 */ + PTR sys_setuid /* 213 */ + PTR sys_setgid /* 214 */ + PTR sys_setfsuid /* 215 */ + PTR sys_setfsgid /* 216 */ + PTR sys_pivot_root /* 217 */ + PTR sys_mincore /* 218 */ + PTR sys_madvise /* 219 */ + PTR sys_getdents64 /* 220 */ + PTR sys_ni_syscall /* 221 sys_fcntl64 */ + PTR sys_ni_syscall /* 222 is unused */ + PTR sys_ni_syscall /* 223 is unused */ + PTR sys_gettid /* 224 */ + PTR sys_readahead /* 225 */ + PTR sys_setxattr /* 226 */ + PTR sys_lsetxattr /* 227 */ + PTR sys_fsetxattr /* 228 */ + PTR sys_getxattr /* 229 */ + PTR sys_lgetxattr /* 230 */ + PTR sys_fgetxattr /* 231 */ + PTR sys_listxattr /* 232 */ + PTR sys_llistxattr /* 233 */ + PTR sys_flistxattr /* 234 */ + PTR sys_removexattr /* 235 */ + PTR sys_lremovexattr /* 236 */ + PTR sys_fremovexattr /* 237 */ + PTR sys_tkill /* 238 */ + PTR sys_sendfile64 /* 239 */ + PTR sys_futex /* 240 */ + PTR sys_sched_setaffinity /* 241 */ + PTR sys_sched_getaffinity /* 242 */ + PTR sys_ni_syscall /* 243 sys_set_thread_area */ + PTR sys_ni_syscall /* 244 sys_get_thread_area */ + PTR sys_io_setup /* 245 */ + PTR sys_io_destroy /* 246 */ + PTR sys_io_getevents /* 247 */ + PTR sys_io_submit /* 248 */ + PTR sys_io_cancel /* 249 */ + PTR sys_fadvise64 /* 250 */ + PTR sys_ni_syscall /* 251 is available for reuse*/ + PTR sys_exit_group /* 252 */ + PTR sys_lookup_dcookie /* 253 */ + PTR sys_epoll_create /* 254 */ + PTR sys_epoll_ctl /* 255 */ + PTR sys_epoll_wait /* 256 */ + PTR sys_remap_file_pages /* 257 */ + PTR sys_set_tid_address /* 258 */ + PTR sys_timer_create /* 259 */ + PTR sys_timer_settime /* 260 */ + PTR sys_timer_gettime /* 261 */ + PTR sys_timer_getoverrun /* 262 */ + PTR sys_timer_delete /* 263 */ + PTR sys_clock_settime /* 264 */ + PTR sys_clock_gettime /* 265 */ + PTR sys_clock_getres /* 266 */ + PTR sys_clock_nanosleep /* 267 */ + PTR sys_statfs64 /* 268 */ + PTR sys_fstatfs64 /* 269 */ + PTR sys_tgkill /* 270 */ + PTR sys_ni_syscall //sys_utimes /* 271 */ + PTR sys_fadvise64_64 /* 272 */ + PTR sys_ni_syscall /* 273 vserver */ + PTR sys_mbind /* 274 */ + PTR sys_get_mempolicy /* 275 */ + PTR sys_set_mempolicy /* 276 */ + PTR sys_mq_open /* 277 */ + PTR sys_mq_unlink /* 278 */ + PTR sys_mq_timedsend /* 279 */ + PTR sys_mq_timedreceive /* 280 */ + PTR sys_mq_notify /* 281 */ + PTR sys_mq_getsetattr /* 282 */ + PTR sys_kexec_load /* 283 */ + PTR sys_waitid /* 284 */ + PTR sys_ni_syscall /* 285 sys_setaltroot */ + PTR sys_add_key /* 286 */ + PTR sys_request_key /* 287 */ + PTR sys_keyctl /* 288 */ + PTR sys_ioprio_set /* 289 */ + PTR sys_ioprio_get /* 290 */ + PTR sys_inotify_init /* 291 */ + PTR sys_inotify_add_watch /* 292 */ + PTR sys_inotify_rm_watch /* 293 */ + PTR sys_migrate_pages /* 294 */ + PTR sys_openat /* 295 */ + PTR sys_mkdirat /* 296 */ + PTR 
sys_mknodat /* 297 */ + PTR sys_fchownat /* 298 */ + PTR sys_ni_syscall //sys_futimesat /* 299 */ + PTR sys_ni_syscall /* 300 sys_fstatat64 */ + PTR sys_unlinkat /* 301 */ + PTR sys_renameat /* 302 */ + PTR sys_linkat /* 303 */ + PTR sys_symlinkat /* 304 */ + PTR sys_readlinkat /* 305 */ + PTR sys_fchmodat /* 306 */ + PTR sys_faccessat /* 307 */ + PTR sys_pselect6 /* 308 */ + PTR sys_ppoll /* 309 */ + PTR sys_unshare /* 310 */ + PTR sys_set_robust_list /* 311 */ + PTR sys_get_robust_list /* 312 */ + PTR sys_splice /* 313 */ + PTR sys_sync_file_range /* 314 */ + PTR sys_tee /* 315 */ + PTR sys_vmsplice /* 316 */ + PTR sys_move_pages /* 317 */ + PTR sys_getcpu /* 318 */ + PTR sys_epoll_pwait /* 319 */ + PTR sys_utimensat /* 320 */ + PTR sys_signalfd /* 321 */ + PTR sys_timerfd_create /* 322 */ + PTR sys_eventfd /* 323 */ + PTR sys_fallocate /* 324 */ + PTR sys_timerfd_settime /* 325 */ + PTR sys_timerfd_gettime /* 326 */ + PTR sys_signalfd4 /* 327 */ + PTR sys_eventfd2 /* 328 */ + PTR sys_epoll_create1 /* 329 */ + PTR sys_dup3 /* 330 */ + PTR sys_pipe2 /* 331 */ + PTR sys_inotify_init1 /* 332 */ + PTR sys_preadv /* 333 */ + PTR sys_pwritev /* 334 */ + PTR sys_rt_tgsigqueueinfo /* 335 */ + PTR sys_perf_event_open /* 336 */ + PTR sys_recvmmsg /* 337 */ + PTR sys_fanotify_init /* 338 */ + PTR sys_fanotify_mark /* 339 */ + PTR sys_prlimit64 /* 340 */ + PTR sys_name_to_handle_at /* 341 */ + PTR sys_open_by_handle_at /* 342 */ + PTR sys_clock_adjtime /* 343 */ + PTR sys_syncfs /* 344 */ + PTR sys_sendmmsg /* 345 */ + PTR sys_setns /* 346 */ + PTR sys_process_vm_readv /* 347 */ + PTR sys_process_vm_writev /* 348 */ + PTR sys_kcmp /* 349 */ + PTR sys_finit_module /* 350 */ + PTR sys_sched_setattr /* 351 */ + PTR sys_sched_getattr /* 352 */ + PTR sys_renameat2 /* 353 */ + PTR sys_seccomp /* 354 */ + PTR sys_getrandom /* 355 */ + PTR sys_memfd_create /* 356 */ + PTR sys_bpf /* 357 */ + PTR sys_execveat /* 358 */ + PTR sys_socket /* 359 */ + PTR sys_socketpair /* 360 */ + PTR sys_bind /* 361 */ + PTR sys_connect /* 362 */ + PTR sys_listen /* 363 */ + PTR sys_accept4 /* 364 */ + PTR sys_getsockopt /* 365 */ + PTR sys_setsockopt /* 366 */ + PTR sys_getsockname /* 367 */ + PTR sys_getpeername /* 368 */ + PTR sys_sendto /* 369 */ + PTR sys_sendmsg /* 370 */ + PTR sys_recvfrom /* 371 */ + PTR sys_recvmsg /* 372 */ + PTR sys_shutdown /* 373 */ + PTR sys_userfaultfd /* 374 */ + PTR sys_membarrier /* 375 */ + PTR sys_mlock2 /* 376 */ + PTR sys_copy_file_range /* 377 */ + PTR sys_preadv2 /* 378 */ + PTR sys_pwritev2 /* 379 */ + PTR sys_pkey_mprotect /* 380 */ + PTR sys_pkey_alloc /* 381 */ + PTR sys_pkey_free /* 382 */ + PTR sys_statx /* 383 */ + PTR sys_ni_syscall /* 384 sys_arch_prctl */ + PTR sys_io_pgetevents /* 385 */ + PTR sys_rseq /* 386 */ +SYM_DATA_END(i386_syscall_table) diff --git a/arch/loongarch/kernel/scall-trans-mips64.S b/arch/loongarch/kernel/scall-trans-mips64.S new file mode 100644 index 000000000000..a1a32ee247f9 --- /dev/null +++ b/arch/loongarch/kernel/scall-trans-mips64.S @@ -0,0 +1,437 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technology Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + .align 5 +SYM_FUNC_START(handle_sys_lat_mips64) + UNWIND_HINT_REGS + li.d t1, 0 #_TIF_WORK_SYSCALL_ENTRY + LONG_L t0, tp, TI_FLAGS # syscall tracing enabled? 
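The assembly above boils down to a bounds check and an indexed call through i386_syscall_table, with PT_R7 doubling as the error flag the translator inspects. A minimal C sketch of that control flow; EMAXERRNO's value here is an assumption borrowed from the MIPS convention, everything else mirrors the patch:

    #include <errno.h>

    #define EMAXERRNO 1133	/* assumption: MIPS-style errno ceiling */

    typedef long (*syscall_fn)(long, long, long, long, long, long);
    extern syscall_fn i386_syscall_table[387];

    /* Sketch of the bounds check + table dispatch done in assembly. */
    static long dispatch_i386(unsigned long nr, long a[6], int *errflag)
    {
        long ret;

        if (nr >= 387) {		/* illegal_syscall path */
            *errflag = 1;
            return ENOSYS;
        }
        ret = i386_syscall_table[nr](a[0], a[1], a[2], a[3], a[4], a[5]);
        /* sltu against -EMAXERRNO-1: returns in (-EMAXERRNO, 0) are errors */
        *errflag = (unsigned long)ret > (unsigned long)(-EMAXERRNO - 1);
        return ret;
    }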
diff --git a/arch/loongarch/kernel/scall-trans-mips64.S b/arch/loongarch/kernel/scall-trans-mips64.S
new file mode 100644
index 000000000000..a1a32ee247f9
--- /dev/null
+++ b/arch/loongarch/kernel/scall-trans-mips64.S
@@ -0,0 +1,437 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2020 Loongson Technology Co., Ltd.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+	.align	5
+SYM_FUNC_START(handle_sys_lat_mips64)
+	UNWIND_HINT_REGS
+	li.d	t1, 0			#_TIF_WORK_SYSCALL_ENTRY
+	LONG_L	t0, tp, TI_FLAGS	# syscall tracing enabled?
+	and	t0, t1, t0
+	bnez	t0, syscall_trace_entry
+
+choose_abi:
+	li.d	t0, TRANS_ARCH_MASK
+	and	t1, t0, a7
+	li.d	t0, SYS_NUM_MASK
+	and	a7, a7, t0
+	li.d	t0, TRANS_MIPS_N64
+	bne	t0, t1, illegal_syscall
+
+syscall_mips64:
+	li.w	t0, __NR_MIPS64_Linux
+	sub.d	t2, a7, t0
+	sltui	t0, t2, __NR_MIPS64_Linux_syscalls + 1
+	beqz	t0, illegal_syscall
+
+	/* Syscall number held in a7 */
+	slli.d	t0, t2, 3		# offset into table
+	la	t2, mips64_syscall_table
+	add.d	t0, t2, t0
+	ld.d	t2, t0, 0		# syscall routine
+	beqz	t2, illegal_syscall
+
+	jalr	t2			# Do The Real Thing (TM)
+
+	li.w	t0, -EMAXERRNO - 1	# error?
+	sltu	t0, t0, a0
+	st.d	t0, sp, PT_R7		# set error flag
+	beqz	t0, 1f
+
+	ld.d	t1, sp, PT_R11		# syscall number
+	addi.d	t1, t1, 1		# +1 for handle_signal
+	st.d	t1, sp, PT_R0		# save it for syscall restarting
+	sub.d	a0, zero, a0
+	bl	trans_mips_errno
+1:	st.d	a0, sp, PT_R4		# result
+
+
+trans_syscall_exit:
+	NOT_SIBLING_CALL_HINT
+	RESTORE_TEMP
+	RESTORE_STATIC
+	RESTORE_SOME
+	RESTORE_SP_AND_RET
+
+/* ------------------------------------------------------------------------ */
+
+syscall_trace_entry:
+	SAVE_STATIC
+	move	a0, sp
+	move	a1, a7
+	move	a0, zero		#bl syscall_trace_enter
+
+	blt	a0, zero, 1f		# seccomp failed? Skip syscall
+
+	RESTORE_STATIC
+	ld.d	a0, sp, PT_R4		# Restore argument registers
+	ld.d	a1, sp, PT_R5
+	ld.d	a2, sp, PT_R6
+	ld.d	a3, sp, PT_R7
+	ld.d	a4, sp, PT_R8
+	ld.d	a5, sp, PT_R9
+	ld.d	a6, sp, PT_R10
+	ld.d	a7, sp, PT_R11		# Restore syscall (maybe modified)
+	b	choose_abi
+
+1:	b	trans_syscall_exit
+
+	/*
+	 * The system call does not exist in this kernel
+	 */
+
+illegal_syscall:
+	li.w	a0, ENOSYS		# error
+	st.d	a0, sp, PT_R4
+	li.w	t0, 1			# set error flag
+	st.d	t0, sp, PT_R7
+	b	trans_syscall_exit
+SYM_FUNC_END(handle_sys_lat_mips64)
+
+	.align	3
+SYM_DATA_START(mips64_syscall_table)
+	PTR	sys_read		/* 5000 */
+	PTR	sys_write
+	PTR	sys_ni_syscall
+	PTR	sys_close
+	PTR	sys_ni_syscall		/* stat */
+	PTR	sys_ni_syscall		/* 5005 */
+	PTR	sys_ni_syscall		/* lstat */
+	PTR	sys_ni_syscall
+	PTR	sys_lseek
+	PTR	sys_ni_syscall
+	PTR	sys_mprotect		/* 5010 */
+	PTR	sys_munmap
+	PTR	sys_brk
+	PTR	sys_ni_syscall
+	PTR	sys_rt_sigprocmask
+	PTR	sys_ioctl		/* 5015 */
+	PTR	sys_pread64
+	PTR	sys_pwrite64
+	PTR	sys_readv
+	PTR	sys_writev
+	PTR	sys_access		/* 5020 */
+	PTR	sys_pipe
+	PTR	sys_ni_syscall
+	PTR	sys_sched_yield
+	PTR	sys_mremap
+	PTR	sys_msync		/* 5025 */
+	PTR	sys_mincore
+	PTR	sys_madvise
+	PTR	sys_shmget
+	PTR	sys_shmat
+	PTR	sys_shmctl		/* 5030 */
+	PTR	sys_dup
+	PTR	sys_dup2
+	PTR	sys_ni_syscall		/* pause */
+	PTR	sys_nanosleep
+	PTR	sys_getitimer		/* 5035 */
+	PTR	sys_setitimer
+	PTR	sys_alarm
+	PTR	sys_getpid
+	PTR	sys_sendfile64
+	PTR	sys_ni_syscall		/* 5040 */
+	PTR	sys_connect
+	PTR	sys_accept
+	PTR	sys_sendto
+	PTR	sys_recvfrom
+	PTR	sys_sendmsg		/* 5045 */
+	PTR	sys_recvmsg
+	PTR	sys_shutdown
+	PTR	sys_bind
+	PTR	sys_listen
+	PTR	sys_getsockname		/* 5050 */
+	PTR	sys_getpeername
+	PTR	sys_socketpair
+	PTR	sys_setsockopt
+	PTR	sys_getsockopt
+	PTR	sys_ni_syscall		/* 5055 */
+	PTR	sys_ni_syscall
+	PTR	sys_execve
+	PTR	sys_exit
+	PTR	sys_wait4
+	PTR	sys_kill		/* 5060 */
+	PTR	sys_newuname
+	PTR	sys_semget
+	PTR	sys_semop
+	PTR	sys_semctl
+	PTR	sys_shmdt		/* 5065 */
+	PTR	sys_msgget
+	PTR	sys_msgsnd
+	PTR	sys_msgrcv
+	PTR	sys_msgctl
+	PTR	sys_fcntl		/* 5070 */
+	PTR	sys_flock
+	PTR	sys_fsync
+	PTR	sys_fdatasync
+	PTR	sys_truncate
+	PTR	sys_ftruncate		/* 5075 */
+	PTR	sys_ni_syscall
+	PTR	sys_getcwd
+	PTR	sys_chdir
+	PTR	sys_fchdir
+	PTR	sys_rename		/* 5080 */
+	PTR	sys_mkdir
+	PTR	sys_rmdir
+	PTR	sys_ni_syscall
+	PTR	sys_link
+	PTR	sys_unlink		/* 5085 */
+	PTR	sys_symlink
+	PTR	sys_readlink
+	PTR	sys_chmod
+	PTR	sys_fchmod
+	PTR	sys_chown		/* 5090 */
+	PTR	sys_fchown
+	PTR	sys_lchown
+	PTR	sys_umask
+	PTR	sys_gettimeofday
+	PTR	sys_getrlimit		/* 5095 */
+	PTR	sys_getrusage
+	PTR	sys_sysinfo
+	PTR	sys_times
+	PTR	sys_ptrace
+	PTR	sys_getuid		/* 5100 */
+	PTR	sys_syslog
+	PTR	sys_getgid
+	PTR	sys_setuid
+	PTR	sys_setgid
+	PTR	sys_geteuid		/* 5105 */
+	PTR	sys_getegid
+	PTR	sys_setpgid
+	PTR	sys_getppid
+	PTR	sys_getpgrp
+	PTR	sys_setsid		/* 5110 */
+	PTR	sys_setreuid
+	PTR	sys_setregid
+	PTR	sys_getgroups
+	PTR	sys_setgroups
+	PTR	sys_setresuid		/* 5115 */
+	PTR	sys_getresuid
+	PTR	sys_setresgid
+	PTR	sys_getresgid
+	PTR	sys_getpgid
+	PTR	sys_setfsuid		/* 5120 */
+	PTR	sys_setfsgid
+	PTR	sys_getsid
+	PTR	sys_capget
+	PTR	sys_capset
+	PTR	sys_rt_sigpending	/* 5125 */
+	PTR	sys_rt_sigtimedwait
+	PTR	sys_rt_sigqueueinfo
+	PTR	sys_rt_sigsuspend
+	PTR	sys_sigaltstack
+	PTR	sys_ni_syscall		/* 5130 */
+	PTR	sys_mknod
+	PTR	sys_personality
+	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall		/* statfs */
+	PTR	sys_ni_syscall		/* 5135 */
+	PTR	sys_ni_syscall
+	PTR	sys_getpriority
+	PTR	sys_setpriority
+	PTR	sys_sched_setparam
+	PTR	sys_sched_getparam	/* 5140 */
+	PTR	sys_sched_setscheduler
+	PTR	sys_sched_getscheduler
+	PTR	sys_sched_get_priority_max
+	PTR	sys_sched_get_priority_min
+	PTR	sys_sched_rr_get_interval	/* 5145 */
+	PTR	sys_mlock
+	PTR	sys_munlock
+	PTR	sys_mlockall
+	PTR	sys_munlockall
+	PTR	sys_vhangup		/* 5150 */
+	PTR	sys_pivot_root
+	PTR	sys_ni_syscall
+	PTR	sys_prctl
+	PTR	sys_adjtimex
+	PTR	sys_setrlimit		/* 5155 */
+	PTR	sys_chroot
+	PTR	sys_sync
+	PTR	sys_acct
+	PTR	sys_settimeofday
+	PTR	sys_mount		/* 5160 */
+	PTR	sys_umount
+	PTR	sys_swapon
+	PTR	sys_swapoff
+	PTR	sys_reboot
+	PTR	sys_sethostname		/* 5165 */
+	PTR	sys_setdomainname
+	PTR	sys_ni_syscall		/* was create_module */
+	PTR	sys_init_module
+	PTR	sys_delete_module
+	PTR	sys_ni_syscall		/* 5170, was get_kernel_syms */
+	PTR	sys_ni_syscall		/* was query_module */
+	PTR	sys_quotactl
+	PTR	sys_ni_syscall		/* was nfsservctl */
+	PTR	sys_ni_syscall		/* res. for getpmsg */
+	PTR	sys_ni_syscall		/* 5175 for putpmsg */
+	PTR	sys_ni_syscall		/* res. for afs_syscall */
+	PTR	sys_ni_syscall		/* res. for security */
+	PTR	sys_gettid
+	PTR	sys_readahead
+	PTR	sys_setxattr		/* 5180 */
+	PTR	sys_lsetxattr
+	PTR	sys_fsetxattr
+	PTR	sys_getxattr
+	PTR	sys_lgetxattr
+	PTR	sys_fgetxattr		/* 5185 */
+	PTR	sys_listxattr
+	PTR	sys_llistxattr
+	PTR	sys_flistxattr
+	PTR	sys_removexattr
+	PTR	sys_lremovexattr	/* 5190 */
+	PTR	sys_fremovexattr
+	PTR	sys_tkill
+	PTR	sys_ni_syscall
+	PTR	sys_futex
+	PTR	sys_sched_setaffinity	/* 5195 */
+	PTR	sys_sched_getaffinity
+	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall
+	PTR	sys_ni_syscall
+	PTR	sys_io_setup		/* 5200 */
+	PTR	sys_io_destroy
+	PTR	sys_io_getevents
+	PTR	sys_io_submit
+	PTR	sys_io_cancel
+	PTR	sys_exit_group		/* 5205 */
+	PTR	sys_lookup_dcookie
+	PTR	sys_epoll_create
+	PTR	sys_epoll_ctl
+	PTR	sys_epoll_wait
+	PTR	sys_remap_file_pages	/* 5210 */
+	PTR	sys_rt_sigreturn
+	PTR	sys_set_tid_address
+	PTR	sys_restart_syscall
+	PTR	sys_semtimedop
+	PTR	sys_fadvise64_64	/* 5215 */
+	PTR	sys_timer_create
+	PTR	sys_timer_settime
+	PTR	sys_timer_gettime
+	PTR	sys_timer_getoverrun
+	PTR	sys_timer_delete	/* 5220 */
+	PTR	sys_clock_settime
+	PTR	sys_clock_gettime
+	PTR	sys_clock_getres
+	PTR	sys_clock_nanosleep
+	PTR	sys_tgkill		/* 5225 */
+	PTR	sys_ni_syscall		/* utimes */
+	PTR	sys_mbind
+	PTR	sys_get_mempolicy
+	PTR	sys_set_mempolicy
+	PTR	sys_mq_open		/* 5230 */
+	PTR	sys_mq_unlink
+	PTR	sys_mq_timedsend
+	PTR	sys_mq_timedreceive
+	PTR	sys_mq_notify
+	PTR	sys_mq_getsetattr	/* 5235 */
+	PTR	sys_ni_syscall		/* sys_vserver */
+	PTR	sys_waitid
+	PTR	sys_ni_syscall		/* available, was setaltroot */
+	PTR	sys_add_key
+	PTR	sys_request_key		/* 5240 */
+	PTR	sys_keyctl
+	PTR	sys_ni_syscall		/* set_thread_area */
+	PTR	sys_inotify_init
+	PTR	sys_inotify_add_watch
+	PTR	sys_inotify_rm_watch	/* 5245 */
+	PTR	sys_migrate_pages
+	PTR	sys_ni_syscall
+	PTR	sys_mkdirat
+	PTR	sys_mknodat
+	PTR	sys_fchownat		/* 5250 */
+	PTR	sys_ni_syscall		/* futimesat */
+	PTR	sys_ni_syscall		/* newfstatat */
+	PTR	sys_unlinkat
+	PTR	sys_renameat
+	PTR	sys_linkat		/* 5255 */
+	PTR	sys_symlinkat
+	PTR	sys_readlinkat
+	PTR	sys_fchmodat
+	PTR	sys_faccessat
+	PTR	sys_pselect6		/* 5260 */
+	PTR	sys_ppoll
+	PTR	sys_unshare
+	PTR	sys_splice
+	PTR	sys_sync_file_range
+	PTR	sys_tee			/* 5265 */
+	PTR	sys_vmsplice
+	PTR	sys_move_pages
+	PTR	sys_set_robust_list
+	PTR	sys_get_robust_list
+	PTR	sys_kexec_load		/* 5270 */
+	PTR	sys_getcpu
+	PTR	sys_epoll_pwait
+	PTR	sys_ioprio_set
+	PTR	sys_ioprio_get
+	PTR	sys_utimensat		/* 5275 */
+	PTR	sys_ni_syscall		/* signalfd */
+	PTR	sys_ni_syscall		/* was timerfd */
+	PTR	sys_ni_syscall		/* eventfd */
+	PTR	sys_fallocate
+	PTR	sys_timerfd_create	/* 5280 */
+	PTR	sys_timerfd_gettime
+	PTR	sys_timerfd_settime
+	PTR	sys_signalfd4
+	PTR	sys_eventfd2
+	PTR	sys_epoll_create1	/* 5285 */
+	PTR	sys_dup3
+	PTR	sys_pipe2
+	PTR	sys_inotify_init1
+	PTR	sys_preadv
+	PTR	sys_pwritev		/* 5290 */
+	PTR	sys_rt_tgsigqueueinfo
+	PTR	sys_perf_event_open
+	PTR	sys_accept4
+	PTR	sys_recvmmsg
+	PTR	sys_fanotify_init	/* 5295 */
+	PTR	sys_fanotify_mark
+	PTR	sys_ni_syscall
+	PTR	sys_name_to_handle_at
+	PTR	sys_open_by_handle_at
+	PTR	sys_clock_adjtime	/* 5300 */
+	PTR	sys_syncfs
+	PTR	sys_sendmmsg
+	PTR	sys_setns
+	PTR	sys_process_vm_readv
+	PTR	sys_process_vm_writev	/* 5305 */
+	PTR	sys_kcmp
+	PTR	sys_finit_module
+	PTR	sys_getdents64
+	PTR	sys_sched_setattr
+	PTR	sys_sched_getattr	/* 5310 */
+	PTR	sys_renameat2
+	PTR	sys_seccomp
+	PTR	sys_getrandom
+	PTR	sys_memfd_create
+	PTR	sys_bpf			/* 5315 */
+	PTR	sys_execveat
+	PTR	sys_userfaultfd
+	PTR	sys_membarrier
+	PTR	sys_mlock2
+	PTR	sys_copy_file_range	/* 5320 */
+	PTR	sys_preadv2
+	PTR	sys_pwritev2
+	PTR	sys_pkey_mprotect
+	PTR	sys_pkey_alloc
+	PTR	sys_pkey_free		/* 5325 */
+	PTR	sys_statx
+	PTR	sys_rseq
+	PTR	sys_io_pgetevents
+	PTR	sys_set_user_tp
+SYM_DATA_END(mips64_syscall_table)
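Both handlers share the choose_abi step above: the high bits of a7 carry an ABI tag, the low bits the raw syscall number, and MIPS-N64 numbers are additionally biased by __NR_MIPS64_Linux (5000). A rough C rendering of the decode; TRANS_ARCH_MASK, SYS_NUM_MASK and TRANS_MIPS_N64 are the (unshown) header constants, so their exact values are not fixed by this hunk:

    /* Sketch of choose_abi: split a7 into ABI tag and syscall number. */
    static int mips64_table_index(unsigned long a7)
    {
        if ((a7 & TRANS_ARCH_MASK) != TRANS_MIPS_N64)
            return -1;				/* illegal_syscall */
        /* bias: table slot 0 is __NR_MIPS64_Linux (5000) */
        return (int)((a7 & SYS_NUM_MASK) - __NR_MIPS64_Linux);
    }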
diff --git a/arch/loongarch/kernel/signal-common.h b/arch/loongarch/kernel/signal-common.h
index 91e890eb3a00..a6598c3010a5 100644
--- a/arch/loongarch/kernel/signal-common.h
+++ b/arch/loongarch/kernel/signal-common.h
@@ -27,6 +27,7 @@ struct extctx_layout {
 	struct _ctx_layout fpu;
 	struct _ctx_layout lsx;
 	struct _ctx_layout lasx;
+	struct _ctx_layout lbt;
 	struct _ctx_layout end;
 };
 
@@ -55,5 +56,11 @@ extern asmlinkage int
 _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 extern asmlinkage int
 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+#if defined(CONFIG_CPU_HAS_LBT)
+extern asmlinkage int
+_save_scr_context(void __user *scr, void __user *eflags);
+extern asmlinkage int
+_restore_scr_context(void __user *scr, void __user *eflags);
+#endif
 
 #endif /* __SIGNAL_COMMON_H */
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index 757c0ee61e92..7f758e0e3b56 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "signal-common.h"
@@ -176,6 +177,38 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
 	return err;
 }
 
+#if defined(CONFIG_CPU_HAS_LBT)
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+	int err = 0;
+	uint64_t __user *scr = (uint64_t *)&ctx->scr;
+	uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+	err |= __put_user(current->thread.lbt.scr0, &scr[0]);
+	err |= __put_user(current->thread.lbt.scr1, &scr[1]);
+	err |= __put_user(current->thread.lbt.scr2, &scr[2]);
+	err |= __put_user(current->thread.lbt.scr3, &scr[3]);
+	err |= __put_user(current->thread.lbt.eflags, eflags);
+
+	return err;
+}
+
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+	int err = 0;
+	uint64_t __user *scr = (uint64_t *)&ctx->scr;
+	uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+	err |= __get_user(current->thread.lbt.scr0, &scr[0]);
+	err |= __get_user(current->thread.lbt.scr1, &scr[1]);
+	err |= __get_user(current->thread.lbt.scr2, &scr[2]);
+	err |= __get_user(current->thread.lbt.scr3, &scr[3]);
+	err |= __get_user(current->thread.lbt.eflags, eflags);
+
+	return err;
+}
+#endif
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -233,6 +266,24 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
 	return _restore_lasx_context(regs, fcc, fcsr);
 }
 
+#if defined(CONFIG_CPU_HAS_LBT)
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+	uint64_t __user *scr = (uint64_t *)&ctx->scr;
+	uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+	return _save_scr_context(scr, eflags);
+}
+
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+	uint64_t __user *scr = (uint64_t *)&ctx->scr;
+	uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+	return _restore_scr_context(scr, eflags);
+}
+#endif
+
 /*
  * Helper routines
  */
@@ -461,6 +512,67 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
 	return err ?: sig;
 }
 
+#if defined(CONFIG_CPU_HAS_LBT)
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+	int err = 0;
+	struct sctx_info __user *info = extctx->lbt.addr;
+	struct lbt_context __user *lbt_ctx = (struct lbt_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *scr = (uint64_t *)&lbt_ctx->scr;
+	uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+	while (1) {
+		lock_fpu_owner();
+		if (is_lbt_owner())
+			err = save_hw_lbt_context(lbt_ctx);
+		else
+			err = copy_lbt_to_sigcontext(lbt_ctx);
+		unlock_fpu_owner();
+
+		err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+		err |= __put_user(extctx->lbt.size, &info->size);
+
+		if (likely(!err))
+			break;
+		/* Touch the LBT context and try again */
+		err = __put_user(0, &scr[0]) | __put_user(0, eflags);
+
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+	int err = 0, tmp;
+	struct sctx_info __user *info = extctx->lbt.addr;
+	struct lbt_context __user *lbt_ctx = (struct lbt_context *)get_ctx_through_ctxinfo(info);
+	uint64_t __user *scr = (uint64_t *)&lbt_ctx->scr;
+	uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+	while (1) {
+		lock_fpu_owner();
+		if (is_lbt_owner())
+			err = restore_hw_lbt_context(lbt_ctx);
+		else
+			err = copy_lbt_from_sigcontext(lbt_ctx);
+		unlock_fpu_owner();
+
+		if (likely(!err))
+			break;
+		/* Touch the LBT context and try again */
+		err = __get_user(tmp, &scr[0]) | __get_user(tmp, eflags);
+
+		if (err)
+			return err;
+	}
+
+	return err;
+}
+#endif
+
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 			    struct extctx_layout *extctx)
 {
@@ -481,6 +593,10 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	else if (extctx->fpu.addr)
 		err |= protected_save_fpu_context(extctx);
 
+#if defined(CONFIG_CPU_HAS_LBT)
+	if (extctx->lbt.addr)
+		err |= protected_save_lbt_context(extctx);
+#endif
 	/* Set the "end" magic */
 	info = (struct sctx_info *)extctx->end.addr;
 	err |= __put_user(0, &info->magic);
@@ -545,6 +661,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
 			extctx->lasx.addr = info;
 			break;
 
+		case LBT_CTX_MAGIC:
+			if (size < (sizeof(struct sctx_info) +
+				    sizeof(struct lbt_context)))
+				goto invalid;
+			extctx->lbt.addr = info;
+			break;
+
 		default:
 			goto invalid;
 		}
@@ -597,6 +720,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
 	else if (extctx.fpu.addr)
 		err |= protected_restore_fpu_context(&extctx);
 
+#if defined(CONFIG_CPU_HAS_LBT)
+	if (extctx.lbt.addr)
+		err |= protected_restore_lbt_context(&extctx);
+#endif
+
 bad:
 	return err;
 }
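parse_extcontext above (and now its LBT case) walks a chain of (magic, size) records appended after the basic sigcontext. A compact sketch of that walk; the sctx_info field layout is an assumption consistent with the __put_user calls in this patch:

    #include <stdint.h>

    struct sctx_info {
        uint32_t magic;		/* FPU/LSX/LASX/LBT_CTX_MAGIC, 0 terminates */
        uint32_t size;		/* bytes from this header to the next record */
        uint64_t padding;
    };

    /* Sketch: locate the LBT record the way parse_extcontext does. */
    static struct sctx_info *find_lbt_record(struct sctx_info *info,
                                             uint32_t lbt_magic)
    {
        while (info->magic != 0) {
            if (info->magic == lbt_magic)
                return info;
            info = (struct sctx_info *)((char *)info + info->size);
        }
        return 0;			/* no LBT record present */
    }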
@@ -661,6 +789,12 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
 				sizeof(struct fpu_context),
 				FPU_CTX_ALIGN, new_sp);
 	}
 
+#if defined(CONFIG_CPU_HAS_LBT)
+	if (cpu_has_lbt && thread_lbt_context_live())
+		new_sp = extframe_alloc(extctx, &extctx->lbt,
+			sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+#endif
+
 	return new_sp;
 }
 
@@ -762,6 +896,10 @@ static int setup_rt_frame(void *sig_return, struct ksignal *ksig,
 	 * c0_era point to the signal handler, $r3 (sp) points to
 	 * the struct rt_sigframe.
 	 */
+#ifdef CONFIG_CPU_HAS_LBT
+	if (current_thread_info()->tp_value)
+		regs->regs[2] = current_thread_info()->tp_value;
+#endif
 	regs->regs[4] = ksig->sig;
 	regs->regs[5] = (unsigned long) &frame->rs_info;
 	regs->regs[6] = (unsigned long) &frame->rs_uctx;
diff --git a/arch/loongarch/kernel/trans_i386_syscalls.c b/arch/loongarch/kernel/trans_i386_syscalls.c
new file mode 100644
index 000000000000..a71313d72367
--- /dev/null
+++ b/arch/loongarch/kernel/trans_i386_syscalls.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ * Authors: Hanlu Li
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include
+
+typedef struct {
+	uint32_t sig[2];
+} latx_sigset_t;
+
+typedef uint32_t latx_size_t;
+
+static inline void latx_sig_setmask(sigset_t *blocked, old_sigset_t set)
+{
+	memcpy(blocked->sig, &set, sizeof(set));
+}
+
+/* This is for latx-i386 */
+SYSCALL_DEFINE4(latx_rt_sigprocmask, int, how, latx_sigset_t __user *, nset,
+		latx_sigset_t __user *, oset, latx_size_t, sigsetsize)
+{
+	old_sigset_t old_set, new_set;
+	sigset_t new_blocked;
+
+	/* XXX: Don't preclude handling different sized sigset_t's. */
+	if (sigsetsize != sizeof(latx_sigset_t))
+		return -EINVAL;
+
+	old_set = current->blocked.sig[0];
+
+	if (nset) {
+		if (copy_from_user(&new_set, nset, sizeof(latx_sigset_t)))
+			return -EFAULT;
+		new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
+
+		new_blocked = current->blocked;
+
+		switch (how) {
+		case SIG_BLOCK:
+			sigaddsetmask(&new_blocked, new_set);
+			break;
+		case SIG_UNBLOCK:
+			sigdelsetmask(&new_blocked, new_set);
+			break;
+		case SIG_SETMASK:
+			latx_sig_setmask(&new_blocked, new_set);
+			break;
+		default:
+			return -EINVAL;
+		}
+		set_current_blocked(&new_blocked);
+
+		if (copy_to_user(nset, &(current->blocked),
+				 sizeof(latx_sigset_t)))
+			return -EFAULT;
+	}
+
+	if (oset) {
+		if (copy_to_user(oset, &old_set, sizeof(latx_sigset_t)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
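Seen from the translator, the handler above is just i386 syscall 175 taking 8-byte signal sets. A hedged caller-side sketch; syscall_i386() is a stand-in for however LATX issues translated syscalls, not an API this patch defines:

    #include <stdint.h>

    typedef struct { uint32_t sig[2]; } latx_sigset_t;	/* mirrors the patch */

    extern long syscall_i386(int nr, ...);	/* hypothetical entry point */

    /* Sketch: block x86 SIGUSR1 (signal 10) via translated rt_sigprocmask. */
    static long block_sigusr1(void)
    {
        latx_sigset_t set = { { 1u << (10 - 1), 0 } };

        return syscall_i386(175 /* rt_sigprocmask */, 0 /* SIG_BLOCK */,
                            &set, (void *)0, sizeof(set));
    }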
diff --git a/arch/loongarch/kernel/trans_mips_syscalls.c b/arch/loongarch/kernel/trans_mips_syscalls.c
new file mode 100644
index 000000000000..ea8d319edbbf
--- /dev/null
+++ b/arch/loongarch/kernel/trans_mips_syscalls.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Loongson Technology Corporation Limited
+ * Authors: Hanlu Li
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include
+#include
+
+static const unsigned int mips_errno[] = {
+	[EDEADLK]		= M_EDEADLK,
+	[ENAMETOOLONG]		= M_ENAMETOOLONG,
+	[ENOLCK]		= M_ENOLCK,
+	[ENOSYS]		= M_ENOSYS,
+	[ENOTEMPTY]		= M_ENOTEMPTY,
+	[ELOOP]			= M_ELOOP,
+	[ENOMSG]		= M_ENOMSG,
+	[EIDRM]			= M_EIDRM,
+	[ECHRNG]		= M_ECHRNG,
+	[EL2NSYNC]		= M_EL2NSYNC,
+	[EL3HLT]		= M_EL3HLT,
+	[EL3RST]		= M_EL3RST,
+	[ELNRNG]		= M_ELNRNG,
+	[EUNATCH]		= M_EUNATCH,
+	[ENOCSI]		= M_ENOCSI,
+	[EL2HLT]		= M_EL2HLT,
+	[EBADE]			= M_EBADE,
+	[EBADR]			= M_EBADR,
+	[EXFULL]		= M_EXFULL,
+	[ENOANO]		= M_ENOANO,
+	[EBADRQC]		= M_EBADRQC,
+	[EBADSLT]		= M_EBADSLT,
+	[EBFONT]		= M_EBFONT,
+	[ENOSTR]		= M_ENOSTR,
+	[ENODATA]		= M_ENODATA,
+	[ETIME]			= M_ETIME,
+	[ENOSR]			= M_ENOSR,
+	[ENONET]		= M_ENONET,
+	[ENOPKG]		= M_ENOPKG,
+	[EREMOTE]		= M_EREMOTE,
+	[ENOLINK]		= M_ENOLINK,
+	[EADV]			= M_EADV,
+	[ESRMNT]		= M_ESRMNT,
+	[ECOMM]			= M_ECOMM,
+	[EPROTO]		= M_EPROTO,
+	[EMULTIHOP]		= M_EMULTIHOP,
+	[EDOTDOT]		= M_EDOTDOT,
+	[EBADMSG]		= M_EBADMSG,
+	[EOVERFLOW]		= M_EOVERFLOW,
+	[ENOTUNIQ]		= M_ENOTUNIQ,
+	[EBADFD]		= M_EBADFD,
+	[EREMCHG]		= M_EREMCHG,
+	[ELIBACC]		= M_ELIBACC,
+	[ELIBBAD]		= M_ELIBBAD,
+	[ELIBSCN]		= M_ELIBSCN,
+	[ELIBMAX]		= M_ELIBMAX,
+	[ELIBEXEC]		= M_ELIBEXEC,
+	[EILSEQ]		= M_EILSEQ,
+	[ERESTART]		= M_ERESTART,
+	[ESTRPIPE]		= M_ESTRPIPE,
+	[EUSERS]		= M_EUSERS,
+	[ENOTSOCK]		= M_ENOTSOCK,
+	[EDESTADDRREQ]		= M_EDESTADDRREQ,
+	[EMSGSIZE]		= M_EMSGSIZE,
+	[EPROTOTYPE]		= M_EPROTOTYPE,
+	[ENOPROTOOPT]		= M_ENOPROTOOPT,
+	[EPROTONOSUPPORT]	= M_EPROTONOSUPPORT,
+	[ESOCKTNOSUPPORT]	= M_ESOCKTNOSUPPORT,
+	[EOPNOTSUPP]		= M_EOPNOTSUPP,
+	[EPFNOSUPPORT]		= M_EPFNOSUPPORT,
+	[EAFNOSUPPORT]		= M_EAFNOSUPPORT,
+	[EADDRINUSE]		= M_EADDRINUSE,
+	[EADDRNOTAVAIL]		= M_EADDRNOTAVAIL,
+	[ENETDOWN]		= M_ENETDOWN,
+	[ENETUNREACH]		= M_ENETUNREACH,
+	[ENETRESET]		= M_ENETRESET,
+	[ECONNABORTED]		= M_ECONNABORTED,
+	[ECONNRESET]		= M_ECONNRESET,
+	[ENOBUFS]		= M_ENOBUFS,
+	[EISCONN]		= M_EISCONN,
+	[ENOTCONN]		= M_ENOTCONN,
+	[ESHUTDOWN]		= M_ESHUTDOWN,
+	[ETOOMANYREFS]		= M_ETOOMANYREFS,
+	[ETIMEDOUT]		= M_ETIMEDOUT,
+	[ECONNREFUSED]		= M_ECONNREFUSED,
+	[EHOSTDOWN]		= M_EHOSTDOWN,
+	[EHOSTUNREACH]		= M_EHOSTUNREACH,
+	[EALREADY]		= M_EALREADY,
+	[EINPROGRESS]		= M_EINPROGRESS,
+	[ESTALE]		= M_ESTALE,
+	[EUCLEAN]		= M_EUCLEAN,
+	[ENOTNAM]		= M_ENOTNAM,
+	[ENAVAIL]		= M_ENAVAIL,
+	[EISNAM]		= M_EISNAM,
+	[EREMOTEIO]		= M_EREMOTEIO,
+	[EDQUOT]		= M_EDQUOT,
+	[ENOMEDIUM]		= M_ENOMEDIUM,
+	[EMEDIUMTYPE]		= M_EMEDIUMTYPE,
+	[ECANCELED]		= M_ECANCELED,
+	[ENOKEY]		= M_ENOKEY,
+	[EKEYEXPIRED]		= M_EKEYEXPIRED,
+	[EKEYREVOKED]		= M_EKEYREVOKED,
+	[EKEYREJECTED]		= M_EKEYREJECTED,
+	[EOWNERDEAD]		= M_EOWNERDEAD,
+	[ENOTRECOVERABLE]	= M_ENOTRECOVERABLE,
+	[ERFKILL]		= M_ERFKILL,
+	[EHWPOISON]		= M_EHWPOISON,
+};
+
+asmlinkage int trans_mips_errno(int errno)
+{
+	if (errno >= EDEADLK && errno <= EHWPOISON)
+		return mips_errno[errno];
+	return errno;
+}
+
+SYSCALL_DEFINE0(set_user_tp)
+{
+	current_thread_info()->tp_value = task_pt_regs(current)->regs[2];
+	return 0;
+}
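trans_mips_errno is reached from the assembly error path in scall-trans-mips64.S, which negates the Linux return value before the call; anything below EDEADLK is numerically identical on both ABIs and passes through. A small sketch of that call site in C:

    /* Sketch of the asm error path: negate, translate, flag the error. */
    static unsigned long mips64_error_result(long ret, int *errflag)
    {
        *errflag = 1;			/* stored in PT_R7 in the real path */
        return trans_mips_errno((int)-ret);	/* e.g. Linux ELOOP -> M_ELOOP */
    }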
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 013f0f90f5fb..ce0e18d7335b 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -49,6 +49,7 @@
 #include
 #include
 #include
+#include
 
 #include "access-helper.h"
 
@@ -978,12 +979,38 @@ asmlinkage void noinstr do_lasx(struct pt_regs *regs)
 	irqentry_exit(regs, state);
 }
 
+static void init_restore_lbt(void)
+{
+	if (!thread_lbt_context_live()) {
+		/* First lbt context user */
+		init_lbt();
+		set_thread_flag(TIF_LBT_CTX_LIVE);
+	} else {
+		/* Enable and restore */
+		own_lbt_inatomic(1);
+	}
+}
+
 asmlinkage void noinstr do_lbt(struct pt_regs *regs)
 {
 	irqentry_state_t state = irqentry_enter(regs);
 
-	local_irq_enable();
-	force_sig(SIGILL);
-	local_irq_disable();
+
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_enable();
+
+	if (!cpu_has_lbt) {
+		force_sig(SIGILL);
+		goto out;
+	}
+
+	preempt_disable();
+	init_restore_lbt();
+	preempt_enable();
+
+out:
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_disable();
+
 	irqentry_exit(regs, state);
 }
-- 
Gitee

From 0d1f2f36f5d514a70b2231d732d9b6321f8ab5eb Mon Sep 17 00:00:00 2001
From: Huacai Chen
Date: Thu, 31 Dec 2020 15:13:33 +0800
Subject: [PATCH 238/241] LoongArch: Add KVM virtualization support

Change-Id: Id11c9b906bd7a4769f238d128770fba376ba22ef
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kbuild                          |    1 +
 arch/loongarch/Kconfig                         |   42 +
 arch/loongarch/configs/loongson3_defconfig     |    4 +
 arch/loongarch/include/asm/Kbuild              |    1 -
 arch/loongarch/include/asm/cpu-features.h      |   31 +
 arch/loongarch/include/asm/cpu-info.h          |   13 +
 arch/loongarch/include/asm/kvm_host.h          |  356 +++
 arch/loongarch/include/asm/kvm_para.h          |  199 ++
 arch/loongarch/include/asm/kvm_types.h         |    7 +
 arch/loongarch/include/asm/loongarchregs.h     |  191 +-
 arch/loongarch/include/asm/paravirt.h          |   97 +
 arch/loongarch/include/asm/qspinlock.h         |   32 +
 .../include/asm/qspinlock_paravirt.h           |   18 +
 arch/loongarch/include/asm/smp.h               |    2 +-
 arch/loongarch/include/asm/spinlock.h          |    3 +
 arch/loongarch/include/uapi/asm/Kbuild         |    1 -
 arch/loongarch/include/uapi/asm/kvm.h          |  273 ++
 arch/loongarch/include/uapi/asm/kvm_para.h     |    8 +
 arch/loongarch/kernel/Makefile                 |    3 +
 arch/loongarch/kernel/asm-offsets.c            |   34 +
 arch/loongarch/kernel/cpu-probe.c              |   53 +-
 arch/loongarch/kernel/paravirt-spinlocks.c     |   48 +
 arch/loongarch/kernel/paravirt.c               |  319 +++
 arch/loongarch/kernel/smp.c                    |   13 +-
 arch/loongarch/kernel/time.c                   |    3 +
 arch/loongarch/kernel/watch.c                  |    2 +
 arch/loongarch/kvm/Kconfig                     |   41 +
 arch/loongarch/kvm/Makefile                    |   22 +
 arch/loongarch/kvm/csr.c                       |  678 +++++
 arch/loongarch/kvm/emulate.c                   |  335 +++
 arch/loongarch/kvm/entry.S                     |  304 +++
 arch/loongarch/kvm/exit.c                      |  521 ++++
 arch/loongarch/kvm/fpu.c                       |   94 +
 arch/loongarch/kvm/hypcall.c                   |  130 +
 arch/loongarch/kvm/intc/irqchip-debug.c        |   49 +
 arch/loongarch/kvm/intc/ls3a_ext_irq.c         |  922 +++++++
 arch/loongarch/kvm/intc/ls3a_ext_irq.h         |  129 +
 arch/loongarch/kvm/intc/ls3a_ipi.c             |  292 +++
 arch/loongarch/kvm/intc/ls3a_ipi.h             |   77 +
 arch/loongarch/kvm/intc/ls7a_irq.c             |  633 +++++
 arch/loongarch/kvm/intc/ls7a_irq.h             |  112 +
 arch/loongarch/kvm/interrupt.c                 |  133 +
 arch/loongarch/kvm/irqfd.c                     |  124 +
 arch/loongarch/kvm/kvm_compat.h                |  762 ++++++
 arch/loongarch/kvm/kvmcpu.h                    |  126 +
 arch/loongarch/kvm/kvmcsr.h                    |  115 +
 arch/loongarch/kvm/loongarch.c                 | 2208 +++++++++++++++++
 arch/loongarch/kvm/ls_irq.h                    |   15 +
 arch/loongarch/kvm/mmu.c                       | 1316 ++++++++++
 arch/loongarch/kvm/timer.c                     |  274 ++
 arch/loongarch/kvm/trace.h                     |  201 ++
 arch/loongarch/mm/hugetlbpage.c                |    2 +
 drivers/gpu/drm/amd/display/Kconfig            |    2 +-
 drivers/irqchip/irq-loongson-eiointc.c         |   33 +-
 include/uapi/linux/kvm.h                       |   29 +
 virt/kvm/irqchip.c                             |    4 +
 56 files changed, 11415 insertions(+), 22 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kvm_host.h
 create mode 100644 arch/loongarch/include/asm/kvm_para.h
 create mode 100644 arch/loongarch/include/asm/kvm_types.h
 create mode 100644 arch/loongarch/include/asm/paravirt.h
 create mode 100644 arch/loongarch/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/loongarch/include/uapi/asm/kvm.h
 create mode 100644 arch/loongarch/include/uapi/asm/kvm_para.h
 create mode 100644 arch/loongarch/kernel/paravirt-spinlocks.c
 create mode 100644 arch/loongarch/kernel/paravirt.c
 create mode 100644 arch/loongarch/kvm/Kconfig
 create mode 100644 arch/loongarch/kvm/Makefile
 create mode 100644 arch/loongarch/kvm/csr.c
 create mode 100644 arch/loongarch/kvm/emulate.c
 create mode 100644 arch/loongarch/kvm/entry.S
 create mode 100644 arch/loongarch/kvm/exit.c
 create mode 100644 arch/loongarch/kvm/fpu.c
 create mode 100644 arch/loongarch/kvm/hypcall.c
 create mode 100644 arch/loongarch/kvm/intc/irqchip-debug.c
 create mode 100644 arch/loongarch/kvm/intc/ls3a_ext_irq.c
 create mode 100644 arch/loongarch/kvm/intc/ls3a_ext_irq.h
 create mode 100644 arch/loongarch/kvm/intc/ls3a_ipi.c
 create mode 100644 arch/loongarch/kvm/intc/ls3a_ipi.h
 create mode 100644 arch/loongarch/kvm/intc/ls7a_irq.c
 create mode 100644 arch/loongarch/kvm/intc/ls7a_irq.h
 create mode 100644 arch/loongarch/kvm/interrupt.c
 create mode 100644 arch/loongarch/kvm/irqfd.c
 create mode 100644 arch/loongarch/kvm/kvm_compat.h
 create mode 100644 arch/loongarch/kvm/kvmcpu.h
 create mode 100644 arch/loongarch/kvm/kvmcsr.h
 create mode 100644 arch/loongarch/kvm/loongarch.c
 create mode 100644 arch/loongarch/kvm/ls_irq.h
 create mode 100644 arch/loongarch/kvm/mmu.c
 create mode 100644 arch/loongarch/kvm/timer.c
 create mode 100644 arch/loongarch/kvm/trace.h

diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index be21050cf9df..7214ae22f0ec 100644
--- a/arch/loongarch/Kbuild
+++ b/arch/loongarch/Kbuild
@@ -21,4 +21,5 @@ obj-y += mm/
 obj-y += net/
 obj-y += vdso/
 
+obj-$(CONFIG_KVM) += kvm/
 obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e806bfe338df..64cfb86477ee 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -226,6 +226,7 @@ config CPU_LOONGSON64
 	select CPU_SUPPORTS_LASX
 	select GPIOLIB
 	select SWIOTLB
+	select HAVE_KVM
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	help
 	  The Loongson 64-bit processor implements the LoongArch64 (the 64-bit
@@ -850,3 +851,44 @@ source "drivers/cpufreq/Kconfig"
 endmenu
 
 source "drivers/firmware/Kconfig"
+source "arch/loongarch/kvm/Kconfig"
+
+config PARAVIRT
+	bool "Enable paravirtualization code"
+	help
+	  This changes the kernel so it can modify itself when it is run
+	  under a hypervisor, potentially improving performance significantly
+	  over full virtualization.
+
+config PARAVIRT_SPINLOCKS
+	bool "Paravirtualization layer for spinlocks"
+	depends on PARAVIRT && SMP
+	help
+	  Paravirtualized spinlocks allow a pvops backend to replace the
+	  spinlock implementation with something virtualization-friendly
+	  (for example, block the virtual CPU rather than spinning).
+
+	  It has a minimal impact on native kernels and gives a nice performance
+	  benefit on paravirtualized KVM kernels.
+
+	  If you are unsure how to answer this question, answer Y.
+
+config QUEUED_LOCK_STAT
+	bool "Paravirt queued spinlock statistics"
+	depends on PARAVIRT_SPINLOCKS && DEBUG_FS
+	help
+	  Enable the collection of statistical data on the slowpath
+	  behavior of paravirtualized queued spinlocks and report
+	  them on debugfs.
+
+config PARAVIRT_TIME_ACCOUNTING
+	bool "Paravirtual steal time accounting"
+	select PARAVIRT
+	default n
+	help
+	  Select this option to enable fine granularity task steal time
+	  accounting. Time spent executing other tasks in parallel with
+	  the current vCPU is discounted from the vCPU power. To account for
+	  that, there can be a small performance impact.
+
+	  If in doubt, say N here.
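As the PARAVIRT_TIME_ACCOUNTING help text says, steal time is read from the hypervisor and discounted from the vCPU. The LoongArch implementation lands in arch/loongarch/kernel/paravirt.c below (not reproduced in this series excerpt); the usual shape of such a hook, sketched with an assumed kvm_steal_time layout, is:

    /* Sketch only: field layout and names are assumptions, not this patch. */
    struct kvm_steal_time {
        u64 steal;	/* ns stolen by the host, updated by the hypervisor */
        u32 version;	/* even = stable, odd = update in progress */
        u32 flags;
    };

    static DEFINE_PER_CPU(struct kvm_steal_time, steal_time);

    static u64 pv_steal_clock(int cpu)
    {
        struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
        u32 version;
        u64 steal;

        do {	/* seqcount-style retry against concurrent host updates */
            version = READ_ONCE(src->version);
            smp_rmb();
            steal = READ_ONCE(src->steal);
            smp_rmb();
        } while ((version & 1) || version != READ_ONCE(src->version));

        return steal;
    }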
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index aa3b77f59c63..9b33575b7d93 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -70,6 +70,10 @@ CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_BOOTLOADER_CONTROL=m
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=y
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_PARAVIRT_TIME_ACCOUNTING=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_SCSI=m
 CONFIG_VHOST_VSOCK=m
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index b032dcabfa11..5981af719607 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -26,4 +26,3 @@ generic-y += param.h
 generic-y += posix_types.h
 generic-y += resource.h
 generic-y += seccomp.h
-generic-y += kvm_para.h
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index 03b9cf1453a6..cf96d843cd97 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -62,4 +62,35 @@
 #define cpu_has_hypervisor	cpu_opt(LOONGARCH_CPU_HYPERVISOR)
 #define cpu_has_ptw		cpu_opt(LOONGARCH_CPU_PTW)
 
+#define cpu_has_matc_guest	(cpu_data[0].guest_cfg & (1 << 0))
+#define cpu_has_matc_root	(cpu_data[0].guest_cfg & (1 << 1))
+#define cpu_has_matc_nest	(cpu_data[0].guest_cfg & (1 << 2))
+#define cpu_has_sitp		(cpu_data[0].guest_cfg & (1 << 6))
+#define cpu_has_titp		(cpu_data[0].guest_cfg & (1 << 8))
+#define cpu_has_toep		(cpu_data[0].guest_cfg & (1 << 10))
+#define cpu_has_topp		(cpu_data[0].guest_cfg & (1 << 12))
+#define cpu_has_torup		(cpu_data[0].guest_cfg & (1 << 14))
+#define cpu_has_gcip_all	(cpu_data[0].guest_cfg & (1 << 16))
+#define cpu_has_gcip_hit	(cpu_data[0].guest_cfg & (1 << 17))
+#define cpu_has_gcip_secure	(cpu_data[0].guest_cfg & (1 << 18))
+
+/*
+ * Guest capabilities
+ */
+#define cpu_guest_has_conf1	(cpu_data[0].guest.conf & (1 << 1))
+#define cpu_guest_has_conf2	(cpu_data[0].guest.conf & (1 << 2))
+#define cpu_guest_has_conf3	(cpu_data[0].guest.conf & (1 << 3))
+#define cpu_guest_has_fpu	(cpu_data[0].guest.options & LOONGARCH_CPU_FPU)
+#define cpu_guest_has_perf	(cpu_data[0].guest.options & LOONGARCH_CPU_PMP)
+#define cpu_guest_has_watch	(cpu_data[0].guest.options & LOONGARCH_CPU_WATCH)
+#define cpu_guest_has_lsx	(cpu_data[0].guest.ases & LOONGARCH_ASE_LSX)
+#define cpu_guest_has_kscr(n)	(cpu_data[0].guest.kscratch_mask & (1u << (n)))
+
+/*
+ * Guest dynamic capabilities
+ */
+#define cpu_guest_has_dyn_fpu	(cpu_data[0].guest.options_dyn & LOONGARCH_CPU_FPU)
+#define cpu_guest_has_dyn_perf	(cpu_data[0].guest.options_dyn & LOONGARCH_CPU_PMP)
+#define cpu_guest_has_dyn_lsx	(cpu_data[0].guest.ases_dyn & LOONGARCH_ASE_LSX)
+
 #endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h
index 8757ba20b054..2e3bd40c5e4d 100644
--- a/arch/loongarch/include/asm/cpu-info.h
+++ b/arch/loongarch/include/asm/cpu-info.h
@@ -32,6 +32,15 @@ struct cache_desc {
 #define CACHE_LEVEL_MAX		3
 #define CACHE_LEAVES_MAX	6
 
+struct guest_info {
+	unsigned long		ases;
+	unsigned long		ases_dyn;
+	unsigned long long	options;
+	unsigned long long	options_dyn;
+	u8			conf;
+	unsigned int		kscratch_mask;
+};
+
 struct cpuinfo_loongarch {
 	u64			asid_cache;
 	unsigned long		asid_mask;
@@ -62,6 +71,10 @@ struct cpuinfo_loongarch {
 	unsigned int		watch_dreg_count;   /* Number data breakpoints */
 	unsigned int		watch_ireg_count;   /* Number instruction breakpoints */
 	unsigned int		watch_reg_use_cnt;  /* min(NUM_WATCH_REGS, watch_dreg_count + watch_ireg_count), Usable by ptrace */
+
+	/* VZ & Guest features */
+	struct guest_info	guest;
+	unsigned long		guest_cfg;
 } __attribute__((aligned(SMP_CACHE_BYTES)));
 
 extern struct cpuinfo_loongarch cpu_data[];
diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
new file mode 100644
index 000000000000..fcc5c1e2b2ad
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef __LOONGARCH_KVM_HOST_H__
+#define __LOONGARCH_KVM_HOST_H__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+
+/* Loongarch KVM register ids */
+#define LOONGARCH_CSR_32(_R, _S) \
+	(KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
+
+#define LOONGARCH_CSR_64(_R, _S) \
+	(KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | (8 * (_R) + (_S)))
+
+#define KVM_IOC_CSRID(id)	LOONGARCH_CSR_64(id, 0)
+#define KVM_GET_IOC_CSRIDX(id)	((id & KVM_CSR_IDX_MASK) >> 3)
+
+#define LOONGSON_VIRT_REG_BASE	0x1f000000
+#define KVM_MAX_VCPUS		256
+#define KVM_USER_MEM_SLOTS	256
+/* memory slots that does not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS	0
+
+#define KVM_HALT_POLL_NS_DEFAULT 500000
+
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(1)
+#define KVM_REQ_EVENT		KVM_ARCH_REQ(4)
+
+#define KVM_INVALID_ADDR	0xdeadbeef
+#define KVM_HVA_ERR_BAD		(-1UL)
+#define KVM_HVA_ERR_RO_BAD	(-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+	return IS_ERR_VALUE(addr);
+}
+
+struct kvm_vm_stat {
+	ulong remote_tlb_flush;
+	u64 vm_ioctl_irq_line;
+	u64 ls7a_ioapic_update;
+	u64 ls7a_ioapic_set_irq;
+	u64 ioapic_reg_write;
+	u64 ioapic_reg_read;
+	u64 set_ls7a_ioapic;
+	u64 get_ls7a_ioapic;
+	u64 set_ls3a_ext_irq;
+	u64 get_ls3a_ext_irq;
+	u64 trigger_ls3a_ext_irq;
+	u64 pip_read_exits;
+	u64 pip_write_exits;
+	u64 ls7a_msi_irq;
+};
+
+struct kvm_vcpu_stat {
+	u64 excep_exits[EXCCODE_INT_START];
+	u64 idle_exits;
+	u64 signal_exits;
+	u64 int_exits;
+	u64 rdcsr_cpu_feature_exits;
+	u64 rdcsr_misc_func_exits;
+	u64 rdcsr_ipi_access_exits;
+	u64 cpucfg_exits;
+	u64 huge_dec_exits;
+	u64 huge_thp_exits;
+	u64 huge_adjust_exits;
+	u64 huge_set_exits;
+	u64 huge_merge_exits;
+	u64 halt_successful_poll;
+	u64 halt_attempted_poll;
+	u64 halt_poll_success_ns;
+	u64 halt_poll_fail_ns;
+	u64 halt_poll_invalid;
+	u64 halt_wakeup;
+};
+
+#define KVM_MEMSLOT_DISABLE_THP	(1UL << 17)
+struct kvm_arch_memory_slot {
+	unsigned int flags;
+};
+
+enum {
+	IOCSR_FEATURES,
+	IOCSR_VENDOR,
+	IOCSR_CPUNAME,
+	IOCSR_NODECNT,
+	IOCSR_MISC_FUNC,
+	IOCSR_MAX
+};
+
+struct kvm_context {
+	unsigned long gid_mask;
+	unsigned long gid_ver_mask;
+	unsigned long gid_fisrt_ver;
+	unsigned long vpid_cache;
+	struct kvm_vcpu *last_vcpu;
+};
+
+struct kvm_arch {
+	/* Guest physical mm */
+	struct mm_struct gpa_mm;
+	/* Mask of CPUs needing GPA ASID flush */
+	cpumask_t asid_flush_mask;
+
+	unsigned char online_vcpus;
+	unsigned char is_migrate;
+	s64 stablecounter_gftoffset;
+	u32 cpucfg_lasx;
+	struct ls7a_kvm_ioapic *v_ioapic;
+	struct ls3a_kvm_ipi *v_gipi;
+	struct ls3a_kvm_routerirq *v_routerirq;
+	struct ls3a_kvm_extirq *v_extirq;
+	spinlock_t iocsr_lock;
+	struct kvm_iocsr_entry iocsr[IOCSR_MAX];
+	struct kvm_cpucfg cpucfgs;
+	struct kvm_context __percpu *vmcs;
+};
+
+
+#define LOONGARCH_CSRS		0x100
+#define CSR_UCWIN_BASE		0x100
+#define CSR_UCWIN_SIZE		0x10
+#define CSR_DMWIN_BASE		0x180
+#define CSR_DMWIN_SIZE		0x4
+#define CSR_PERF_BASE		0x200
+#define CSR_PERF_SIZE		0x8
+#define CSR_DEBUG_BASE		0x500
+#define CSR_DEBUG_SIZE		0x3
+#define CSR_ALL_SIZE		0x800
+
+struct loongarch_csrs {
+	unsigned long csrs[CSR_ALL_SIZE];
+};
+
+/* Resume Flags */
+#define RESUME_FLAG_DR		(1<<0)	/* Reload guest nonvolatile state? */
+#define RESUME_FLAG_HOST	(1<<1)	/* Resume host? */
+
+#define RESUME_GUEST		0
+#define RESUME_GUEST_DR		RESUME_FLAG_DR
+#define RESUME_HOST		RESUME_FLAG_HOST
+
+enum emulation_result {
+	EMULATE_DONE,		/* no further processing */
+	EMULATE_DO_MMIO,	/* kvm_run filled with MMIO request */
+	EMULATE_FAIL,		/* can't emulate this instruction */
+	EMULATE_WAIT,		/* WAIT instruction */
+	EMULATE_PRIV_FAIL,
+	EMULATE_EXCEPT,		/* A guest exception has been generated */
+	EMULATE_PV_HYPERCALL,	/* HYPCALL instruction */
+	EMULATE_DEBUG,		/* Emulate guest kernel debug */
+	EMULATE_DO_IOCSR,	/* handle IOCSR request */
+};
+
+#define KVM_LARCH_FPU		(0x1 << 0)
+#define KVM_LARCH_LSX		(0x1 << 1)
+#define KVM_LARCH_LASX		(0x1 << 2)
+#define KVM_LARCH_FP_ALL	(KVM_LARCH_FPU | KVM_LARCH_LSX | KVM_LARCH_LASX)
+#define KVM_LARCH_DATA_HWBP	(0x1 << 3)
+#define KVM_LARCH_INST_HWBP	(0x1 << 4)
+#define KVM_LARCH_HWBP		(KVM_LARCH_DATA_HWBP | KVM_LARCH_INST_HWBP)
+#define KVM_LARCH_RESET		(0x1 << 5)
+#define KVM_LARCH_PERF		(0x1 << 6)
+#define KVM_LARCH_LBT		(0x1 << 7)
+
+struct kvm_vcpu_arch {
+	unsigned long guest_eentry;
+	unsigned long host_eentry;
+	int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+	int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+	/* Host registers preserved across guest mode execution */
+	unsigned long host_stack;
+	unsigned long host_gp;
+	unsigned long host_pgd;
+	unsigned long host_pgdhi;
+	unsigned long host_entryhi;
+
+	/* Host CSR registers used when handling exits from guest */
+	unsigned long badv;
+	unsigned long host_estat;
+	unsigned long badi;
+	unsigned long host_ecfg;
+	unsigned long host_percpu;
+
+	u32 is_hypcall;
+	/* GPRS */
+	unsigned long gprs[32];
+	unsigned long pc;
+
+	/* FPU State */
+	struct loongarch_fpu fpu FPU_ALIGN;
+	struct loongarch_lbt lbt;
+	/* Which auxiliary state is loaded (KVM_LOONGARCH_AUX_*) */
+	unsigned int aux_inuse;
+
+	/* CSR State */
+	struct loongarch_csrs *csr;
+
+	/* GPR used as IO source/target */
+	u32 io_gpr;
+
+	struct hrtimer swtimer;
+	/* Count timer control KVM register */
+	u32 count_ctl;
+
+	/* Bitmask of exceptions that are pending */
+	unsigned long irq_pending;
+	/* Bitmask of pending exceptions to be cleared */
+	unsigned long irq_clear;
+
+	/* Cache some mmu pages needed inside spinlock regions */
+	struct kvm_mmu_memory_cache mmu_page_cache;
+
+	/* vcpu's vpid is different on each host cpu in an smp system */
+	u64 vpid[NR_CPUS];
+
+	/* Period of stable timer tick in ns */
+	u64 timer_period;
+	/* Frequency of stable timer in Hz */
+	u64 timer_mhz;
+	/* Stable bias from the raw time */
+	u64 timer_bias;
+	/* Dynamic nanosecond bias (multiple of timer_period) to avoid overflow */
+	s64 timer_dyn_bias;
+	/* Save ktime */
+	ktime_t stable_ktime_saved;
+
+	u64 core_ext_ioisr[4];
+
+	/* Last CPU the VCPU state was loaded on */
+	int last_sched_cpu;
+	/* Last CPU the VCPU actually executed guest code on */
+	int last_exec_cpu;
+
+	u8 fpu_enabled;
+	u8 lsx_enabled;
+	/* paravirt steal time */
+	struct {
+		u64 guest_addr;
+		u64 last_steal;
+		struct gfn_to_pfn_cache cache;
+	} st;
+
+	/* pv related host specific info */
+	struct {
+		bool pv_unhalted;
+	} pv;
+
+	struct kvm_guest_debug_arch guest_debug;
+	/* save host pmu csr */
+	u64 perf_ctrl[4];
+	u64 perf_cntr[4];
+
+};
+
+static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
+{
+	return csr->csrs[reg];
+}
+
+static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, \
+		unsigned long val)
+{
+	csr->csrs[reg] = val;
+}
+
+/* Helpers */
+static inline bool _kvm_guest_has_fpu(struct kvm_vcpu_arch *arch)
+{
+	return cpu_has_fpu && arch->fpu_enabled;
+}
+
+
+static inline bool _kvm_guest_has_lsx(struct kvm_vcpu_arch *arch)
+{
+	return cpu_has_lsx && arch->lsx_enabled;
+}
+
+bool _kvm_guest_has_lasx(struct kvm_vcpu *vcpu);
+void _kvm_init_fault(void);
+
+/* Debug: dump vcpu state */
+int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
+
+/* MMU handling */
+int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, bool write);
+void kvm_flush_tlb_all(void);
+void _kvm_destroy_mm(struct kvm *kvm);
+pgd_t *kvm_pgd_alloc(void);
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+enum _kvm_fault_result {
+	KVM_LOONGARCH_MAPPED = 0,
+	KVM_LOONGARCH_GVA,
+	KVM_LOONGARCH_GPA,
+	KVM_LOONGARCH_TLB,
+	KVM_LOONGARCH_TLBINV,
+	KVM_LOONGARCH_TLBMOD,
+};
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end, bool blockable);
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
+
+static inline void update_pc(struct kvm_vcpu_arch *arch)
+{
+	arch->pc += 4;
+}
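readl_sw_gcsr/writel_sw_gcsr and update_pc above are the building blocks of trap-and-emulate: service the trapped CSR access against the software copy, then step the guest past the instruction. A hedged sketch of a handler built from them; only the helper names and EMULATE_DONE come from this patch, the handler itself is illustrative:

    /* Sketch: emulate a guest CSR read against the software CSR copy. */
    static int emu_csr_read(struct kvm_vcpu_arch *arch, int csrid, int rd)
    {
        arch->gprs[rd] = readl_sw_gcsr(arch->csr, csrid);
        update_pc(arch);		/* skip the 4-byte trapped instruction */
        return EMULATE_DONE;
    }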
+
+/**
+ * kvm_is_ifetch_fault() - Find whether a TLBL exception is due to ifetch fault.
+ * @vcpu:	Virtual CPU.
+ *
+ * Returns:	Whether the TLBL exception was likely due to an instruction
+ *		fetch fault rather than a data load fault.
+ */
+static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *arch)
+{
+	if (arch->pc == arch->badv)
+		return true;
+
+	return false;
+}
+
+/* Misc */
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+					 struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
+
+extern int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern void kvm_exception_entry(void);
+#endif /* __LOONGARCH_KVM_HOST_H__ */
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
new file mode 100644
index 000000000000..a14e15b469de
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_PARA_H
+#define _ASM_LOONGARCH_KVM_PARA_H
+
+#include
+
+#define KVM_HYPERCALL ".word 0x002b8000"
+/*
+ * Hypcall code field
+ */
+#define KVM_HC_CODE_SERIVCE	0x0
+#define KVM_HC_CODE_SWDBG	0x5
+/*
+ * function id
+ * 0x00000 ~ 0xfffff	Standard Hypervisor Calls
+ */
+#define KVM_HC_FUNC_FEATURE	0x0
+#define KVM_HC_FUNC_NOTIFY	0x1
+#define KVM_HC_FUNC_IPI		0x2
+/*
+ * LoongArch support PV feature list
+ */
+#define KVM_FEATURE_STEAL_TIME		0
+#define KVM_FEATURE_MULTI_IPI		1
+#define KVM_FEATURE_PARAVIRT_SPINLOCK	2
+/*
+ * LoongArch hypcall return code
+ */
+#define KVM_RET_SUC		1
+#define KVM_RET_NOT_SUPPORTED	-1
+
+#define KVM_VCPU_PREEMPTED	(1 << 0)
+
+/*
+ * Hypercalls interface for KVM.
+ *
+ * a0: function identifier
+ * a1-a6: args
+ * Return value will be placed in v0.
+ * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6.
+ */
+static inline long kvm_hypercall0(u64 fid)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline long kvm_hypercall1(u64 fid, unsigned long arg0)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun), "r" (a1)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline long kvm_hypercall2(u64 fid,
+		unsigned long arg0, unsigned long arg1)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline long kvm_hypercall3(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2), "r" (a3)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline long kvm_hypercall4(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2,
+		unsigned long arg3)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+	register unsigned long a4 asm("a4") = arg3;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline long kvm_hypercall5(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2,
+		unsigned long arg3, unsigned long arg4)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+	register unsigned long a4 asm("a4") = arg3;
+	register unsigned long a5 asm("a5") = arg4;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline long kvm_hypercall6(u64 fid,
+		unsigned long arg0, unsigned long arg1, unsigned long arg2,
+		unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+	register long ret asm("v0");
+	register unsigned long fun asm("a0") = fid;
+	register unsigned long a1 asm("a1") = arg0;
+	register unsigned long a2 asm("a2") = arg1;
+	register unsigned long a3 asm("a3") = arg2;
+	register unsigned long a4 asm("a4") = arg3;
+	register unsigned long a5 asm("a5") = arg4;
+	register unsigned long a6 asm("a6") = arg5;
+
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (ret)
+		: "r" (fun), "r" (a1), "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6)
+		: "memory"
+		);
+
+	return ret;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
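A caller-side sketch of the wrappers above: probing a PV feature through the service call. Whether the feature bit is passed exactly this way is an assumption; only the constants and kvm_hypercall1 itself come from this header:

    /* Sketch: ask the hypervisor whether a PV feature is available. */
    static inline bool pv_feature_supported(unsigned long feature)
    {
        /* function id rides in a0, the feature number in a1 */
        return kvm_hypercall1(KVM_HC_FUNC_FEATURE, feature) == KVM_RET_SUC;
    }

    /* e.g. pv_feature_supported(KVM_FEATURE_STEAL_TIME) */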
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void __init kvm_spinlock_init(void);
+#else /* !CONFIG_PARAVIRT_SPINLOCKS */
+static inline void kvm_spinlock_init(void)
+{
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#endif /* _ASM_LOONGARCH_KVM_PARA_H */
diff --git a/arch/loongarch/include/asm/kvm_types.h b/arch/loongarch/include/asm/kvm_types.h
new file mode 100644
index 000000000000..9cdc260c3579
--- /dev/null
+++ b/arch/loongarch/include/asm/kvm_types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_KVM_TYPES_H
+#define _ASM_LOONGARCH_KVM_TYPES_H
+
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE	4
+
+#endif /* _ASM_LOONGARCH_KVM_TYPES_H */
diff --git a/arch/loongarch/include/asm/loongarchregs.h b/arch/loongarch/include/asm/loongarchregs.h
index 1bd874ef2dcc..dc6a13d56096 100644
--- a/arch/loongarch/include/asm/loongarchregs.h
+++ b/arch/loongarch/include/asm/loongarchregs.h
@@ -214,9 +214,47 @@ __asm__(".macro parse_r var r\n\t"
 /* IOCSR */
 #define iocsr_read32(reg)	__iocsrrd_w(reg)
 #define iocsr_read64(reg)	__iocsrrd_d(reg)
+#define iocsr_write8(val, reg)	__iocsrwr_b(val, reg)
 #define iocsr_write32(val, reg)	__iocsrwr_w(val, reg)
 #define iocsr_write64(val, reg)	__iocsrwr_d(val, reg)
 
+/* GCSR */
+#define gcsr_read(csr)						\
+({								\
+	register unsigned long __v = 0;				\
+	__asm__ __volatile__ (					\
+		"parse_r __reg, %[val] \n\t"			\
+		".word 0x5 << 24 | %[reg] << 10 | 0 << 5 | __reg \n\t"	\
+		: [val] "+r" (__v)				\
+		: [reg] "i" (csr)				\
+		: "memory");					\
+	__v;							\
+})
+
+#define gcsr_write(v, csr)					\
+({								\
+	register unsigned long __v = v;				\
+	__asm__ __volatile__ (					\
+		"parse_r __reg, %[val] \n\t"			\
+		".word 0x5 << 24 | %[reg] << 10 | 1 << 5 | __reg \n\t"	\
+		: [val] "+r" (__v)				\
+		: [reg] "i" (csr)				\
+		: "memory");					\
+})
+
+#define gcsr_xchg(v, m, csr)					\
+({								\
+	register unsigned long __v = v;				\
+	__asm__ __volatile__ (					\
+		"parse_r __rd, %[val] \n\t"			\
+		"parse_r __rj, %[mask] \n\t"			\
+		".word 0x5 << 24 | %[reg] << 10 | __rj << 5 | __rd \n\t"	\
+		: [val] "+r" (__v)				\
+		: [mask] "r" (m), [reg] "i" (csr)		\
+		: "memory");					\
+	__v;							\
+})
+
 #endif /* !__ASSEMBLY__ */
 
 /* CSR register number */
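The gcsr_* macros above encode the privileged guest-CSR access forms directly as .word, so the host can touch guest CSRs from root mode; the read_gcsr_*/write_gcsr_* wrappers further down are thin aliases over them. A small usage sketch, restricted to names defined in this patch (note the CSR number must be a compile-time constant, since it is encoded into the instruction word):

    /* Sketch: set bits in the guest ESTAT CSR from root mode. */
    static inline unsigned long gcsr_estat_set(unsigned long mask)
    {
        /* gcsr_xchg(v, m, csr): replace the bits selected by m with v */
        return gcsr_xchg(mask, mask, LOONGARCH_CSR_ESTAT);
    }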
LOONGARCH_CSR_GINTC 0x52 /* Guest interrupt control */ #define CSR_GINTC_HC_SHIFT 16 @@ -925,6 +969,7 @@ __asm__(".macro parse_r var r\n\t" #define CSR_PERFCTRL_PLV2 (_ULCAST_(1) << 18) #define CSR_PERFCTRL_PLV3 (_ULCAST_(1) << 19) #define CSR_PERFCTRL_IE (_ULCAST_(1) << 20) +#define CSR_PERFCTRL_GMOD (_ULCAST_(3) << 21) #define CSR_PERFCTRL_EVENT 0x3ff /* Debug registers */ @@ -1238,6 +1283,131 @@ static inline void write_csr_tlbrefill_pagesize(unsigned int size) #define write_csr_perfctrl3(val) csr_write64(val, LOONGARCH_CSR_PERFCTRL3) #define write_csr_perfcntr3(val) csr_write64(val, LOONGARCH_CSR_PERFCNTR3) +/* Guest related CSRS */ +#define read_csr_gtlbc() csr_read32(LOONGARCH_CSR_GTLBC) +#define write_csr_gtlbc(val) csr_write32(val, LOONGARCH_CSR_GTLBC) +#define read_csr_trgp() csr_read32(LOONGARCH_CSR_TRGP) +#define read_csr_gcfg() csr_read32(LOONGARCH_CSR_GCFG) +#define write_csr_gcfg(val) csr_write32(val, LOONGARCH_CSR_GCFG) +#define read_csr_gstat() csr_read32(LOONGARCH_CSR_GSTAT) +#define write_csr_gstat(val) csr_write32(val, LOONGARCH_CSR_GSTAT) +#define read_csr_gintc() csr_read32(LOONGARCH_CSR_GINTC) +#define write_csr_gintc(val) csr_write32(val, LOONGARCH_CSR_GINTC) +#define read_csr_gcntc() csr_read64(LOONGARCH_CSR_GCNTC) +#define write_csr_gcntc(val) csr_write64(val, LOONGARCH_CSR_GCNTC) + +/* Guest CSRS read and write */ +#define read_gcsr_crmd() gcsr_read(LOONGARCH_CSR_CRMD) +#define write_gcsr_crmd(val) gcsr_write(val, LOONGARCH_CSR_CRMD) +#define read_gcsr_prmd() gcsr_read(LOONGARCH_CSR_PRMD) +#define write_gcsr_prmd(val) gcsr_write(val, LOONGARCH_CSR_PRMD) +#define read_gcsr_euen() gcsr_read(LOONGARCH_CSR_EUEN) +#define write_gcsr_euen(val) gcsr_write(val, LOONGARCH_CSR_EUEN) +#define read_gcsr_misc() gcsr_read(LOONGARCH_CSR_MISC) +#define write_gcsr_misc(val) gcsr_write(val, LOONGARCH_CSR_MISC) +#define read_gcsr_ecfg() gcsr_read(LOONGARCH_CSR_ECFG) +#define write_gcsr_ecfg(val) gcsr_write(val, LOONGARCH_CSR_ECFG) +#define read_gcsr_estat() gcsr_read(LOONGARCH_CSR_ESTAT) +#define write_gcsr_estat(val) gcsr_write(val, LOONGARCH_CSR_ESTAT) +#define read_gcsr_era() gcsr_read(LOONGARCH_CSR_ERA) +#define write_gcsr_era(val) gcsr_write(val, LOONGARCH_CSR_ERA) +#define read_gcsr_badv() gcsr_read(LOONGARCH_CSR_BADV) +#define write_gcsr_badv(val) gcsr_write(val, LOONGARCH_CSR_BADV) +#define read_gcsr_badi() gcsr_read(LOONGARCH_CSR_BADI) +#define write_gcsr_badi(val) gcsr_write(val, LOONGARCH_CSR_BADI) +#define read_gcsr_eentry() gcsr_read(LOONGARCH_CSR_EENTRY) +#define write_gcsr_eentry(val) gcsr_write(val, LOONGARCH_CSR_EENTRY) + +#define read_gcsr_tlbidx() gcsr_read(LOONGARCH_CSR_TLBIDX) +#define write_gcsr_tlbidx(val) gcsr_write(val, LOONGARCH_CSR_TLBIDX) +#define read_gcsr_tlbhi() gcsr_read(LOONGARCH_CSR_TLBEHI) +#define write_gcsr_tlbhi(val) gcsr_write(val, LOONGARCH_CSR_TLBEHI) +#define read_gcsr_tlblo0() gcsr_read(LOONGARCH_CSR_TLBELO0) +#define write_gcsr_tlblo0(val) gcsr_write(val, LOONGARCH_CSR_TLBELO0) +#define read_gcsr_tlblo1() gcsr_read(LOONGARCH_CSR_TLBELO1) +#define write_gcsr_tlblo1(val) gcsr_write(val, LOONGARCH_CSR_TLBELO1) + +#define read_gcsr_asid() gcsr_read(LOONGARCH_CSR_ASID) +#define write_gcsr_asid(val) gcsr_write(val, LOONGARCH_CSR_ASID) +#define read_gcsr_pgdl() gcsr_read(LOONGARCH_CSR_PGDL) +#define write_gcsr_pgdl(val) gcsr_write(val, LOONGARCH_CSR_PGDL) +#define read_gcsr_pgdh() gcsr_read(LOONGARCH_CSR_PGDH) +#define write_gcsr_pgdh(val) gcsr_write(val, LOONGARCH_CSR_PGDH) +#define read_gcsr_pgd() gcsr_read(LOONGARCH_CSR_PGD) +#define 
write_gcsr_pgd(val) gcsr_write(val, LOONGARCH_CSR_PGD) +#define read_gcsr_pwctl0() gcsr_read(LOONGARCH_CSR_PWCTL0) +#define write_gcsr_pwctl0(val) gcsr_write(val, LOONGARCH_CSR_PWCTL0) +#define read_gcsr_pwctl1() gcsr_read(LOONGARCH_CSR_PWCTL1) +#define write_gcsr_pwctl1(val) gcsr_write(val, LOONGARCH_CSR_PWCTL1) +#define read_gcsr_stlbpgsize() gcsr_read(LOONGARCH_CSR_STLBPGSIZE) +#define write_gcsr_stlbpgsize(val) gcsr_write(val, LOONGARCH_CSR_STLBPGSIZE) +#define read_gcsr_rvacfg() gcsr_read(LOONGARCH_CSR_RVACFG) +#define write_gcsr_rvacfg(val) gcsr_write(val, LOONGARCH_CSR_RVACFG) + +#define read_gcsr_cpuid() gcsr_read(LOONGARCH_CSR_CPUID) +#define write_gcsr_cpuid(val) gcsr_write(val, LOONGARCH_CSR_CPUID) +#define read_gcsr_prcfg1() gcsr_read(LOONGARCH_CSR_PRCFG1) +#define write_gcsr_prcfg1(val) gcsr_write(val, LOONGARCH_CSR_PRCFG1) +#define read_gcsr_prcfg2() gcsr_read(LOONGARCH_CSR_PRCFG2) +#define write_gcsr_prcfg2(val) gcsr_write(val, LOONGARCH_CSR_PRCFG2) +#define read_gcsr_prcfg3() gcsr_read(LOONGARCH_CSR_PRCFG3) +#define write_gcsr_prcfg3(val) gcsr_write(val, LOONGARCH_CSR_PRCFG3) + +#define read_gcsr_kscratch0() gcsr_read(LOONGARCH_CSR_KS0) +#define write_gcsr_kscratch0(val) gcsr_write(val, LOONGARCH_CSR_KS0) +#define read_gcsr_kscratch1() gcsr_read(LOONGARCH_CSR_KS1) +#define write_gcsr_kscratch1(val) gcsr_write(val, LOONGARCH_CSR_KS1) +#define read_gcsr_kscratch2() gcsr_read(LOONGARCH_CSR_KS2) +#define write_gcsr_kscratch2(val) gcsr_write(val, LOONGARCH_CSR_KS2) +#define read_gcsr_kscratch3() gcsr_read(LOONGARCH_CSR_KS3) +#define write_gcsr_kscratch3(val) gcsr_write(val, LOONGARCH_CSR_KS3) +#define read_gcsr_kscratch4() gcsr_read(LOONGARCH_CSR_KS4) +#define write_gcsr_kscratch4(val) gcsr_write(val, LOONGARCH_CSR_KS4) +#define read_gcsr_kscratch5() gcsr_read(LOONGARCH_CSR_KS5) +#define write_gcsr_kscratch5(val) gcsr_write(val, LOONGARCH_CSR_KS5) +#define read_gcsr_kscratch6() gcsr_read(LOONGARCH_CSR_KS6) +#define write_gcsr_kscratch6(val) gcsr_write(val, LOONGARCH_CSR_KS6) +#define read_gcsr_kscratch7() gcsr_read(LOONGARCH_CSR_KS7) +#define write_gcsr_kscratch7(val) gcsr_write(val, LOONGARCH_CSR_KS7) + +#define read_gcsr_timerid() gcsr_read(LOONGARCH_CSR_TMID) +#define write_gcsr_timerid(val) gcsr_write(val, LOONGARCH_CSR_TMID) +#define read_gcsr_timercfg() gcsr_read(LOONGARCH_CSR_TCFG) +#define write_gcsr_timercfg(val) gcsr_write(val, LOONGARCH_CSR_TCFG) +#define read_gcsr_timertick() gcsr_read(LOONGARCH_CSR_TVAL) +#define write_gcsr_timertick(val) gcsr_write(val, LOONGARCH_CSR_TVAL) +#define read_gcsr_timeroffset() gcsr_read(LOONGARCH_CSR_CNTC) +#define write_gcsr_timeroffset(val) gcsr_write(val, LOONGARCH_CSR_CNTC) + +#define read_gcsr_llbctl() gcsr_read(LOONGARCH_CSR_LLBCTL) +#define write_gcsr_llbctl(val) gcsr_write(val, LOONGARCH_CSR_LLBCTL) + +#define read_gcsr_tlbrentry() gcsr_read(LOONGARCH_CSR_TLBRENTRY) +#define write_gcsr_tlbrentry(val) gcsr_write(val, LOONGARCH_CSR_TLBRENTRY) +#define read_gcsr_tlbrbadv() gcsr_read(LOONGARCH_CSR_TLBRBADV) +#define write_gcsr_tlbrbadv(val) gcsr_write(val, LOONGARCH_CSR_TLBRBADV) +#define read_gcsr_tlbrera() gcsr_read(LOONGARCH_CSR_TLBRERA) +#define write_gcsr_tlbrera(val) gcsr_write(val, LOONGARCH_CSR_TLBRERA) +#define read_gcsr_tlbrsave() gcsr_read(LOONGARCH_CSR_TLBRSAVE) +#define write_gcsr_tlbrsave(val) gcsr_write(val, LOONGARCH_CSR_TLBRSAVE) +#define read_gcsr_tlbrelo0() gcsr_read(LOONGARCH_CSR_TLBRELO0) +#define write_gcsr_tlbrelo0(val) gcsr_write(val, LOONGARCH_CSR_TLBRELO0) +#define read_gcsr_tlbrelo1() 
gcsr_read(LOONGARCH_CSR_TLBRELO1) +#define write_gcsr_tlbrelo1(val) gcsr_write(val, LOONGARCH_CSR_TLBRELO1) +#define read_gcsr_tlbrehi() gcsr_read(LOONGARCH_CSR_TLBREHI) +#define write_gcsr_tlbrehi(val) gcsr_write(val, LOONGARCH_CSR_TLBREHI) +#define read_gcsr_tlbrprmd() gcsr_read(LOONGARCH_CSR_TLBRPRMD) +#define write_gcsr_tlbrprmd(val) gcsr_write(val, LOONGARCH_CSR_TLBRPRMD) + +#define read_gcsr_directwin0() gcsr_read(LOONGARCH_CSR_DMWIN0) +#define write_gcsr_directwin0(val) gcsr_write(val, LOONGARCH_CSR_DMWIN0) +#define read_gcsr_directwin1() gcsr_read(LOONGARCH_CSR_DMWIN1) +#define write_gcsr_directwin1(val) gcsr_write(val, LOONGARCH_CSR_DMWIN1) +#define read_gcsr_directwin2() gcsr_read(LOONGARCH_CSR_DMWIN2) +#define write_gcsr_directwin2(val) gcsr_write(val, LOONGARCH_CSR_DMWIN2) +#define read_gcsr_directwin3() gcsr_read(LOONGARCH_CSR_DMWIN3) +#define write_gcsr_directwin3(val) gcsr_write(val, LOONGARCH_CSR_DMWIN3) + /* * Manipulate bits in a register. */ @@ -1280,15 +1450,26 @@ change_##name(unsigned long change, unsigned long val) \ } #define __BUILD_CSR_OP(name) __BUILD_CSR_COMMON(csr_##name) +#define __BUILD_GCSR_OP(name) __BUILD_CSR_COMMON(gcsr_##name) __BUILD_CSR_OP(euen) __BUILD_CSR_OP(ecfg) __BUILD_CSR_OP(tlbidx) +__BUILD_CSR_OP(gcfg) +__BUILD_CSR_OP(gstat) +__BUILD_CSR_OP(gtlbc) +__BUILD_CSR_OP(gintc) +__BUILD_GCSR_OP(llbctl) +__BUILD_GCSR_OP(tlbidx) #define set_csr_estat(val) \ csr_xchg32(val, val, LOONGARCH_CSR_ESTAT) #define clear_csr_estat(val) \ csr_xchg32(~(val), val, LOONGARCH_CSR_ESTAT) +#define set_gcsr_estat(val) \ + gcsr_xchg(val, val, LOONGARCH_CSR_ESTAT) +#define clear_gcsr_estat(val) \ + gcsr_xchg(~(val), val, LOONGARCH_CSR_ESTAT) #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/paravirt.h b/arch/loongarch/include/asm/paravirt.h new file mode 100644 index 000000000000..ee766aec04ae --- /dev/null +++ b/arch/loongarch/include/asm/paravirt.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_LOONGARCH_PARAVIRT_H +#define _ASM_LOONGARCH_PARAVIRT_H +#include + +#ifdef CONFIG_PARAVIRT +static inline bool kvm_para_available(void) +{ + return true; +} +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u8 preempted; + __u8 pad[47]; +}; +extern struct pv_time_ops pv_time_ops; + +bool pv_is_native_spin_unlock(void); + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_time_ops.steal_clock(cpu); +} + +static inline bool pv_feature_support(int feature) +{ + return kvm_hypercall1(KVM_HC_FUNC_FEATURE, feature) == KVM_RET_SUC; +} +static inline void pv_notify_host(int feature, unsigned long data) +{ + kvm_hypercall2(KVM_HC_FUNC_NOTIFY, feature, data); +} + +#if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS) +struct qspinlock; + +struct pv_lock_ops { + void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val); + void (*queued_spin_unlock)(struct qspinlock *lock); + void (*wait)(u8 *ptr, u8 val); + void (*kick)(int cpu); + bool (*vcpu_is_preempted)(long cpu); +}; + +extern struct pv_lock_ops pv_lock_ops; + +void __init kvm_spinlock_init(void); + +static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock, + u32 val) +{ + pv_lock_ops.queued_spin_lock_slowpath(lock, val); +} + +static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock) +{ + 
pv_lock_ops.queued_spin_unlock(lock); +} + +static __always_inline void pv_wait(u8 *ptr, u8 val) +{ + pv_lock_ops.wait(ptr, val); +} + +static __always_inline void pv_kick(int cpu) +{ + pv_lock_ops.kick(cpu); +} + +static __always_inline bool pv_vcpu_is_preempted(long cpu) +{ + return pv_lock_ops.vcpu_is_preempted(cpu); +} + +#endif /* SMP && PARAVIRT_SPINLOCKS */ + +int __init pv_time_init(void); +int __init pv_ipi_init(void); +#else +static inline bool kvm_para_available(void) +{ + return false; +} + +#define pv_time_init() do {} while (0) +#define pv_ipi_init() do {} while (0) +#endif +#endif /* _ASM_LOONGARCH_PARAVIRT_H */ diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h index 34f43f8ad591..3ef3e7847741 100644 --- a/arch/loongarch/include/asm/qspinlock.h +++ b/arch/loongarch/include/asm/qspinlock.h @@ -2,16 +2,48 @@ #ifndef _ASM_QSPINLOCK_H #define _ASM_QSPINLOCK_H +#include #include +#define _Q_PENDING_LOOPS (1 << 9) #define queued_spin_unlock queued_spin_unlock +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_queued_spin_unlock(struct qspinlock *lock); + +static inline void native_queued_spin_unlock(struct qspinlock *lock) +{ + compiletime_assert_atomic_type(lock->locked); + c_sync(); + WRITE_ONCE(lock->locked, 0); +} + +static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) +{ + pv_queued_spin_lock_slowpath(lock, val); +} + +static inline void queued_spin_unlock(struct qspinlock *lock) +{ + pv_queued_spin_unlock(lock); +} + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(long cpu) +{ + return pv_vcpu_is_preempted(cpu); +} +#else static inline void queued_spin_unlock(struct qspinlock *lock) { compiletime_assert_atomic_type(lock->locked); c_sync(); WRITE_ONCE(lock->locked, 0); } +#endif #include diff --git a/arch/loongarch/include/asm/qspinlock_paravirt.h b/arch/loongarch/include/asm/qspinlock_paravirt.h new file mode 100644 index 000000000000..7d0385596127 --- /dev/null +++ b/arch/loongarch/include/asm/qspinlock_paravirt.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_QSPINLOCK_PARAVIRT_H +#define __ASM_QSPINLOCK_PARAVIRT_H + +#ifdef CONFIG_64BIT + +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +void __pv_queued_spin_unlock(struct qspinlock *lock) +{ + u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); + + if (likely(lockval == _Q_LOCKED_VAL)) + return; + __pv_queued_spin_unlock_slowpath(lock, lockval); +} + +#endif /* CONFIG_64BIT */ +#endif diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index eb0d6e8c2f5c..340e88d09f2c 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -31,7 +31,7 @@ struct plat_smp_ops { #endif }; -extern const struct plat_smp_ops *mp_ops; +extern struct plat_smp_ops *mp_ops; void register_smp_ops(const struct plat_smp_ops *ops); static inline void plat_smp_setup(void) diff --git a/arch/loongarch/include/asm/spinlock.h b/arch/loongarch/include/asm/spinlock.h index 52dcc6419525..60df51242523 100644 --- a/arch/loongarch/include/asm/spinlock.h +++ b/arch/loongarch/include/asm/spinlock.h @@ -11,6 +11,9 @@ #ifndef _ASM_SPINLOCK_H #define _ASM_SPINLOCK_H +/* How long a lock should spin before we consider blocking */ +#define SPIN_THRESHOLD (1 << 15) + #include 
#include #include diff --git a/arch/loongarch/include/uapi/asm/Kbuild b/arch/loongarch/include/uapi/asm/Kbuild index 4aa680ca2e5f..f66554cd5c45 100644 --- a/arch/loongarch/include/uapi/asm/Kbuild +++ b/arch/loongarch/include/uapi/asm/Kbuild @@ -1,2 +1 @@ # SPDX-License-Identifier: GPL-2.0 -generic-y += kvm_para.h diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h new file mode 100644 index 000000000000..7f2d99407e45 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Loongson Technologies, Inc. All rights reserved. + * Authors: Sanjay Lal + * Authors: Xing Li + */ + +#ifndef __LINUX_KVM_LOONGARCH_H +#define __LINUX_KVM_LOONGARCH_H + +#include + +#ifndef __KERNEL__ +#include +#endif + +#define __KVM_HAVE_GUEST_DEBUG +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 +#define KVM_GUESTDBG_USE_HW_BP 0x00020000 +#define KVM_DATA_HW_BREAKPOINT_NUM 8 +#define KVM_INST_HW_BREAKPOINT_NUM 8 + +/* + * KVM Loongarch specific structures and definitions. + * + * Some parts derived from the x86 version of this file. + */ + +#define __KVM_HAVE_READONLY_MEM + +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +/* + * for KVM_GET_REGS and KVM_SET_REGS + */ +struct kvm_regs { + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 gpr[32]; + __u64 pc; +}; + +/* + * for KVM_GET_CPUCFG + */ +struct kvm_cpucfg { + /* out (KVM_GET_CPUCFG) */ + __u32 cpucfg[64]; +}; + +/* + * for KVM_GET_FPU and KVM_SET_FPU + */ +struct kvm_fpu { + __u32 fcsr; + __u64 fcc; /* 8x8 */ + struct kvm_fpureg { + __u64 val64[4]; //support max 256 bits + } fpr[32]; +}; + +/* + * For LOONGARCH, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various + * registers. The id field is broken down as follows: + * + * bits[63..52] - As per linux/kvm.h + * bits[51..32] - Must be zero. + * bits[31..16] - Register set. + * + * Register set = 0: GP registers from kvm_regs (see definitions below). + * Register set = 1: CSR registers. + * Register set = 2: KVM specific registers (see definitions below). + * Register set = 3: FPU / SIMD registers (see definitions below). + * Register set = 4: LBT registers (see definitions below). + * + * Other sets registers may be added in the future. Each set would + * have its own identifier in bits[31..16]. + */ + +#define KVM_REG_LOONGARCH_GP (KVM_REG_LOONGARCH | 0x00000ULL) +#define KVM_REG_LOONGARCH_CSR (KVM_REG_LOONGARCH | 0x10000ULL) +#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) +#define KVM_REG_LOONGARCH_FPU (KVM_REG_LOONGARCH | 0x30000ULL) +#define KVM_REG_LOONGARCH_LBT (KVM_REG_LOONGARCH | 0x40000ULL) +#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) +#define KVM_CSR_IDX_MASK (0x10000 - 1) + +/* + * KVM_REG_LOONGARCH_KVM - KVM specific control registers. 
+ */
+#define KVM_REG_LOONGARCH_COUNTER	(KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
+#define KVM_REG_LOONGARCH_VCPU_RESET	(KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 4)
+
+#define KVM_REG_LBT_SCR0	(KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 1)
+#define KVM_REG_LBT_SCR1	(KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 2)
+#define KVM_REG_LBT_SCR2	(KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 3)
+#define KVM_REG_LBT_SCR3	(KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 4)
+#define KVM_REG_LBT_FLAGS	(KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 5)
+#define KVM_REG_LBT_FTOP	(KVM_REG_LOONGARCH_LBT | KVM_REG_SIZE_U64 | 6)
+
+#define __KVM_HAVE_IRQ_LINE
+
+struct kvm_debug_exit_arch {
+	__u64 era;
+	__u32 fwps;
+	__u32 mwps;
+	__u32 exception;
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct hw_breakpoint {
+	__u64 addr;
+	__u64 mask;
+	__u32 asid;
+	__u32 ctrl;
+};
+
+struct kvm_guest_debug_arch {
+	struct hw_breakpoint data_breakpoint[KVM_DATA_HW_BREAKPOINT_NUM];
+	struct hw_breakpoint inst_breakpoint[KVM_INST_HW_BREAKPOINT_NUM];
+	int inst_bp_nums, data_bp_nums;
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+/* dummy definition */
+struct kvm_sregs {
+};
+
+struct kvm_iocsr_entry {
+	__u32 addr;
+	__u32 pad;
+	__u64 data;
+};
+
+struct kvm_csr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
+struct kvm_msrs {
+	__u32 ncsrs; /* number of csrs in entries */
+	__u32 pad;
+
+	struct kvm_csr_entry entries[0];
+};
+
+struct kvm_loongarch_interrupt {
+	/* in */
+	__u32 cpu;
+	__u32 irq;
+};
+
+#define KVM_IRQCHIP_LS7A_IOAPIC	0x0
+#define KVM_IRQCHIP_LS3A_GIPI	0x1
+#define KVM_IRQCHIP_LS3A_HT_IRQ	0x2
+#define KVM_IRQCHIP_LS3A_ROUTE	0x3
+#define KVM_IRQCHIP_LS3A_EXTIRQ	0x4
+#define KVM_IRQCHIP_LS3A_IPMASK	0x5
+#define KVM_NR_IRQCHIPS	1
+#define KVM_IRQCHIP_NUM_PINS	64
+
+#define KVM_MAX_CORES	256
+#define KVM_EXTIOI_IRQS	(256)
+#define KVM_EXTIOI_IRQS_BITMAP_SIZE	(KVM_EXTIOI_IRQS / 8)
+/* map to ipnum per 32 irqs */
+#define KVM_EXTIOI_IRQS_IPMAP_SIZE	(KVM_EXTIOI_IRQS / 32)
+#define KVM_EXTIOI_IRQS_PER_GROUP	32
+#define KVM_EXTIOI_IRQS_COREMAP_SIZE	(KVM_EXTIOI_IRQS)
+#define KVM_EXTIOI_IRQS_NODETYPE_SIZE	16
+
+struct ls7a_ioapic_state {
+	/* 0x000 interrupt id register */
+	__u64 int_id;
+	/* 0x020 interrupt mask register */
+	__u64 int_mask;
+	/* 0x040 1=msi */
+	__u64 htmsi_en;
+	/* 0x060 edge=1, level=0 */
+	__u64 intedge;
+	/* 0x080 edge interrupt clear register; write 1 to clear, writing 0 is ignored */
+	__u64 intclr;
+	/* 0x0c0 */
+	__u64 auto_crtl0;
+	/* 0x0e0 */
+	__u64 auto_crtl1;
+	/* 0x100 - 0x140 */
+	__u8 route_entry[64];
+	/* 0x200 - 0x240 */
+	__u8 htmsi_vector[64];
+	/* 0x300 */
+	__u64 intisr_chip0;
+	/* 0x320 */
+	__u64 intisr_chip1;
+	/* edge detection */
+	__u64 last_intirr;
+	/* 0x380 interrupt request register */
+	__u64 intirr;
+	/* 0x3a0 interrupt service register */
+	__u64 intisr;
+	/* 0x3e0 interrupt level polarity selection register,
+	 * 0 for high level trigger
+	 */
+	__u64 int_polarity;
+};
+
+struct loongarch_gipi_single {
+	__u32 status;
+	__u32 en;
+	__u32 set;
+	__u32 clear;
+	__u64 buf[4];
+};
+
+struct loongarch_gipiState {
+	struct loongarch_gipi_single core[KVM_MAX_CORES];
+};
+
+struct kvm_loongarch_ls3a_extirq_state {
+	union ext_en_r {
+		uint64_t reg_u64[KVM_EXTIOI_IRQS_BITMAP_SIZE / 8];
+		uint32_t reg_u32[KVM_EXTIOI_IRQS_BITMAP_SIZE / 4];
+		uint8_t reg_u8[KVM_EXTIOI_IRQS_BITMAP_SIZE];
+	} ext_en_r;
+	union bounce_r {
+		uint64_t reg_u64[KVM_EXTIOI_IRQS_BITMAP_SIZE / 8];
+		uint32_t
reg_u32[KVM_EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[KVM_EXTIOI_IRQS_BITMAP_SIZE]; + } bounce_r; + union ext_isr_r { + uint64_t reg_u64[KVM_EXTIOI_IRQS_BITMAP_SIZE / 8]; + uint32_t reg_u32[KVM_EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[KVM_EXTIOI_IRQS_BITMAP_SIZE]; + } ext_isr_r; + union ext_core_isr_r { + uint64_t reg_u64[KVM_MAX_CORES][KVM_EXTIOI_IRQS_BITMAP_SIZE / 8]; + uint32_t reg_u32[KVM_MAX_CORES][KVM_EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[KVM_MAX_CORES][KVM_EXTIOI_IRQS_BITMAP_SIZE]; + } ext_core_isr_r; + union ip_map_r { + uint64_t reg_u64; + uint32_t reg_u32[KVM_EXTIOI_IRQS_IPMAP_SIZE / 4]; + uint8_t reg_u8[KVM_EXTIOI_IRQS_IPMAP_SIZE]; + } ip_map_r; + union core_map_r { + uint64_t reg_u64[KVM_EXTIOI_IRQS_COREMAP_SIZE / 8]; + uint32_t reg_u32[KVM_EXTIOI_IRQS_COREMAP_SIZE / 4]; + uint8_t reg_u8[KVM_EXTIOI_IRQS_COREMAP_SIZE]; + } core_map_r; + union node_type_r { + uint64_t reg_u64[KVM_EXTIOI_IRQS_NODETYPE_SIZE / 4]; + uint32_t reg_u32[KVM_EXTIOI_IRQS_NODETYPE_SIZE / 2]; + uint16_t reg_u16[KVM_EXTIOI_IRQS_NODETYPE_SIZE]; + uint8_t reg_u8[KVM_EXTIOI_IRQS_NODETYPE_SIZE * 2]; + } node_type_r; +}; + +struct loongarch_kvm_irqchip { + __u16 chip_id; + __u16 len; + __u16 vcpu_id; + __u16 reserved; + char data[0]; +}; + +#endif /* __LINUX_KVM_LOONGARCH_H */ diff --git a/arch/loongarch/include/uapi/asm/kvm_para.h b/arch/loongarch/include/uapi/asm/kvm_para.h new file mode 100644 index 000000000000..4013470c389e --- /dev/null +++ b/arch/loongarch/include/uapi/asm/kvm_para.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_ASM_LOONGARCH_KVM_PARA_H +#define _UAPI_ASM_LOONGARCH_KVM_PARA_H + +/* Device Control API on vcpu fd */ +#define KVM_LARCH_VCPU_PVTIME_CTRL 2 +#define KVM_LARCH_VCPU_PVTIME_IPA 0 + +#endif /* _UAPI_ASM_LOONGARCH_KVM_PARA_H */ diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 32218252319d..9df4750dd5ee 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -72,4 +72,7 @@ obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_SPINLOCK_TEST) += spinlock_test.o +obj-$(CONFIG_PARAVIRT) += paravirt.o +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += paravirt-spinlocks.o + CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS) diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index c64c67497a55..d13690769500 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -281,3 +282,36 @@ void output_pbe_defines(void) BLANK(); } #endif + +void output_kvm_defines(void) +{ + COMMENT(" KVM/LoongArch Specific offsets. 
"); + + OFFSET(VCPU_FCSR0, kvm_vcpu_arch, fpu.fcsr); + OFFSET(VCPU_FCC, kvm_vcpu_arch, fpu.fcc); + BLANK(); + + OFFSET(KVM_VCPU_ARCH, kvm_vcpu, arch); + OFFSET(KVM_VCPU_KVM, kvm_vcpu, kvm); + OFFSET(KVM_VCPU_RUN, kvm_vcpu, run); + BLANK(); + + OFFSET(KVM_ARCH_HSTACK, kvm_vcpu_arch, host_stack); + OFFSET(KVM_ARCH_HGP, kvm_vcpu_arch, host_gp); + OFFSET(KVM_ARCH_HANDLE_EXIT, kvm_vcpu_arch, handle_exit); + OFFSET(KVM_ARCH_HPGD, kvm_vcpu_arch, host_pgd); + OFFSET(KVM_ARCH_GEENTRY, kvm_vcpu_arch, guest_eentry); + OFFSET(KVM_ARCH_GPC, kvm_vcpu_arch, pc); + OFFSET(KVM_ARCH_GGPR, kvm_vcpu_arch, gprs); + OFFSET(KVM_ARCH_HESTAT, kvm_vcpu_arch, host_estat); + OFFSET(KVM_ARCH_HBADV, kvm_vcpu_arch, badv); + OFFSET(KVM_ARCH_HBADI, kvm_vcpu_arch, badi); + OFFSET(KVM_ARCH_ISHYPCALL, kvm_vcpu_arch, is_hypcall); + OFFSET(KVM_ARCH_HECFG, kvm_vcpu_arch, host_ecfg); + OFFSET(KVM_ARCH_HEENTRY, kvm_vcpu_arch, host_eentry); + OFFSET(KVM_ARCH_HPERCPU, kvm_vcpu_arch, host_percpu); + + OFFSET(KVM_GPGD, kvm, arch.gpa_mm.pgd); + BLANK(); + +} diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index a164ec63c81c..552c2c14030b 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -93,7 +93,7 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) unsigned long asid_mask; c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | - LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; + LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT; elf_hwcap = HWCAP_LOONGARCH_CPUCFG; @@ -214,6 +214,55 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) loongarch_probe_watch_registers(c); } +static inline void cpu_probe_guestinfo(struct cpuinfo_loongarch *c) +{ + unsigned long guestinfo; + guestinfo = read_csr_gstat(); + if (guestinfo & CSR_GSTAT_GIDBIT) { + c->options |= LOONGARCH_CPU_GUESTID; + write_csr_gstat(0); + } +} + +static inline void cpu_probe_lvz(struct cpuinfo_loongarch *c) +{ + unsigned long gcfg, gprcfg1; + cpu_probe_guestinfo(c); + + c->guest.options |= LOONGARCH_CPU_FPU; + c->guest.options_dyn |= LOONGARCH_CPU_FPU; + c->guest.options_dyn |= LOONGARCH_CPU_PMP; + + c->guest.ases |= LOONGARCH_CPU_LSX; + c->guest.ases_dyn |= LOONGARCH_CPU_LSX; + gprcfg1 = read_gcsr_prcfg1(); + c->guest.kscratch_mask = GENMASK((gprcfg1 & CSR_CONF1_KSNUM) - 1, 0); + + gcfg = read_csr_gcfg(); + if (gcfg & CSR_GCFG_MATP_GUEST) + c->guest_cfg |= BIT(0); + if (gcfg & CSR_GCFG_MATP_ROOT) + c->guest_cfg |= BIT(1); + if (gcfg & CSR_GCFG_MATP_NEST) + c->guest_cfg |= BIT(2); + if (gcfg & CSR_GCFG_SITP) + c->guest_cfg |= BIT(6); + if (gcfg & CSR_GCFG_TITP) + c->guest_cfg |= BIT(8); + if (gcfg & CSR_GCFG_TOEP) + c->guest_cfg |= BIT(10); + if (gcfg & CSR_GCFG_TOPP) + c->guest_cfg |= BIT(12); + if (gcfg & CSR_GCFG_TORUP) + c->guest_cfg |= BIT(14); + if (gcfg & CSR_GCFG_GCIP_ALL) + c->guest_cfg |= BIT(16); + if (gcfg & CSR_GCFG_GCIP_HIT) + c->guest_cfg |= BIT(17); + if (gcfg & CSR_GCFG_GCIP_SECURE) + c->guest_cfg |= BIT(18); +} + #define MAX_NAME_LEN 32 #define VENDOR_OFFSET 0 #define CPUNAME_OFFSET 9 @@ -327,6 +376,8 @@ void cpu_probe(void) if (cpu == 0) __ua_limit = ~((1ull << cpu_vabits) - 1); #endif + if (cpu_has_lvz) + cpu_probe_lvz(c); cpu_report(); } diff --git a/arch/loongarch/kernel/paravirt-spinlocks.c b/arch/loongarch/kernel/paravirt-spinlocks.c new file mode 100644 index 000000000000..557119ae242b --- /dev/null +++ b/arch/loongarch/kernel/paravirt-spinlocks.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Split spinlock implementation out into its own 
file, so it can be
+ * compiled in a FTRACE-compatible way.
+ */
+#include
+#include
+
+#include
+
+__visible void paravirt_wait_nop(u8 *ptr, u8 val)
+{
+}
+
+__visible void paravirt_kick_nop(int cpu)
+{
+}
+
+__visible void __native_queued_spin_unlock(struct qspinlock *lock)
+{
+	native_queued_spin_unlock(lock);
+}
+
+bool pv_is_native_spin_unlock(void)
+{
+	return pv_lock_ops.queued_spin_unlock == __native_queued_spin_unlock;
+}
+
+__visible bool __native_vcpu_is_preempted(long cpu)
+{
+	return false;
+}
+
+bool pv_is_native_vcpu_is_preempted(void)
+{
+	return pv_lock_ops.vcpu_is_preempted == __native_vcpu_is_preempted;
+}
+
+struct pv_lock_ops pv_lock_ops = {
+#ifdef CONFIG_SMP
+	.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
+	.queued_spin_unlock = __native_queued_spin_unlock,
+	.wait = paravirt_wait_nop,
+	.kick = paravirt_kick_nop,
+	.vcpu_is_preempted = __native_vcpu_is_preempted,
+#endif /* SMP */
+};
+EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/loongarch/kernel/paravirt.c b/arch/loongarch/kernel/paravirt.c
new file mode 100644
index 000000000000..012c665fb29f
--- /dev/null
+++ b/arch/loongarch/kernel/paravirt.c
@@ -0,0 +1,321 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Loongson Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * LoongArch paravirtualization support.
+ *
+ * Author: wangjianxing
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock;
+static unsigned int pv_feature_disabled;
+
+static u64 dummy_steal_clock(int cpu)
+{
+	return 0;
+}
+
+static u64 pv_steal_clock(int cpu)
+{
+	u64 steal;
+	struct kvm_steal_time *src;
+	int version;
+
+	src = &per_cpu(steal_time, cpu);
+	do {
+
+		version = src->version;
+		virt_rmb();
+		steal = src->steal;
+		virt_rmb();
+
+	} while ((version & 1) || (version != src->version));
+	return steal;
+}
+
+struct pv_time_ops pv_time_ops = {
+	.steal_clock = dummy_steal_clock,
+};
+EXPORT_SYMBOL_GPL(pv_time_ops);
+
+phys_addr_t slow_virt_to_phys(const void *vaddr)
+{
+	unsigned long addr = (unsigned long) vaddr;
+	unsigned long page_mask = 0;
+	pgd_t *pgd = pgd_offset_k(addr);
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+	phys_addr_t phys_addr;
+
+	/* See arch/loongarch/la64/numa.c:setup_per_cpu_areas */
+	if (nr_node_ids < 8)
+		return virt_to_phys((void *)vaddr);
+
+	if (pgd_none(*pgd))
+		goto out;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		goto out;
+
+	pud = pud_offset(p4d, addr);
+
+	if (pud_none(*pud) || pud_bad(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		goto out;
+#ifdef CONFIG_HUGETLB_PAGE
+	if (pmd_huge(*pmd)) {
+		pte = (pte_t *)pmd;
+		page_mask = PMD_MASK;
+		goto out;
+	}
+#endif
+	pte = pte_offset_kernel(pmd, addr);
+	page_mask = PAGE_MASK;
+out:
+	if (!pte)
+		return 0; /* address not mapped; no pte to translate */
+	phys_addr = (pte_pfn(*pte) << PAGE_SHIFT) | (addr & ~page_mask);
+	return phys_addr;
+}
+
+static void pv_register_steal_time(void)
+{
+	int cpu = smp_processor_id();
+	struct kvm_steal_time *st;
+
+	if (!has_steal_clock)
+		return;
+
+	st = &per_cpu(steal_time, cpu);
+
pv_notify_host(KVM_FEATURE_STEAL_TIME, slow_virt_to_phys(st)); + + pr_info("pv stealtime: cpu %d, st:0x%llx phys:0x%llx\n", + cpu, (unsigned long long)st, (unsigned long long) slow_virt_to_phys(st)); +} + +#ifdef CONFIG_SMP +static void pv_disable_steal_time(void) +{ + if (has_steal_clock) + pv_notify_host(KVM_FEATURE_STEAL_TIME, 0); +} + +static int pv_cpu_online(unsigned int cpu) +{ + unsigned long flags; + local_irq_save(flags); + pv_register_steal_time(); + local_irq_restore(flags); + return 0; +} + +static int pv_cpu_down_prepare(unsigned int cpu) +{ + unsigned long flags; + local_irq_save(flags); + pv_disable_steal_time(); + local_irq_restore(flags); + return 0; +} +#endif + +static void pv_cpu_reboot(void *unused) +{ + pv_disable_steal_time(); +} + +static int pv_reboot_notify(struct notifier_block *nb, unsigned long code, + void *unused) +{ + on_each_cpu(pv_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block pv_reboot_nb = { + .notifier_call = pv_reboot_notify, +}; + +int __init pv_time_init(void) +{ + if (!cpu_has_hypervisor) + return 0; + if (!kvm_para_available()) + return 0; + + if (!(pv_feature_disabled & (1 << KVM_FEATURE_STEAL_TIME)) && + pv_feature_support(KVM_FEATURE_STEAL_TIME)) { + + register_reboot_notifier(&pv_reboot_nb); + + has_steal_clock = 1; + pv_time_ops.steal_clock = pv_steal_clock; + pv_register_steal_time(); + static_key_slow_inc(¶virt_steal_enabled); + static_key_slow_inc(¶virt_steal_rq_enabled); + +#ifdef CONFIG_SMP + if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "loongarch/pv:online", + pv_cpu_online, pv_cpu_down_prepare) < 0) + pr_err("failed to install cpu hotplug callbacks\n"); +#endif + } + return 0; +} + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +static void kvm_wait(u8 *ptr, u8 val) +{ + if (READ_ONCE(*ptr) != val) + return; + + __arch_cpu_idle(); +} + +__visible bool __kvm_vcpu_is_preempted(long cpu) +{ + struct kvm_steal_time *src = &per_cpu(steal_time, cpu); + + return !!(src->preempted & KVM_VCPU_PREEMPTED); +} + +/* Kick a cpu. Used to wake up a halted vcpu */ +static void kvm_kick_cpu(int cpu) +{ + kvm_hypercall1(KVM_HC_KICK_CPU, cpu); +} + +/* + * Setup pv_lock_ops for guest kernel. + */ +void __init kvm_spinlock_init(void) +{ + if (!cpu_has_hypervisor) + return; + if (!kvm_para_available()) + return; + + /* Don't use the pvqspinlock code if there is only 1 vCPU. */ + if (num_possible_cpus() == 1) + return; + + if (!(pv_feature_disabled & (1 << KVM_FEATURE_PARAVIRT_SPINLOCK)) && + pv_feature_support(KVM_FEATURE_PARAVIRT_SPINLOCK)) { + + __pv_init_lock_hash(); + pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_lock_ops.queued_spin_unlock = __pv_queued_spin_unlock; + pv_lock_ops.wait = kvm_wait; + pv_lock_ops.kick = kvm_kick_cpu; + + if (!(pv_feature_disabled & (1 << KVM_FEATURE_STEAL_TIME)) && + pv_feature_support(KVM_FEATURE_STEAL_TIME)) { + pv_lock_ops.vcpu_is_preempted = __kvm_vcpu_is_preempted; + } + + pr_info("pv spinlock: supported\n"); + } +} +#endif + +static void pv_send_ipi(const struct cpumask *mask, unsigned int action) +{ + unsigned long flags; + unsigned int cpu, i, min = 0, max = 0; + u64 ipi_bitmap = 0; + long ret; + + if (cpumask_empty(mask)) + return; + + local_irq_save(flags); + + for_each_cpu(i, mask) { + cpu = cpu_logical_map(i); + if (!ipi_bitmap) { + min = max = cpu; + } else if (cpu < min && (max - cpu) < BITS_PER_LONG) { + ipi_bitmap <<= min - cpu; + min = cpu; + } else if (cpu > min && cpu < min + BITS_PER_LONG) { + max = cpu < max ? 
max : cpu; + } else { + ret = kvm_hypercall3(KVM_HC_FUNC_IPI, ipi_bitmap, min, action); + WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret); + min = max = cpu; + ipi_bitmap = 0; + } + __set_bit(cpu - min, (unsigned long *)&ipi_bitmap); + } + + if (ipi_bitmap) { + ret = kvm_hypercall3(KVM_HC_FUNC_IPI, ipi_bitmap, min, action); + WARN_ONCE(ret < 0, "KVM: failed to send PV IPI: %ld", ret); + } + + local_irq_restore(flags); +} + +extern struct plat_smp_ops *mp_ops; +int __init pv_ipi_init(void) +{ + if (!cpu_has_hypervisor) + return 0; + if (!IS_ENABLED(CONFIG_SMP)) + return 0; + if (!kvm_para_available()) + return 0; + + if (!(pv_feature_disabled & (1 << KVM_FEATURE_MULTI_IPI)) && + pv_feature_support(KVM_FEATURE_MULTI_IPI)) { + mp_ops->send_ipi_mask = pv_send_ipi; + } + return 0; +} + +static int __init set_pv_ipi(char *str) +{ + pv_feature_disabled |= (1 << KVM_FEATURE_MULTI_IPI); + return 0; +} +early_param("no_pvipi", set_pv_ipi); + +static int __init set_pv_time(char *str) +{ + pv_feature_disabled |= (1 << KVM_FEATURE_STEAL_TIME); + return 0; +} +early_param("no_pvtime", set_pv_time); + +static int __init set_pv_spinlock(char *str) +{ + pv_feature_disabled |= (1 << KVM_FEATURE_PARAVIRT_SPINLOCK); + return 0; +} +early_param("no_pv_spinlock", set_pv_spinlock); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 96c62520f5f4..695d0b437fae 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -28,6 +28,7 @@ #include #include #include +#include int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__cpu_number_map); @@ -126,7 +127,7 @@ void calculate_cpu_foreign_map(void) &temp_foreign_map, &cpu_sibling_map[i]); } -const struct plat_smp_ops *mp_ops; +struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); void register_smp_ops(const struct plat_smp_ops *ops) @@ -134,7 +135,8 @@ void register_smp_ops(const struct plat_smp_ops *ops) if (mp_ops) printk(KERN_WARNING "Overriding previously set SMP ops\n"); - mp_ops = ops; + mp_ops = (struct plat_smp_ops *)ops; + pv_ipi_init(); } /* @@ -217,6 +219,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #endif } +static void __init kvm_smp_prepare_boot_cpu(void) +{ + kvm_spinlock_init(); +} + /* Preload SMP state for boot cpu */ void smp_prepare_boot_cpu(void) { @@ -250,6 +257,8 @@ void smp_prepare_boot_cpu(void) rr_node = next_node_in(rr_node, node_online_map); } } + + kvm_smp_prepare_boot_cpu(); } int __cpu_up(unsigned int cpu, struct task_struct *tidle) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 60323be17101..bcb6957a8103 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -21,6 +21,7 @@ #include #include #include +#include u64 cpu_clock_freq; EXPORT_SYMBOL(cpu_clock_freq); @@ -216,4 +217,6 @@ void __init time_init(void) constant_clockevent_init(); constant_clocksource_init(); + + pv_time_init(); } diff --git a/arch/loongarch/kernel/watch.c b/arch/loongarch/kernel/watch.c index 659582239709..4ef837388aaa 100644 --- a/arch/loongarch/kernel/watch.c +++ b/arch/loongarch/kernel/watch.c @@ -392,6 +392,8 @@ void loongarch_probe_watch_registers(struct cpuinfo_loongarch *c) c->watch_dreg_count = dbcn; total = ibcn + dbcn; c->watch_reg_use_cnt = (total < NUM_WATCH_REGS) ? 
total : NUM_WATCH_REGS;
+	if (total)
+		c->options |= LOONGARCH_CPU_WATCH;
 }
 
 /*
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
new file mode 100644
index 000000000000..c32e708b8961
--- /dev/null
+++ b/arch/loongarch/kvm/Kconfig
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	help
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	select HAVE_KVM_VCPU_ASYNC_IOCTL
+	select KVM_MMIO
+	select MMU_NOTIFIER
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQ_ROUTING
+	select HAVE_KVM_EVENTFD
+	select HAVE_KVM_MSI
+	select SRCU
+	select KVM_VFIO
+	help
+	  Support for hosting Guest kernels. This uses the LoongArch
+	  Virtualization (LVZ) ASE which supports running unmodified
+	  guest kernels.
+
+source "drivers/vhost/Kconfig"
+
+endif # VIRTUALIZATION
diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile
new file mode 100644
index 000000000000..4cd064d548ce
--- /dev/null
+++ b/arch/loongarch/kvm/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for KVM support for LoongArch
+#
+
+OBJECT_FILES_NON_STANDARD_entry.o := y
+
+ccflags-y := -Ivirt/kvm -Iarch/loongarch/kvm
+
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
+		irqchip.o eventfd.o)
+
+KVM := ../../../virt/kvm
+common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
+
+kvm-objs := $(common-objs-y) loongarch.o emulate.o interrupt.o
+kvm-objs += hypcall.o
+kvm-objs += mmu.o
+
+kvm-objs += exit.o intc/ls7a_irq.o intc/ls3a_ipi.o intc/irqchip-debug.o\
+		timer.o intc/ls3a_ext_irq.o irqfd.o csr.o
+obj-$(CONFIG_KVM) += kvm.o
+obj-y += entry.o fpu.o
diff --git a/arch/loongarch/kvm/csr.c b/arch/loongarch/kvm/csr.c
new file mode 100644
index 000000000000..6d2c89eebb15
--- /dev/null
+++ b/arch/loongarch/kvm/csr.c
@@ -0,0 +1,678 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include "kvmcpu.h"
+#include "intc/ls3a_ipi.h"
+#include "intc/ls3a_ext_irq.h"
+#include "ls_irq.h"
+#include "kvm_compat.h"
+#include "kvmcsr.h"
+
+#define CASE_READ_SW_GCSR(csr, regid, csrid) \
+	do { \
+		if (regid == csrid) { \
+			return kvm_read_sw_gcsr(csr, csrid); \
+		} \
+	} while (0)
+
+unsigned long _kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid)
+{
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+	unsigned long val = 0;
+
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_MERRCTL);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_MERRINFO1);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_MERRINFO2);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_MERRENTRY);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_MERRERA);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_MERRSAVE);
+	/* read the sw csr when the pmu is not passed through to the guest */
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCTRL0);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCTRL1);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCTRL2);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCTRL3);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR0);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR1);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR2);
+	CASE_READ_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR3);
+
+	val = 0;
+	if (csrid < 4096)
+		val = kvm_read_sw_gcsr(csr, csrid);
+	else
+		pr_warn_once("Unsupported csrread 0x%x with pc %lx\n",
+			csrid, vcpu->arch.pc);
+
+	return val;
+}
+
+#define CASE_WRITE_SW_GCSR(csr, regid, csrid, val) \
+	do { \
+		if (regid == csrid) { \
+			kvm_write_sw_gcsr(csr, csrid, val); \
+			return; \
+		} \
+	} while (0)
+
+void _kvm_emu_write_csr(struct kvm_vcpu *vcpu, int csrid,
+	unsigned long val)
+{
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_MERRCTL, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_MERRINFO1, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_MERRINFO2, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_MERRENTRY, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_MERRERA, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_MERRSAVE, val);
+
+	/* pass the pmu register through to the guest when perfctrl is configured */
+	CASE_WRITE_HW_PMU(vcpu, csr, csrid, KVM_CSR_PERFCTRL0, val);
+	CASE_WRITE_HW_PMU(vcpu, csr, csrid, KVM_CSR_PERFCTRL1, val);
+	CASE_WRITE_HW_PMU(vcpu, csr, csrid, KVM_CSR_PERFCTRL2, val);
+	CASE_WRITE_HW_PMU(vcpu, csr, csrid, KVM_CSR_PERFCTRL3, val);
+	/* write the sw pmu csr when the ctrl register is not configured */
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR0, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR1, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR2, val);
+	CASE_WRITE_SW_GCSR(csr, csrid, KVM_CSR_PERFCNTR3, val);
+
+
+	if (csrid < 4096)
+		kvm_write_sw_gcsr(csr, csrid, val);
+	else
+		pr_warn_once("Unsupported csrwrite 0x%x with pc %lx\n",
+			csrid, vcpu->arch.pc);
+}
+
+#define CASE_CHANGE_SW_GCSR(csr, regid, csrid, mask, val) \
+	do { \
+		if (regid == csrid) { \
+			kvm_change_sw_gcsr(csr, csrid, mask, val); \
+			return; \
+		} \
+	} while (0)
+
+void _kvm_emu_xchg_csr(struct kvm_vcpu *vcpu, int csrid,
+	unsigned long csr_mask, unsigned long val)
+{
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_IMPCTL1, csr_mask, val);
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_MERRCTL, csr_mask, val);
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_MERRINFO1, csr_mask, val);
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_MERRINFO2, csr_mask, val);
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_MERRENTRY, csr_mask, val);
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_MERRERA, csr_mask, val);
+	CASE_CHANGE_SW_GCSR(csr, csrid, KVM_CSR_MERRSAVE, csr_mask, val);
+
+	if (csrid < 4096) {
+		unsigned long orig;
+
+		orig = kvm_read_sw_gcsr(csr, csrid);
+		orig &= ~csr_mask;
+		orig |= val & csr_mask;
+		kvm_write_sw_gcsr(csr, csrid, orig);
+	} else
+		pr_warn_once("Unsupported csrxchg 0x%x with pc %lx\n",
+			csrid, vcpu->arch.pc);
+}
+
+int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *v, int force)
+{
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+
+	GET_HW_GCSR(id, KVM_CSR_CRMD, v);
+	GET_HW_GCSR(id, KVM_CSR_PRMD, v);
+	GET_HW_GCSR(id, KVM_CSR_EUEN, v);
+	GET_HW_GCSR(id, KVM_CSR_MISC, v);
+	GET_HW_GCSR(id, KVM_CSR_ECFG, v);
+	GET_HW_GCSR(id, KVM_CSR_ESTAT, v);
+	GET_HW_GCSR(id, KVM_CSR_ERA, v);
+	GET_HW_GCSR(id, KVM_CSR_BADV, v);
+	GET_HW_GCSR(id, KVM_CSR_BADI, v);
+	GET_HW_GCSR(id, KVM_CSR_EENTRY, v);
+	GET_HW_GCSR(id, KVM_CSR_TLBIDX, v);
+	GET_HW_GCSR(id, KVM_CSR_TLBEHI, v);
+	GET_HW_GCSR(id, KVM_CSR_TLBELO0, v);
+	GET_HW_GCSR(id, KVM_CSR_TLBELO1, v);
+	GET_HW_GCSR(id, KVM_CSR_ASID, v);
+	GET_HW_GCSR(id, KVM_CSR_PGDL, v);
+	GET_HW_GCSR(id, KVM_CSR_PGDH, v);
+	GET_HW_GCSR(id, KVM_CSR_PWCTL0, v);
+
GET_HW_GCSR(id, KVM_CSR_PWCTL1, v); + GET_HW_GCSR(id, KVM_CSR_STLBPGSIZE, v); + GET_HW_GCSR(id, KVM_CSR_RVACFG, v); + GET_HW_GCSR(id, KVM_CSR_CPUID, v); + GET_HW_GCSR(id, KVM_CSR_PRCFG1, v); + GET_HW_GCSR(id, KVM_CSR_PRCFG2, v); + GET_HW_GCSR(id, KVM_CSR_PRCFG3, v); + GET_HW_GCSR(id, KVM_CSR_KS0, v); + GET_HW_GCSR(id, KVM_CSR_KS1, v); + GET_HW_GCSR(id, KVM_CSR_KS2, v); + GET_HW_GCSR(id, KVM_CSR_KS3, v); + GET_HW_GCSR(id, KVM_CSR_KS4, v); + GET_HW_GCSR(id, KVM_CSR_KS5, v); + GET_HW_GCSR(id, KVM_CSR_KS6, v); + GET_HW_GCSR(id, KVM_CSR_KS7, v); + GET_HW_GCSR(id, KVM_CSR_TMID, v); + GET_HW_GCSR(id, KVM_CSR_TCFG, v); + GET_HW_GCSR(id, KVM_CSR_TVAL, v); + GET_HW_GCSR(id, KVM_CSR_CNTC, v); + GET_HW_GCSR(id, KVM_CSR_LLBCTL, v); + GET_HW_GCSR(id, KVM_CSR_TLBRENTRY, v); + GET_HW_GCSR(id, KVM_CSR_TLBRBADV, v); + GET_HW_GCSR(id, KVM_CSR_TLBRERA, v); + GET_HW_GCSR(id, KVM_CSR_TLBRSAVE, v); + GET_HW_GCSR(id, KVM_CSR_TLBRELO0, v); + GET_HW_GCSR(id, KVM_CSR_TLBRELO1, v); + GET_HW_GCSR(id, KVM_CSR_TLBREHI, v); + GET_HW_GCSR(id, KVM_CSR_TLBRPRMD, v); + GET_HW_GCSR(id, KVM_CSR_DMWIN0, v); + GET_HW_GCSR(id, KVM_CSR_DMWIN1, v); + GET_HW_GCSR(id, KVM_CSR_DMWIN2, v); + GET_HW_GCSR(id, KVM_CSR_DMWIN3, v); + GET_HW_GCSR(id, KVM_CSR_MWPS, v); + GET_HW_GCSR(id, KVM_CSR_FWPS, v); + + GET_SW_GCSR(csr, id, KVM_CSR_IMPCTL1, v); + GET_SW_GCSR(csr, id, KVM_CSR_IMPCTL2, v); + GET_SW_GCSR(csr, id, KVM_CSR_MERRCTL, v); + GET_SW_GCSR(csr, id, KVM_CSR_MERRINFO1, v); + GET_SW_GCSR(csr, id, KVM_CSR_MERRINFO2, v); + GET_SW_GCSR(csr, id, KVM_CSR_MERRENTRY, v); + GET_SW_GCSR(csr, id, KVM_CSR_MERRERA, v); + GET_SW_GCSR(csr, id, KVM_CSR_MERRSAVE, v); + GET_SW_GCSR(csr, id, KVM_CSR_CTAG, v); + GET_SW_GCSR(csr, id, KVM_CSR_DEBUG, v); + GET_SW_GCSR(csr, id, KVM_CSR_DERA, v); + GET_SW_GCSR(csr, id, KVM_CSR_DESAVE, v); + + GET_SW_GCSR(csr, id, KVM_CSR_TINTCLR, v); + + if (force && (id < CSR_ALL_SIZE)) { + *v = kvm_read_sw_gcsr(csr, id); + return 0; + } + + return -1; +} + +int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *v, int force) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + int ret; + + SET_HW_GCSR(csr, id, KVM_CSR_CRMD, v); + SET_HW_GCSR(csr, id, KVM_CSR_PRMD, v); + SET_HW_GCSR(csr, id, KVM_CSR_EUEN, v); + SET_HW_GCSR(csr, id, KVM_CSR_MISC, v); + SET_HW_GCSR(csr, id, KVM_CSR_ECFG, v); + SET_HW_GCSR(csr, id, KVM_CSR_ERA, v); + SET_HW_GCSR(csr, id, KVM_CSR_BADV, v); + SET_HW_GCSR(csr, id, KVM_CSR_BADI, v); + SET_HW_GCSR(csr, id, KVM_CSR_EENTRY, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBIDX, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBEHI, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBELO0, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBELO1, v); + SET_HW_GCSR(csr, id, KVM_CSR_ASID, v); + SET_HW_GCSR(csr, id, KVM_CSR_PGDL, v); + SET_HW_GCSR(csr, id, KVM_CSR_PGDH, v); + SET_HW_GCSR(csr, id, KVM_CSR_PWCTL0, v); + SET_HW_GCSR(csr, id, KVM_CSR_PWCTL1, v); + SET_HW_GCSR(csr, id, KVM_CSR_STLBPGSIZE, v); + SET_HW_GCSR(csr, id, KVM_CSR_RVACFG, v); + SET_HW_GCSR(csr, id, KVM_CSR_CPUID, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS0, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS1, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS2, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS3, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS4, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS5, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS6, v); + SET_HW_GCSR(csr, id, KVM_CSR_KS7, v); + SET_HW_GCSR(csr, id, KVM_CSR_TMID, v); + SET_HW_GCSR(csr, id, KVM_CSR_TCFG, v); + SET_HW_GCSR(csr, id, KVM_CSR_TVAL, v); + SET_HW_GCSR(csr, id, KVM_CSR_CNTC, v); + SET_HW_GCSR(csr, id, KVM_CSR_LLBCTL, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBRENTRY, v); + 
SET_HW_GCSR(csr, id, KVM_CSR_TLBRBADV, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBRERA, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBRSAVE, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBRELO0, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBRELO1, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBREHI, v); + SET_HW_GCSR(csr, id, KVM_CSR_TLBRPRMD, v); + SET_HW_GCSR(csr, id, KVM_CSR_DMWIN0, v); + SET_HW_GCSR(csr, id, KVM_CSR_DMWIN1, v); + SET_HW_GCSR(csr, id, KVM_CSR_DMWIN2, v); + SET_HW_GCSR(csr, id, KVM_CSR_DMWIN3, v); + SET_HW_GCSR(csr, id, KVM_CSR_MWPS, v); + SET_HW_GCSR(csr, id, KVM_CSR_FWPS, v); + + SET_SW_GCSR(csr, id, KVM_CSR_IMPCTL1, v); + SET_SW_GCSR(csr, id, KVM_CSR_IMPCTL2, v); + SET_SW_GCSR(csr, id, KVM_CSR_MERRCTL, v); + SET_SW_GCSR(csr, id, KVM_CSR_MERRINFO1, v); + SET_SW_GCSR(csr, id, KVM_CSR_MERRINFO2, v); + SET_SW_GCSR(csr, id, KVM_CSR_MERRENTRY, v); + SET_SW_GCSR(csr, id, KVM_CSR_MERRERA, v); + SET_SW_GCSR(csr, id, KVM_CSR_MERRSAVE, v); + SET_SW_GCSR(csr, id, KVM_CSR_CTAG, v); + SET_SW_GCSR(csr, id, KVM_CSR_DEBUG, v); + SET_SW_GCSR(csr, id, KVM_CSR_DERA, v); + SET_SW_GCSR(csr, id, KVM_CSR_DESAVE, v); + SET_SW_GCSR(csr, id, KVM_CSR_PRCFG1, v); + SET_SW_GCSR(csr, id, KVM_CSR_PRCFG2, v); + SET_SW_GCSR(csr, id, KVM_CSR_PRCFG3, v); + + SET_SW_GCSR(csr, id, KVM_CSR_PGD, v); + SET_SW_GCSR(csr, id, KVM_CSR_TINTCLR, v); + + ret = -1; + switch (id) { + case KVM_CSR_ESTAT: + kvm_write_gcsr_estat(*v); + /* estat IP0~IP7 inject through guestexcept */ + kvm_write_csr_gintc(((*v) >> 2) & 0xff); + ret = 0; + break; + default: + if (force && (id < CSR_ALL_SIZE)) { + kvm_set_sw_gcsr(csr, id, *v); + ret = 0; + } + break; + } + + return ret; +} + +struct kvm_iocsr { + u32 start, end; + int (*get) (struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, u64 *res); + int (*set) (struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, u64 val); +}; + +static struct kvm_iocsr_entry *_kvm_find_iocsr(struct kvm *kvm, u32 addr) +{ + int i = 0; + + for (i = 0; i < IOCSR_MAX; i++) { + if (addr == kvm->arch.iocsr[i].addr) + return &kvm->arch.iocsr[i]; + } + + return NULL; +} + +static int kvm_iocsr_common_get(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 addr, u64 *res) +{ + int r = EMULATE_FAIL; + struct kvm_iocsr_entry *entry; + + spin_lock(&vcpu->kvm->arch.iocsr_lock); + entry = _kvm_find_iocsr(vcpu->kvm, addr); + if (entry) { + r = EMULATE_DONE; + *res = entry->data; + } + spin_unlock(&vcpu->kvm->arch.iocsr_lock); + return r; +} + +static int kvm_iocsr_common_set(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 addr, u64 val) +{ + int r = EMULATE_FAIL; + struct kvm_iocsr_entry *entry; + + spin_lock(&vcpu->kvm->arch.iocsr_lock); + entry = _kvm_find_iocsr(vcpu->kvm, addr); + if (entry) { + r = EMULATE_DONE; + entry->data = val; + } + spin_unlock(&vcpu->kvm->arch.iocsr_lock); + return r; +} + +static int kvm_misc_set(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, + u64 val) +{ + return kvm_iocsr_common_set(run, vcpu, addr, val); +} + +static int kvm_ipi_get(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, + u64 *res) +{ + int ret; + + ++vcpu->stat.rdcsr_ipi_access_exits; + run->mmio.phys_addr = KVM_IPI_REG_ADDRESS(vcpu->vcpu_id, (addr & 0xff)); + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, + run->mmio.len, res); + if (ret) { + run->mmio.is_write = 0; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 0; + return EMULATE_DO_MMIO; + } + return EMULATE_DONE; +} + +static int kvm_extioi_isr_get(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 addr, u64 *res) +{ + int ret; + + run->mmio.phys_addr = 
EXTIOI_PERCORE_ADDR(vcpu->vcpu_id, (addr & 0xff)); + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, + run->mmio.len, res); + if (ret) { + run->mmio.is_write = 0; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 0; + return EMULATE_FAIL; + } + + return EMULATE_DONE; +} + +static int kvm_ipi_set(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, + u64 val) +{ + int ret; + + run->mmio.phys_addr = KVM_IPI_REG_ADDRESS(vcpu->vcpu_id, (addr & 0xff)); + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, + run->mmio.len, &val); + if (ret < 0) { + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + return EMULATE_DO_MMIO; + } + + return EMULATE_DONE; +} + +static int kvm_extioi_set(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, + u64 val) +{ + int ret; + + if ((addr & 0x1f00) == KVM_IOCSR_EXTIOI_ISR_BASE) { + run->mmio.phys_addr = EXTIOI_PERCORE_ADDR(vcpu->vcpu_id, (addr & 0xff)); + } else { + run->mmio.phys_addr = EXTIOI_ADDR((addr & 0x1fff)); + } + + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, + run->mmio.len, &val); + if (ret < 0) { + memcpy(run->mmio.data, &val, run->mmio.len); + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + return EMULATE_DO_MMIO; + } + + return EMULATE_DONE; +} + +static int kvm_nop_set(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 addr, + u64 val) +{ + return EMULATE_DONE; +} + +/* we put these iocsrs with access frequency, from high to low */ +static struct kvm_iocsr kvm_iocsrs[] = { + /* extioi iocsr */ + {KVM_IOCSR_EXTIOI_EN_BASE, KVM_IOCSR_EXTIOI_EN_BASE + 0x100, + NULL, kvm_extioi_set}, + {KVM_IOCSR_EXTIOI_NODEMAP_BASE, KVM_IOCSR_EXTIOI_NODEMAP_BASE+0x28, + NULL, kvm_extioi_set}, + {KVM_IOCSR_EXTIOI_ROUTE_BASE, KVM_IOCSR_EXTIOI_ROUTE_BASE + 0x100, + NULL, kvm_extioi_set}, + {KVM_IOCSR_EXTIOI_ISR_BASE, KVM_IOCSR_EXTIOI_ISR_BASE + 0x1c, + kvm_extioi_isr_get, kvm_extioi_set}, + + {KVM_IOCSR_IPI_STATUS, KVM_IOCSR_IPI_STATUS + 0x40, + kvm_ipi_get, kvm_ipi_set}, + {KVM_IOCSR_IPI_SEND, KVM_IOCSR_IPI_SEND + 0x1, + NULL, kvm_ipi_set}, + {KVM_IOCSR_MBUF_SEND, KVM_IOCSR_MBUF_SEND + 0x1, + NULL, kvm_ipi_set}, + + {KVM_IOCSR_FEATURES, KVM_IOCSR_FEATURES + 0x1, + kvm_iocsr_common_get, kvm_nop_set}, + {KVM_IOCSR_VENDOR, KVM_IOCSR_VENDOR + 0x1, + kvm_iocsr_common_get, kvm_nop_set}, + {KVM_IOCSR_CPUNAME, KVM_IOCSR_CPUNAME + 0x1, + kvm_iocsr_common_get, kvm_nop_set}, + {KVM_IOCSR_NODECNT, KVM_IOCSR_NODECNT + 0x1, + kvm_iocsr_common_get, kvm_nop_set}, + {KVM_IOCSR_MISC_FUNC, KVM_IOCSR_MISC_FUNC + 0x1, + kvm_iocsr_common_get, kvm_misc_set}, +}; + +static int _kvm_emu_iocsr_read(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 addr, u64 *res) +{ + enum emulation_result er = EMULATE_FAIL; + int i = 0; + struct kvm_iocsr *iocsr = NULL; + + if (!irqchip_in_kernel(vcpu->kvm)) { + run->iocsr_io.len = run->mmio.len; + run->iocsr_io.phys_addr = addr; + run->iocsr_io.is_write = 0; + return EMULATE_DO_IOCSR; + } + for (i = 0; i < sizeof(kvm_iocsrs) / sizeof(struct kvm_iocsr); i++) { + iocsr = &kvm_iocsrs[i]; + if (addr >= iocsr->start && addr < iocsr->end) { + if (iocsr->get) + er = iocsr->get(run, vcpu, addr, res); + } + } + + if (er != EMULATE_DONE) + kvm_debug("%s iocsr 0x%x not support in kvm\n", __func__, addr); + + return er; +} + +static int _kvm_emu_iocsr_write(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 addr, u64 val) +{ + enum emulation_result er = EMULATE_FAIL; + int i = 0; + struct kvm_iocsr *iocsr = NULL; + + if (!irqchip_in_kernel(vcpu->kvm)) { + 
run->iocsr_io.len = run->mmio.len; + memcpy(run->iocsr_io.data, &val, run->iocsr_io.len); + run->iocsr_io.phys_addr = addr; + run->iocsr_io.is_write = 1; + return EMULATE_DO_IOCSR; + } + for (i = 0; i < sizeof(kvm_iocsrs) / sizeof(struct kvm_iocsr); i++) { + iocsr = &kvm_iocsrs[i]; + if (addr >= iocsr->start && addr < iocsr->end) { + if (iocsr->set) + er = iocsr->set(run, vcpu, addr, val); + } + } + if (er != EMULATE_DONE) + kvm_debug("%s iocsr 0x%x not support in kvm\n", __func__, addr); + + return er; +} + +/* all iocsr operation should in kvm, no mmio */ +int _kvm_emu_iocsr(larch_inst inst, + struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 rd, rj, opcode; + u32 val; + u64 res = 0; + int ret; + + /* + * Each IOCSR with different opcode + */ + rd = inst.reg2_format.rd; + rj = inst.reg2_format.rj; + opcode = inst.reg2_format.opcode; + val = vcpu->arch.gprs[rj]; + res = vcpu->arch.gprs[rd]; + /* LoongArch is Little endian */ + switch (opcode) { + case iocsrrdb_op: + run->mmio.len = 1; + ret = _kvm_emu_iocsr_read(run, vcpu, val, &res); + vcpu->arch.gprs[rd] = (u8) res; + break; + case iocsrrdh_op: + run->mmio.len = 2; + ret = _kvm_emu_iocsr_read(run, vcpu, val, &res); + vcpu->arch.gprs[rd] = (u16) res; + break; + case iocsrrdw_op: + run->mmio.len = 4; + ret = _kvm_emu_iocsr_read(run, vcpu, val, &res); + vcpu->arch.gprs[rd] = (u32) res; + break; + case iocsrrdd_op: + run->mmio.len = 8; + ret = _kvm_emu_iocsr_read(run, vcpu, val, &res); + vcpu->arch.gprs[rd] = res; + break; + case iocsrwrb_op: + run->mmio.len = 1; + ret = _kvm_emu_iocsr_write(run, vcpu, val, (u8)res); + break; + case iocsrwrh_op: + run->mmio.len = 2; + ret = _kvm_emu_iocsr_write(run, vcpu, val, (u16)res); + break; + case iocsrwrw_op: + run->mmio.len = 4; + ret = _kvm_emu_iocsr_write(run, vcpu, val, (u32)res); + break; + case iocsrwrd_op: + run->mmio.len = 8; + ret = _kvm_emu_iocsr_write(run, vcpu, val, res); + break; + default: + ret = EMULATE_FAIL; + break; + } + + if (ret == EMULATE_DO_IOCSR) { + vcpu->arch.io_gpr = rd; + } + + return ret; +} + +int _kvm_complete_iocsr_read(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; + enum emulation_result er = EMULATE_DONE; + + switch (run->iocsr_io.len) { + case 8: + *gpr = *(s64 *)run->iocsr_io.data; + break; + case 4: + *gpr = *(int *)run->iocsr_io.data; + break; + case 2: + *gpr = *(short *)run->iocsr_io.data; + break; + case 1: + *gpr = *(char *) run->iocsr_io.data; + break; + default: + kvm_err("Bad IOCSR length: %d,addr is 0x%lx", + run->iocsr_io.len, vcpu->arch.badv); + er = EMULATE_FAIL; + break; + } + + return er; +} + +int _kvm_get_iocsr(struct kvm *kvm, struct kvm_iocsr_entry *__user argp) +{ + struct kvm_iocsr_entry *entry, tmp; + int r = -EFAULT; + + if (copy_from_user(&tmp, argp, sizeof(tmp))) + goto out; + + spin_lock(&kvm->arch.iocsr_lock); + entry = _kvm_find_iocsr(kvm, tmp.addr); + if (entry != NULL) + tmp.data = entry->data; + spin_unlock(&kvm->arch.iocsr_lock); + + if (entry) + r = copy_to_user(argp, &tmp, sizeof(tmp)); + +out: + return r; +} + +int _kvm_set_iocsr(struct kvm *kvm, struct kvm_iocsr_entry *__user argp) +{ + struct kvm_iocsr_entry *entry, tmp; + int r = -EFAULT; + + if (copy_from_user(&tmp, argp, sizeof(tmp))) + goto out; + + spin_lock(&kvm->arch.iocsr_lock); + entry = _kvm_find_iocsr(kvm, tmp.addr); + if (entry != NULL) { + r = 0; + entry->data = tmp.data; + } + spin_unlock(&kvm->arch.iocsr_lock); + +out: + return r; +} + +static struct kvm_iocsr_entry iocsr_array[IOCSR_MAX] = { + 
+    {KVM_IOCSR_FEATURES, .data = KVM_IOCSRF_NODECNT|KVM_IOCSRF_MSI
+        |KVM_IOCSRF_EXTIOI|KVM_IOCSRF_CSRIPI|KVM_IOCSRF_VM},
+    {KVM_IOCSR_VENDOR, .data = 0x6e6f73676e6f6f4c}, /* Loongson */
+    {KVM_IOCSR_CPUNAME, .data = 0x303030354133},    /* 3A5000 */
+    {KVM_IOCSR_NODECNT, .data = 0x4},
+    {KVM_IOCSR_MISC_FUNC, .data = 0x0},
+};
+
+int _kvm_init_iocsr(struct kvm *kvm)
+{
+    int i = 0;
+
+    spin_lock_init(&kvm->arch.iocsr_lock);
+    for (i = 0; i < IOCSR_MAX; i++) {
+        kvm->arch.iocsr[i].addr = iocsr_array[i].addr;
+        kvm->arch.iocsr[i].data = iocsr_array[i].data;
+    }
+    return 0;
+}
diff --git a/arch/loongarch/kvm/emulate.c b/arch/loongarch/kvm/emulate.c
new file mode 100644
index 000000000000..43ba659ef03c
--- /dev/null
+++ b/arch/loongarch/kvm/emulate.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "kvmcpu.h"
+#include "trace.h"
+
+int _kvm_emu_idle(struct kvm_vcpu *vcpu)
+{
+    ++vcpu->stat.idle_exits;
+    trace_kvm_exit(vcpu, KVM_TRACE_EXIT_IDLE);
+    if (!vcpu->arch.irq_pending) {
+        kvm_save_timer(vcpu);
+        kvm_vcpu_block(vcpu);
+
+        /*
+         * If we are runnable, then definitely go off to user space to
+         * check if any I/O interrupts are pending.
+         */
+        if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
+            kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+            vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+        }
+    }
+
+    if (kvm_check_request(KVM_REQ_EVENT, vcpu)) {
+        vcpu->arch.pv.pv_unhalted = false;
+    }
+
+    return EMULATE_DONE;
+}
+
+int _kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+    struct kvm_run *run = vcpu->run;
+    unsigned int rd, op8, opcode;
+    unsigned long rd_val = 0;
+    void *data = run->mmio.data;
+    unsigned long curr_pc;
+    int ret = 0;
+
+    /*
+     * Update PC and hold onto current PC in case there is
+     * an error and we want to rollback the PC
+     */
+    curr_pc = vcpu->arch.pc;
+    update_pc(&vcpu->arch);
+
+    op8 = (inst.word >> 24) & 0xff;
+    run->mmio.phys_addr = vcpu->arch.badv;
+    if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+        goto out_fail;
+
+    if (op8 < 0x28) {
+        /* stptrw/d process */
+        rd = inst.reg2i14_format.rd;
+        opcode = inst.reg2i14_format.opcode;
+
+        switch (opcode) {
+        case stptrd_op:
+            run->mmio.len = 8;
+            *(unsigned long *)data = vcpu->arch.gprs[rd];
+            break;
+        case stptrw_op:
+            run->mmio.len = 4;
+            *(unsigned int *)data = vcpu->arch.gprs[rd];
+            break;
+        default:
+            break;
+        }
+    } else if (op8 < 0x30) {
+        /* st.b/h/w/d process */
+        rd = inst.reg2i12_format.rd;
+        opcode = inst.reg2i12_format.opcode;
+        rd_val = vcpu->arch.gprs[rd];
+
+        switch (opcode) {
+        case std_op:
+            run->mmio.len = 8;
+            *(unsigned long *)data = rd_val;
+            break;
+        case stw_op:
+            run->mmio.len = 4;
+            *(unsigned int *)data = rd_val;
+            break;
+        case sth_op:
+            run->mmio.len = 2;
+            *(unsigned short *)data = rd_val;
+            break;
+        case stb_op:
+            run->mmio.len = 1;
+            *(unsigned char *)data = rd_val;
+            break;
+        default:
+            kvm_err("Store not yet supported (inst=0x%08x)\n",
+                inst.word);
+            kvm_arch_vcpu_dump_regs(vcpu);
+            goto out_fail;
+        }
+    } else if (op8 == 0x38) {
+        /* stxb/h/w/d process */
+        rd = inst.reg3_format.rd;
+        opcode = inst.reg3_format.opcode;
+
+        switch (opcode) {
+        case stxb_op:
+            run->mmio.len = 1;
+            *(unsigned char *)data = vcpu->arch.gprs[rd];
+            break;
+        case stxh_op:
+            run->mmio.len = 2;
+            *(unsigned short *)data = vcpu->arch.gprs[rd];
+            break;
+        case stxw_op:
+            run->mmio.len = 4;
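+            /* copy the low 32 bits of rd into the MMIO data buffer */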
+            *(unsigned int *)data = vcpu->arch.gprs[rd];
+            break;
+        case stxd_op:
+            run->mmio.len = 8;
+            *(unsigned long *)data = vcpu->arch.gprs[rd];
+            break;
+        default:
+            kvm_err("Store not yet supported (inst=0x%08x)\n",
+                inst.word);
+            kvm_arch_vcpu_dump_regs(vcpu);
+            goto out_fail;
+        }
+    } else {
+        kvm_err("Store not yet supported (inst=0x%08x)\n",
+            inst.word);
+        kvm_arch_vcpu_dump_regs(vcpu);
+        goto out_fail;
+    }
+
+    /* All MMIO emulated in kernel goes through the common interface */
+    ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
+            run->mmio.len, data);
+    if (!ret) {
+        vcpu->mmio_needed = 0;
+        return EMULATE_DONE;
+    }
+
+    run->mmio.is_write = 1;
+    vcpu->mmio_needed = 1;
+    vcpu->mmio_is_write = 1;
+
+    return EMULATE_DO_MMIO;
+
+out_fail:
+    /* Rollback PC if emulation was unsuccessful */
+    vcpu->arch.pc = curr_pc;
+    return EMULATE_FAIL;
+}
+
+
+int _kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+    unsigned int op8, opcode, rd;
+    int ret = 0;
+    struct kvm_run *run = vcpu->run;
+
+    run->mmio.phys_addr = vcpu->arch.badv;
+    if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+        return EMULATE_FAIL;
+
+    vcpu->mmio_needed = 2;  /* signed */
+    op8 = (inst.word >> 24) & 0xff;
+
+    if (op8 < 0x28) {
+        /* ldptr.w/d process */
+        rd = inst.reg2i14_format.rd;
+        opcode = inst.reg2i14_format.opcode;
+
+        switch (opcode) {
+        case ldptrd_op:
+            run->mmio.len = 8;
+            break;
+        case ldptrw_op:
+            run->mmio.len = 4;
+            break;
+        default:
+            break;
+        }
+    } else if (op8 < 0x2f) {
+        /* ld.b/h/w/d, ld.bu/hu/wu process */
+        rd = inst.reg2i12_format.rd;
+        opcode = inst.reg2i12_format.opcode;
+
+        switch (opcode) {
+        case ldd_op:
+            run->mmio.len = 8;
+            break;
+        case ldwu_op:
+            vcpu->mmio_needed = 1;  /* unsigned */
+            run->mmio.len = 4;
+            break;
+        case ldw_op:
+            run->mmio.len = 4;
+            break;
+        case ldhu_op:
+            vcpu->mmio_needed = 1;  /* unsigned */
+            run->mmio.len = 2;
+            break;
+        case ldh_op:
+            run->mmio.len = 2;
+            break;
+        case ldbu_op:
+            vcpu->mmio_needed = 1;  /* unsigned */
+            run->mmio.len = 1;
+            break;
+        case ldb_op:
+            run->mmio.len = 1;
+            break;
+        default:
+            kvm_err("Load not yet supported (inst=0x%08x)\n",
+                inst.word);
+            kvm_arch_vcpu_dump_regs(vcpu);
+            vcpu->mmio_needed = 0;
+            return EMULATE_FAIL;
+        }
+    } else if (op8 == 0x38) {
+        /* ldxb/h/w/d, ldxb/h/wu, ldgtb/h/w/d, ldleb/h/w/d process */
+        rd = inst.reg3_format.rd;
+        opcode = inst.reg3_format.opcode;
+
+        switch (opcode) {
+        case ldxb_op:
+            run->mmio.len = 1;
+            break;
+        case ldxbu_op:
+            run->mmio.len = 1;
+            vcpu->mmio_needed = 1;  /* unsigned */
+            break;
+        case ldxh_op:
+            run->mmio.len = 2;
+            break;
+        case ldxhu_op:
+            run->mmio.len = 2;
+            vcpu->mmio_needed = 1;  /* unsigned */
+            break;
+        case ldxw_op:
+            run->mmio.len = 4;
+            break;
+        case ldxwu_op:
+            run->mmio.len = 4;
+            vcpu->mmio_needed = 1;  /* unsigned */
+            break;
+        case ldxd_op:
+            run->mmio.len = 8;
+            break;
+        default:
+            kvm_err("Load not yet supported (inst=0x%08x)\n",
+                inst.word);
+            kvm_arch_vcpu_dump_regs(vcpu);
+            vcpu->mmio_needed = 0;
+            return EMULATE_FAIL;
+        }
+    } else {
+        kvm_err("Load not yet supported (inst=0x%08x) @ %lx\n",
+            inst.word, vcpu->arch.pc);
+        vcpu->mmio_needed = 0;
+        return EMULATE_FAIL;
+    }
+
+    /* Set for _kvm_complete_mmio_read use */
+    vcpu->arch.io_gpr = rd;
+    ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
+            run->mmio.len, run->mmio.data);
+    run->mmio.is_write = 0;
+    vcpu->mmio_is_write = 0;
+
+    if (!ret) {
+        _kvm_complete_mmio_read(vcpu, run);
+        vcpu->mmio_needed = 0;
+        return EMULATE_DONE;
+    }
+    return EMULATE_DO_MMIO;
+}
+
+int _kvm_complete_mmio_read(struct 
kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr]; + enum emulation_result er = EMULATE_DONE; + + /* update with new PC */ + update_pc(&vcpu->arch); + switch (run->mmio.len) { + case 8: + *gpr = *(s64 *)run->mmio.data; + break; + + case 4: + if (vcpu->mmio_needed == 2) { + *gpr = *(int *)run->mmio.data; + } else + *gpr = *(unsigned int *)run->mmio.data; + break; + + case 2: + if (vcpu->mmio_needed == 2) + *gpr = *(short *) run->mmio.data; + else + *gpr = *(unsigned short *)run->mmio.data; + + break; + case 1: + if (vcpu->mmio_needed == 2) + *gpr = *(char *) run->mmio.data; + else + *gpr = *(unsigned char *) run->mmio.data; + break; + default: + kvm_err("Bad MMIO length: %d,addr is 0x%lx", + run->mmio.len, vcpu->arch.badv); + er = EMULATE_FAIL; + break; + } + + return er; +} diff --git a/arch/loongarch/kvm/entry.S b/arch/loongarch/kvm/entry.S new file mode 100644 index 000000000000..4bd6d9f213be --- /dev/null +++ b/arch/loongarch/kvm/entry.S @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include "kvm_compat.h" + +#define RESUME_HOST (1 << 1) + +#define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x) +#define PT_GPR_OFFSET(x) (PT_R0 + 8*x) + + .text + +.macro kvm_save_guest_gprs base + .irp n,1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + KVM_LONG_S $r\n, \base, GGPR_OFFSET(\n) + .endr +.endm + +.macro kvm_restore_guest_gprs base + .irp n,1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + KVM_LONG_L $r\n, \base, GGPR_OFFSET(\n) + .endr +.endm + +.macro kvm_save_host_gpr base + .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31 + KVM_LONG_S $r\n, \base, PT_GPR_OFFSET(\n) + .endr +.endm + +.macro kvm_restore_host_gpr base + .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31 + KVM_LONG_L $r\n, \base, PT_GPR_OFFSET(\n) + .endr +.endm + +/* + * prepare switch to guest + * @param: + * KVM_ARCH: kvm_vcpu_arch, don't touch it until 'ertn' + * GPRNUM: KVM_ARCH gpr number + * tmp, tmp1: temp register + */ +.macro kvm_switch_to_guest KVM_ARCH GPRNUM tmp tmp1 + /* set host excfg.VS=0, all exceptions share one exception entry */ + csrrd \tmp, KVM_CSR_ECFG + bstrins.w \tmp, zero, (KVM_ECFG_VS_SHIFT + KVM_ECFG_VS_WIDTH - 1), KVM_ECFG_VS_SHIFT + csrwr \tmp, KVM_CSR_ECFG + + /* Load up the new EENTRY */ + KVM_LONG_L \tmp, \KVM_ARCH, KVM_ARCH_GEENTRY + csrwr \tmp, KVM_CSR_EENTRY + + /* Set Guest ERA */ + KVM_LONG_L \tmp, \KVM_ARCH, KVM_ARCH_GPC + csrwr \tmp, KVM_CSR_ERA + + /* Save host PGDL */ + csrrd \tmp, KVM_CSR_PGDL + KVM_LONG_S \tmp, \KVM_ARCH, KVM_ARCH_HPGD + + /* Switch to kvm */ + KVM_LONG_L \tmp1, \KVM_ARCH, KVM_VCPU_KVM - KVM_VCPU_ARCH + + /* Load guest PGDL */ + lu12i.w \tmp, KVM_GPGD + srli.w \tmp, \tmp, 12 + ldx.d \tmp, \tmp1, \tmp + csrwr \tmp, KVM_CSR_PGDL + + /* Mix GID and RID */ + csrrd \tmp1, KVM_CSR_GSTAT + bstrpick.w \tmp1, \tmp1, (KVM_GSTAT_GID_SHIFT + KVM_GSTAT_GID_WIDTH - 1), KVM_GSTAT_GID_SHIFT + csrrd \tmp, KVM_CSR_GTLBC + bstrins.w \tmp, \tmp1, (KVM_GTLBC_TGID_SHIFT + KVM_GTLBC_TGID_WIDTH - 1), KVM_GTLBC_TGID_SHIFT + csrwr \tmp, KVM_CSR_GTLBC + + /* + * Switch to guest: + * GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0 + * ertn + */ + + /* + * Enable intr in root mode with future ertn so that host interrupt + * can be responsed during VM runs + * guest crmd comes from separate gcsr_CRMD register + */ + ori \tmp, zero, KVM_PRMD_PIE + csrxchg \tmp, 
\tmp, KVM_CSR_PRMD + + /* Set PVM bit to setup ertn to guest context */ + ori \tmp, zero, KVM_GSTAT_PVM + csrxchg \tmp, \tmp, KVM_CSR_GSTAT + + /* Load Guest gprs */ + kvm_restore_guest_gprs \KVM_ARCH + + /* Load KVM_ARCH register */ + KVM_LONG_L \KVM_ARCH, \KVM_ARCH, GGPR_OFFSET(\GPRNUM) + + ertn +.endm + +#ifndef EXCPTION_ENTRY +#define EXCPTION_ENTRY(name) \ + .globl name ASM_NL \ + .p2align 12; \ + name: \ + .cfi_startproc; +#endif +#ifndef EXCPTION_ENDPROC +#define EXCPTION_ENDPROC(name) \ + .cfi_endproc; \ + SYM_END(name, SYM_T_FUNC) +#endif + +/* load kvm_vcpu to a2 and store a1 for free use */ +EXCPTION_ENTRY(kvm_exception_entry) + csrwr a2, KVM_TEMP_KS + csrrd a2, KVM_VCPU_KS + KVM_LONG_ADDI a2, a2, KVM_VCPU_ARCH + + /* After save gprs, free to use any gpr */ + kvm_save_guest_gprs a2 + /* Save guest a2 */ + csrrd t0, KVM_TEMP_KS + KVM_LONG_S t0, a2, GGPR_OFFSET(REG_A2) + + b kvm_exit_entry +EXCPTION_ENDPROC(kvm_exception_entry) + +/* a2: kvm_vcpu_arch, a1 is free to use */ +SYM_FUNC_START(kvm_exit_entry) + csrrd s1, KVM_VCPU_KS + KVM_LONG_L s0, s1, KVM_VCPU_RUN + + csrrd t0, KVM_CSR_ESTAT + KVM_LONG_S t0, a2, KVM_ARCH_HESTAT + csrrd t0, KVM_CSR_ERA + KVM_LONG_S t0, a2, KVM_ARCH_GPC + csrrd t0, KVM_CSR_BADV + KVM_LONG_S t0, a2, KVM_ARCH_HBADV + csrrd t0, KVM_CSR_BADI + KVM_LONG_S t0, a2, KVM_ARCH_HBADI + + /* Restore host excfg.VS */ + csrrd t0, KVM_CSR_ECFG + KVM_LONG_L t1, a2, KVM_ARCH_HECFG + or t0, t0, t1 + csrwr t0, KVM_CSR_ECFG + + /* Restore host eentry */ + KVM_LONG_L t0, a2, KVM_ARCH_HEENTRY + csrwr t0, KVM_CSR_EENTRY + + /* restore host pgd table */ + KVM_LONG_L t0, a2, KVM_ARCH_HPGD + csrwr t0, KVM_CSR_PGDL + + /* + * Disable PGM bit to enter root mode by default with next ertn + */ + ori t0, zero, KVM_GSTAT_PVM + csrxchg zero, t0, KVM_CSR_GSTAT + + /* + * Clear GTLBC.TGID field + * 0: for root tlb update in future tlb instr + * others: for guest tlb update like gpa to hpa in future tlb instr + */ + csrrd t0, KVM_CSR_GTLBC + bstrins.w t0, zero, KVM_GTLBC_TGID_SHIFT + KVM_GTLBC_TGID_WIDTH - 1, KVM_GTLBC_TGID_SHIFT + csrwr t0, KVM_CSR_GTLBC + + KVM_LONG_L tp, a2, KVM_ARCH_HGP + KVM_LONG_L sp, a2, KVM_ARCH_HSTACK + /* Restore per cpu base register */ + KVM_LONG_L $r21, a2, KVM_ARCH_HPERCPU + + KVM_LONG_ADDI sp, sp, -PT_SIZE + + /* Prepare handle exception */ + or a0, s0, zero + or a1, s1, zero + KVM_LONG_L t8, a2, KVM_ARCH_HANDLE_EXIT + jirl ra,t8, 0 + + or a2, s1, zero + KVM_LONG_ADDI a2, a2, KVM_VCPU_ARCH + + andi t0, a0, RESUME_HOST + bnez t0, ret_to_host + INT_S zero, a2, KVM_ARCH_ISHYPCALL + +ret_to_guest: + /* Save per cpu register again, maybe switched to another cpu */ + KVM_LONG_S $r21, a2, KVM_ARCH_HPERCPU + + /* Save kvm_vcpu to kscratch */ + csrwr s1, KVM_VCPU_KS + kvm_switch_to_guest a2 REG_A2 t0 t1 + +ret_to_host: + KVM_LONG_L a2, a2, KVM_ARCH_HSTACK + addi.d a2, a2, -PT_SIZE + srai.w a3, a0, 2 + or a0, a3, zero + kvm_restore_host_gpr a2 + jirl zero, ra, 0 +SYM_FUNC_END(kvm_exit_entry) + +/* + * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu) + * + * @register_param: + * a0: kvm_run* run + * a1: kvm_vcpu* vcpu + */ +SYM_FUNC_START(kvm_enter_guest) + /* allocate space in stack bottom */ + KVM_LONG_ADDI a2, sp, -PT_SIZE + + /* save host gprs */ + kvm_save_host_gpr a2 + + /* save host crmd,prmd csr to stack */ + csrrd a3, KVM_CSR_CRMD + KVM_LONG_S a3, a2, PT_CRMD + csrrd a3, KVM_CSR_PRMD + KVM_LONG_S a3, a2, PT_PRMD + + KVM_LONG_ADDI a2, a1, KVM_VCPU_ARCH + KVM_LONG_S sp, a2, KVM_ARCH_HSTACK + KVM_LONG_S tp, a2, KVM_ARCH_HGP + /* Save per cpu base 
register */ + KVM_LONG_S $r21, a2, KVM_ARCH_HPERCPU + + /* Save kvm_vcpu to kscratch */ + csrwr a1, KVM_VCPU_KS + + kvm_switch_to_guest a2 REG_A2 t0 t1 + +SYM_FUNC_END(kvm_enter_guest) + +SYM_FUNC_START(__kvm_save_fpu) + fpu_save_csr a0 t1 + fpu_save_double a0 t1 + fpu_save_cc a0 t1 t2 + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_save_fpu) + +SYM_FUNC_START(__kvm_restore_fpu) + fpu_restore_double a0 t1 # clobbers t1 + fpu_restore_cc a0 t1 t2 # clobbers t1, t2 + fpu_restore_csr a0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_restore_fpu) + +#ifdef CONFIG_CPU_HAS_LSX +SYM_FUNC_START(__kvm_save_lsx) + fpu_save_csr a0 t1 + fpu_save_cc a0 t1 t2 + lsx_save_data a0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_save_lsx) + +SYM_FUNC_START(__kvm_restore_lsx) + lsx_restore_data a0 t1 + fpu_restore_cc a0 t1 t2 # clobbers t1, t2 + fpu_restore_csr a0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_restore_lsx) + +SYM_FUNC_START(__kvm_restore_lsx_upper) + lsx_restore_all_upper a0 t0 t1 + + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_restore_lsx_upper) +#endif + +#ifdef CONFIG_CPU_HAS_LASX +SYM_FUNC_START(__kvm_save_lasx) + fpu_save_csr a0 t1 + fpu_save_cc a0 t1 t2 + lasx_save_data a0 t1 + + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_save_lasx) + +SYM_FUNC_START(__kvm_restore_lasx) + lasx_restore_data a0 t1 + fpu_restore_cc a0 t1 t2 # clobbers t1, t2 + fpu_restore_csr a0 t1 + jirl zero, ra, 0 +SYM_FUNC_END(__kvm_restore_lasx) +#endif diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c new file mode 100644 index 000000000000..9e2220a64ed3 --- /dev/null +++ b/arch/loongarch/kvm/exit.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kvmcpu.h" +#include + +#include "trace.h" +#include "kvm_compat.h" +#include "kvmcsr.h" +#include "intc/ls3a_ext_irq.h" + +/* + * Loongarch KVM callback handling for not implemented guest exiting + */ +static int _kvm_fault_ni(struct kvm_vcpu *vcpu) +{ + unsigned long estat, badv; + unsigned int exccode, inst; + + /* + * Fetch the instruction. 
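+ * The faulting instruction word was saved in vcpu->arch.badi on exit.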
+ */ + badv = vcpu->arch.badv; + estat = vcpu->arch.host_estat; + exccode = (estat & KVM_ESTAT_EXC) >> KVM_ESTAT_EXC_SHIFT; + inst = vcpu->arch.badi; + kvm_err("Exccode: %d PC=%#lx inst=0x%08x BadVaddr=%#lx estat=%#llx\n", + exccode, vcpu->arch.pc, inst, badv, kvm_read_gcsr_estat()); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; +} + +static int _kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst) +{ + enum emulation_result er = EMULATE_DONE; + unsigned int rd, rj, csrid; + unsigned long csr_mask; + unsigned long val = 0; + + /* + * CSR value mask imm + * rj = 0 means csrrd + * rj = 1 means csrwr + * rj != 0,1 means csrxchg + */ + rd = inst.reg2csr_format.rd; + rj = inst.reg2csr_format.rj; + csrid = inst.reg2csr_format.csr; + + /* Process CSR ops */ + if (rj == 0) { + /* process csrrd */ + val = _kvm_emu_read_csr(vcpu, csrid); + if (er != EMULATE_FAIL) + vcpu->arch.gprs[rd] = val; + } else if (rj == 1) { + /* process csrwr */ + val = vcpu->arch.gprs[rd]; + _kvm_emu_write_csr(vcpu, csrid, val); + } else { + /* process csrxchg */ + val = vcpu->arch.gprs[rd]; + csr_mask = vcpu->arch.gprs[rj]; + _kvm_emu_xchg_csr(vcpu, csrid, csr_mask, val); + } + + return er; +} + +static int _kvm_emu_cache(struct kvm_vcpu *vcpu, larch_inst inst) +{ + return EMULATE_DONE; +} + +static int _kvm_trap_handle_gspr(struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_run *run = vcpu->run; + larch_inst inst; + unsigned long curr_pc; + int rd, rj; + unsigned int index; + + /* + * Fetch the instruction. + */ + inst.word = vcpu->arch.badi; + curr_pc = vcpu->arch.pc; + update_pc(&vcpu->arch); + + er = EMULATE_FAIL; + switch (((inst.word >> 24) & 0xff)) { + case 0x0: + /* cpucfg GSPR */ + if (inst.reg2_format.opcode == 0x1B) { + rd = inst.reg2_format.rd; + rj = inst.reg2_format.rj; + ++vcpu->stat.cpucfg_exits; + index = vcpu->arch.gprs[rj]; + vcpu->arch.gprs[rd] = vcpu->kvm->arch.cpucfgs.cpucfg[index]; + if ((index == 2) || (vcpu->arch.gprs[rd] == 0)) + /* + * Fallback to get host cpucfg info, this is just for + * compatible with older qemu. 
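+ * which may leave the guest cpucfg table zero-filled.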
+                 */
+                vcpu->arch.gprs[rd] = read_cpucfg(index);
+            if (index == 2)
+                /* do not support nested virtualization */
+                vcpu->arch.gprs[rd] &= ~CPUCFG2_LVZP;
+            er = EMULATE_DONE;
+        }
+        break;
+    case 0x4:
+        /* csr GSPR */
+        er = _kvm_handle_csr(vcpu, inst);
+        break;
+    case 0x6:
+        /* iocsr,cacop,idle GSPR */
+        switch (((inst.word >> 22) & 0x3ff)) {
+        case 0x18:
+            /* cache GSPR */
+            er = _kvm_emu_cache(vcpu, inst);
+            trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+            break;
+        case 0x19:
+            /* iocsr/idle GSPR */
+            switch (((inst.word >> 15) & 0x1ffff)) {
+            case 0xc90:
+                /* iocsr GSPR */
+                er = _kvm_emu_iocsr(inst, run, vcpu);
+                break;
+            case idle_op:
+                /* idle GSPR */
+                er = _kvm_emu_idle(vcpu);
+                break;
+            default:
+                er = EMULATE_FAIL;
+                break;
+            }
+            break;
+        default:
+            er = EMULATE_FAIL;
+            break;
+        }
+        break;
+    default:
+        er = EMULATE_FAIL;
+        break;
+    }
+
+    /* Rollback PC only if emulation was unsuccessful */
+    if (er == EMULATE_FAIL) {
+        kvm_err("[%#lx]%s: unsupported gspr instruction 0x%08x\n",
+            curr_pc, __func__, inst.word);
+
+        kvm_arch_vcpu_dump_regs(vcpu);
+        vcpu->arch.pc = curr_pc;
+    }
+    return er;
+}
+
+static int _kvm_check_hypcall(struct kvm_vcpu *vcpu)
+{
+    enum emulation_result ret;
+    larch_inst inst;
+    unsigned long curr_pc;
+    unsigned int code;
+
+    /*
+     * Update PC and hold onto current PC in case there is
+     * an error and we want to rollback the PC
+     */
+    inst.word = vcpu->arch.badi;
+    code = inst.reg0i15_format.simmediate;
+    curr_pc = vcpu->arch.pc;
+    update_pc(&vcpu->arch);
+
+    ret = EMULATE_DONE;
+    switch (code) {
+    case KVM_HC_CODE_SERIVCE:
+        ret = EMULATE_PV_HYPERCALL;
+        break;
+    case KVM_HC_CODE_SWDBG:
+        /*
+         * Only SWDBG (SoftWare DeBug) could stop the vm;
+         * any other code is ignored.
+         */
+        ret = EMULATE_DEBUG;
+        break;
+    default:
+        kvm_info("[%#lx] HYPCALL %#03x unsupported\n", vcpu->arch.pc, code);
+        break;
+    }
+
+    if (ret == EMULATE_DEBUG)
+        vcpu->arch.pc = curr_pc;
+
+    return ret;
+}
+
+/*
+ * Executing the cpucfg instruction will trigger a GSPR exit, as will
+ * accesses to the unimplemented CSRs 0x15, 0x16, 0x50~0x53, 0x80, 0x81,
+ * 0x90~0x95, 0x98, 0xc0~0xff, 0x100~0x109, 0x500~0x502, and the
+ * cacop_op, idle_op and iocsr operations.
+ */
+static int _kvm_handle_gspr(struct kvm_vcpu *vcpu)
+{
+    enum emulation_result er = EMULATE_DONE;
+    int ret = RESUME_GUEST;
+
+    vcpu->arch.is_hypcall = 0;
+
+    er = _kvm_trap_handle_gspr(vcpu);
+
+    if (er == EMULATE_DONE) {
+        ret = RESUME_GUEST;
+    } else if (er == EMULATE_DO_MMIO) {
+        vcpu->run->exit_reason = KVM_EXIT_MMIO;
+        ret = RESUME_HOST;
+    } else if (er == EMULATE_DO_IOCSR) {
+        vcpu->run->exit_reason = KVM_EXIT_LOONGARCH_IOCSR;
+        ret = RESUME_HOST;
+    } else {
+        kvm_err("%s internal error\n", __func__);
+        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+        ret = RESUME_HOST;
+    }
+    return ret;
+}
+
+static int _kvm_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+    enum emulation_result er = EMULATE_DONE;
+    int ret = RESUME_GUEST;
+
+    vcpu->arch.is_hypcall = 0;
+    er = _kvm_check_hypcall(vcpu);
+
+    if (er == EMULATE_PV_HYPERCALL)
+        ret = _kvm_handle_pv_hcall(vcpu);
+    else if (er == EMULATE_DEBUG) {
+        vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+        ret = RESUME_HOST;
+    } else
+        ret = RESUME_GUEST;
+
+    return ret;
+}
+
+static int _kvm_handle_gcm(struct kvm_vcpu *vcpu)
+{
+    int ret, subcode;
+
+    vcpu->arch.is_hypcall = 0;
+    ret = RESUME_GUEST;
+    subcode = (vcpu->arch.host_estat & KVM_ESTAT_ESUBCODE) >> KVM_ESTAT_ESUBCODE_SHIFT;
+    if ((subcode != EXCSUBCODE_GCSC) && (subcode != EXCSUBCODE_GCHC)) {
+        kvm_err("%s internal error\n", __func__);
+        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+        ret = RESUME_HOST;
+    }
+
+    return ret;
+}
+
+/**
+ * _kvm_handle_fpu_disabled() - Guest used fpu however it is disabled at host
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use fpu which hasn't been allowed
+ * by the root context.
+ */
+static int _kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
+{
+    struct kvm_run *run = vcpu->run;
+
+    /*
+     * If guest FPU not present, the FPU operation should have been
+     * treated as a reserved instruction!
+     * If FPU already in use, we shouldn't get this at all.
+     */
+    if (WARN_ON(!_kvm_guest_has_fpu(&vcpu->arch) ||
+            vcpu->arch.aux_inuse & KVM_LARCH_FPU)) {
+        kvm_err("%s internal error\n", __func__);
+        run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+        return RESUME_HOST;
+    }
+
+    kvm_own_fpu(vcpu);
+    return RESUME_GUEST;
+}
+
+/**
+ * _kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LSX when it is disabled in the root
+ * context.
+ */
+static int _kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu)
+{
+    struct kvm_run *run = vcpu->run;
+
+    /*
+     * If LSX not present or not exposed to guest, the LSX operation
+     * should have been treated as a reserved instruction!
+     * If LSX already in use, we shouldn't get this at all.
+     */
+    if (!_kvm_guest_has_lsx(&vcpu->arch) ||
+        !(kvm_read_gcsr_euen() & KVM_EUEN_LSXEN) ||
+        vcpu->arch.aux_inuse & KVM_LARCH_LSX) {
+        kvm_err("%s internal error, lsx %d guest euen %llx aux %x",
+            __func__, _kvm_guest_has_lsx(&vcpu->arch),
+            kvm_read_gcsr_euen(), vcpu->arch.aux_inuse);
+        run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+        return RESUME_HOST;
+    }
+
+    kvm_own_lsx(vcpu);
+    return RESUME_GUEST;
+}
+
+bool _kvm_guest_has_lasx(struct kvm_vcpu *vcpu)
+{
+    return cpu_has_lasx && vcpu->arch.lsx_enabled && vcpu->kvm->arch.cpucfg_lasx;
+}
+
+/**
+ * _kvm_handle_lasx_disabled() - Guest used LASX while disabled in root.
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LASX when it is disabled in the root
+ * context.
+ */
+static int _kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu)
+{
+    struct kvm_run *run = vcpu->run;
+
+    /*
+     * If LASX not present or not exposed to guest, the LASX operation
+     * should have been treated as a reserved instruction!
+     * If LASX already in use, we shouldn't get this at all.
+     */
+    if (!_kvm_guest_has_lasx(vcpu) ||
+        !(kvm_read_gcsr_euen() & KVM_EUEN_LSXEN) ||
+        !(kvm_read_gcsr_euen() & KVM_EUEN_LASXEN) ||
+        vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
+        kvm_err("%s internal error, lasx %d guest euen %llx aux %x",
+            __func__, _kvm_guest_has_lasx(vcpu),
+            kvm_read_gcsr_euen(), vcpu->arch.aux_inuse);
+        run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+        return RESUME_HOST;
+    }
+
+    kvm_own_lasx(vcpu);
+
+    return RESUME_GUEST;
+}
+
+/**
+ * _kvm_handle_lbt_disabled() - Guest used LBT however it is disabled at host
+ * @vcpu:	Virtual CPU context.
+ *
+ * Handle when the guest attempts to use LBT which hasn't been allowed
+ * by the root context.
+ */
+static int _kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu)
+{
+    struct kvm_run *run = vcpu->run;
+
+    /*
+     * If guest LBT not present, the LBT operation should have been
+     * treated as a reserved instruction!
+     * If LBT already in use, we shouldn't get this at all.
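+     * (LBT ownership is tracked via the KVM_LARCH_LBT bit in vcpu->arch.aux_inuse.)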
+ */ + if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) { + kvm_err("%s internal error\n", __func__); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + kvm_own_lbt(vcpu); + return RESUME_GUEST; +} + +static int _kvm_handle_read_fault(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + ulong badv = vcpu->arch.badv; + larch_inst inst; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + if (kvm_handle_mm_fault(vcpu, badv, false)) { + /* A code fetch fault doesn't count as an MMIO */ + if (kvm_is_ifetch_fault(&vcpu->arch)) { + kvm_err("%s ifetch error addr:%lx\n", __func__, badv); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + inst.word = vcpu->arch.badi; + er = _kvm_emu_mmio_read(vcpu, inst); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Load failed: PC: %#lx, BadVaddr: %#lx\n", + vcpu->arch.pc, badv); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int _kvm_handle_write_fault(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + ulong badv = vcpu->arch.badv; + larch_inst inst; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + if (kvm_handle_mm_fault(vcpu, badv, true)) { + + /* Treat as MMIO */ + inst.word = vcpu->arch.badi; + er = _kvm_emu_mmio_write(vcpu, inst); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Store failed: PC: %#lx, BadVaddr: %#lx\n", + vcpu->arch.pc, badv); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int _kvm_handle_debug(struct kvm_vcpu *vcpu) +{ + uint32_t fwps, mwps; + + fwps = kvm_csr_readq(KVM_CSR_FWPS); + mwps = kvm_csr_readq(KVM_CSR_MWPS); + if (fwps & 0xff) + kvm_csr_writeq(fwps, KVM_CSR_FWPS); + if (mwps & 0xff) + kvm_csr_writeq(mwps, KVM_CSR_MWPS); + vcpu->run->debug.arch.exception = EXCCODE_WATCH; + vcpu->run->debug.arch.fwps = fwps; + vcpu->run->debug.arch.mwps = mwps; + vcpu->run->exit_reason = KVM_EXIT_DEBUG; + return RESUME_HOST; +} + +static exit_handle_fn _kvm_fault_tables[EXCCODE_INT_START] = { + [EXCCODE_TLBL] = _kvm_handle_read_fault, + [EXCCODE_TLBS] = _kvm_handle_write_fault, + [EXCCODE_TLBI] = _kvm_handle_read_fault, + [EXCCODE_TLBM] = _kvm_handle_write_fault, + [EXCCODE_TLBNR] = _kvm_handle_read_fault, + [EXCCODE_TLBNX] = _kvm_handle_read_fault, + [EXCCODE_FPDIS] = _kvm_handle_fpu_disabled, + [EXCCODE_LSXDIS] = _kvm_handle_lsx_disabled, + [EXCCODE_LASXDIS] = _kvm_handle_lasx_disabled, + [EXCCODE_WATCH] = _kvm_handle_debug, + [EXCCODE_GSPR] = _kvm_handle_gspr, + [EXCCODE_HVC] = _kvm_handle_hypcall, + [EXCCODE_GCM] = _kvm_handle_gcm, + [EXCCODE_BTDIS] = _kvm_handle_lbt_disabled, +}; + +int _kvm_handle_fault(struct kvm_vcpu *vcpu, int fault) +{ + return _kvm_fault_tables[fault](vcpu); +} + +void _kvm_init_fault(void) +{ + int i; + + for (i = 0; i < EXCCODE_INT_START; i++) + if (!_kvm_fault_tables[i]) + _kvm_fault_tables[i] = _kvm_fault_ni; +} diff --git a/arch/loongarch/kvm/fpu.c b/arch/loongarch/kvm/fpu.c new file mode 100644 index 000000000000..56a7e1ba70b7 --- /dev/null +++ 
b/arch/loongarch/kvm/fpu.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+/* FPU/LSX context management */
+void __kvm_save_fpu(struct loongarch_fpu *fpu);
+void __kvm_restore_fpu(struct loongarch_fpu *fpu);
+
+void kvm_save_fpu(struct kvm_vcpu *cpu)
+{
+    return __kvm_save_fpu(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_save_fpu);
+
+void kvm_restore_fpu(struct kvm_vcpu *cpu)
+{
+    return __kvm_restore_fpu(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_restore_fpu);
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __kvm_save_lsx(struct loongarch_fpu *fpu);
+void __kvm_restore_lsx(struct loongarch_fpu *fpu);
+void __kvm_restore_lsx_upper(struct loongarch_fpu *fpu);
+
+void kvm_save_lsx(struct kvm_vcpu *cpu)
+{
+    return __kvm_save_lsx(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_save_lsx);
+
+void kvm_restore_lsx(struct kvm_vcpu *cpu)
+{
+    return __kvm_restore_lsx(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_restore_lsx);
+
+void kvm_restore_lsx_upper(struct kvm_vcpu *cpu)
+{
+    return __kvm_restore_lsx_upper(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_restore_lsx_upper);
+
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __kvm_save_lasx(struct loongarch_fpu *fpu);
+void __kvm_restore_lasx(struct loongarch_fpu *fpu);
+void __kvm_restore_lasx_upper(struct loongarch_fpu *fpu);
+
+void kvm_save_lasx(struct kvm_vcpu *cpu)
+{
+    return __kvm_save_lasx(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_save_lasx);
+
+void kvm_restore_lasx(struct kvm_vcpu *cpu)
+{
+    return __kvm_restore_lasx(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_restore_lasx);
+
+void kvm_restore_lasx_upper(struct kvm_vcpu *cpu)
+{
+    return __kvm_restore_lasx_upper(&cpu->arch.fpu);
+}
+EXPORT_SYMBOL_GPL(kvm_restore_lasx_upper);
+#endif
+
+#ifdef CONFIG_CPU_HAS_LBT
+void kvm_restore_lbt(struct kvm_vcpu *cpu)
+{
+    restore_lbt_registers(&cpu->arch.lbt);
+}
+EXPORT_SYMBOL_GPL(kvm_restore_lbt);
+
+void kvm_save_lbt(struct kvm_vcpu *cpu)
+{
+    save_lbt_registers(&cpu->arch.lbt);
+}
+EXPORT_SYMBOL_GPL(kvm_save_lbt);
+#endif
+
+
+EXPORT_SYMBOL_GPL(kvm_enter_guest);
+EXPORT_SYMBOL_GPL(kvm_exception_entry);
+
diff --git a/arch/loongarch/kvm/hypcall.c b/arch/loongarch/kvm/hypcall.c
new file mode 100644
index 000000000000..83b3ef0e8a83
--- /dev/null
+++ b/arch/loongarch/kvm/hypcall.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "intc/ls3a_ipi.h"
+
+int kvm_virt_ipi(struct kvm_vcpu *vcpu)
+{
+    int ret = 0;
+    u64 ipi_bitmap;
+    unsigned int min, action, cpu;
+
+    ipi_bitmap = vcpu->arch.gprs[REG_A1];
+    min = vcpu->arch.gprs[REG_A2];
+    action = vcpu->arch.gprs[REG_A3];
+
+    if (ipi_bitmap) {
+        cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+        while (cpu < BITS_PER_LONG) {
+            kvm_helper_send_ipi(vcpu, cpu + min, action);
+            cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1);
+        }
+    }
+
+    return ret;
+}
+
+int kvm_save_notify(struct kvm_vcpu *vcpu)
+{
+    unsigned long num, id, data;
+    struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+
+    int ret = 0;
+
+    num = vcpu->arch.gprs[REG_A0];
+    id = vcpu->arch.gprs[REG_A1];
+    data = vcpu->arch.gprs[REG_A2];
+
+    switch (id) {
+    case KVM_FEATURE_STEAL_TIME:
+        if (!sched_info_on())
+            break;
+
+        if (vcpu->arch.st.guest_addr && (data == 0))
+            kvm_release_pfn(cache->pfn, cache->dirty, cache);
+
+        vcpu->arch.st.guest_addr = data;
+        kvm_debug("cpu :%d 
addr:%lx\n", vcpu->vcpu_id, data); + vcpu->arch.st.last_steal = current->sched_info.run_delay; + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + break; + default: + break; + }; + + return ret; +}; + +static int _kvm_pv_feature(struct kvm_vcpu *vcpu) +{ + int feature = vcpu->arch.gprs[REG_A1]; + int ret = KVM_RET_NOT_SUPPORTED; + switch (feature) { + case KVM_FEATURE_STEAL_TIME: + if (sched_info_on()) + ret = KVM_RET_SUC; + break; + case KVM_FEATURE_MULTI_IPI: + ret = KVM_RET_SUC; + break; + case KVM_FEATURE_PARAVIRT_SPINLOCK: +#ifdef CONFIG_PARAVIRT_SPINLOCKS + ret = KVM_RET_SUC; +#endif + break; + default: + break; + } + return ret; +} + +static int kvm_pv_kick_cpu(struct kvm_vcpu *vcpu) +{ + int cpu = vcpu->arch.gprs[REG_A1]; + struct kvm_vcpu *dst_vcpu = vcpu->kvm->vcpus[cpu]; + int ret = 0; + + dst_vcpu->arch.pv.pv_unhalted = true; + kvm_make_request(KVM_REQ_EVENT, dst_vcpu); + kvm_vcpu_kick(dst_vcpu); + + return ret; +} + +/* + * hypcall emulation always return to guest, Caller should check retval. + */ +int _kvm_handle_pv_hcall(struct kvm_vcpu *vcpu) +{ + unsigned long func = vcpu->arch.gprs[REG_A0]; + int hyp_ret = KVM_RET_NOT_SUPPORTED; + + switch (func) { + case KVM_HC_FUNC_FEATURE: + hyp_ret = _kvm_pv_feature(vcpu); + break; + case KVM_HC_FUNC_NOTIFY: + hyp_ret = kvm_save_notify(vcpu); + break; + case KVM_HC_FUNC_IPI: + hyp_ret = kvm_virt_ipi(vcpu); + break; + case KVM_HC_KICK_CPU: + hyp_ret = kvm_pv_kick_cpu(vcpu); + break; + default: + kvm_info("[%#lx] hvc func:%#lx unsupported\n", vcpu->arch.pc, func); + break; + }; + + vcpu->arch.gprs[REG_A0] = hyp_ret; + + return RESUME_GUEST; +} diff --git a/arch/loongarch/kvm/intc/irqchip-debug.c b/arch/loongarch/kvm/intc/irqchip-debug.c new file mode 100644 index 000000000000..488b00366b47 --- /dev/null +++ b/arch/loongarch/kvm/intc/irqchip-debug.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include "kvmcpu.h" +#include "ls3a_ext_irq.h" +#include "ls7a_irq.h" + +#ifdef CONFIG_DEBUG_FS +static int irqchip_state_show(struct seq_file *m, void *v) +{ + struct kvm *kvm = m->private; + + kvm_get_kvm(kvm); + kvm_dump_ls3a_extirq_state(m, kvm->arch.v_extirq); + kvm_dump_ls7a_ioapic_state(m, kvm->arch.v_ioapic); + kvm_put_kvm(kvm); + + return 0; +} + +static int irqchip_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, irqchip_state_show, inode->i_private); +} + +static const struct file_operations irqchip_debug_fops = { + .open = irqchip_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void irqchip_debug_init(struct kvm *kvm) +{ + debugfs_create_file("irqchip-state", 0444, kvm->debugfs_dentry, kvm, + &irqchip_debug_fops); +} +#else + +void irqchip_debug_init(struct kvm *kvm) {} +#endif +void irqchip_debug_destroy(struct kvm *kvm) +{ +} diff --git a/arch/loongarch/kvm/intc/ls3a_ext_irq.c b/arch/loongarch/kvm/intc/ls3a_ext_irq.c new file mode 100644 index 000000000000..a110a0e7e1ce --- /dev/null +++ b/arch/loongarch/kvm/intc/ls3a_ext_irq.c @@ -0,0 +1,922 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include "kvmcpu.h" +#include "kvm_compat.h" +#include "ls3a_ipi.h" +#include "ls7a_irq.h" +#include "ls3a_ext_irq.h" + +#define ls3a_ext_irq_lock(s, flags) spin_lock_irqsave(&s->lock, flags) +#define ls3a_ext_irq_unlock(s, flags) spin_unlock_irqrestore(&s->lock, flags) + +extern 
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_loongarch_interrupt *irq); +void ext_deactive_core_isr(struct kvm *kvm, int irq_num, int vcpu_id) +{ + int ipnum; + unsigned long found1; + struct kvm_loongarch_interrupt irq; + struct ls3a_kvm_extirq *s = ls3a_ext_irqchip(kvm); + struct kvm_ls3a_extirq_state *state = &(s->ls3a_ext_irq); + + ipnum = state->ext_sw_ipmap[irq_num]; + + bitmap_clear((void *)state->ext_isr.reg_u8, irq_num, 1); + bitmap_clear((void *)state->ext_core_isr.reg_u8[vcpu_id], irq_num, 1); + + bitmap_clear((void *)state->ext_sw_ipisr[vcpu_id][ipnum + 2], irq_num, 1); + found1 = find_next_bit((void *)state->ext_sw_ipisr[vcpu_id][ipnum + 2], EXTIOI_IRQS, 0); + kvm_debug("vcpu_id %d irqnum %d found:0x%lx ipnum %d down\n", vcpu_id, irq_num, found1, ipnum); + if (found1 == EXTIOI_IRQS) { + irq.cpu = vcpu_id; + irq.irq = -(ipnum + 2); /* IP2~IP5 */ + if (likely(kvm->vcpus[vcpu_id])) + kvm_vcpu_ioctl_interrupt(kvm->vcpus[vcpu_id], &irq); + kvm->stat.trigger_ls3a_ext_irq++; + } +} + +/** + * ext_irq_update_core() + * @kvm: KVM structure pointer + * @irq_num: 0~256 ext irq num + * @level: 0~1 High and low level + * + * Route the status of the extended interrupt to the host CPU core. + * + */ +void ext_irq_update_core(struct kvm *kvm, int irq_num, int level) +{ + int nrcpus, ipnum, vcpu_id; + unsigned long found1; + struct kvm_loongarch_interrupt irq; + struct ls3a_kvm_extirq *s = ls3a_ext_irqchip(kvm); + struct kvm_ls3a_extirq_state *state = &(s->ls3a_ext_irq); + + nrcpus = atomic_read(&kvm->online_vcpus); + vcpu_id = state->ext_sw_coremap[irq_num]; + ipnum = state->ext_sw_ipmap[irq_num]; + + if (vcpu_id > (nrcpus - 1)) { + vcpu_id = 0; + } + + if (level == 1) { + if (test_bit(irq_num, (void *)state->ext_en.reg_u8) == false) { + return; + } + if (test_bit(irq_num, (void *)state->ext_isr.reg_u8) == false) { + return; + } + bitmap_set((void *)state->ext_core_isr.reg_u8[vcpu_id], irq_num, 1); + + found1 = find_next_bit((void *)state->ext_sw_ipisr[vcpu_id][ipnum + 2], EXTIOI_IRQS, 0); + bitmap_set((void *)state->ext_sw_ipisr[vcpu_id][ipnum + 2], irq_num, 1); + kvm_debug("%s:%d --- vcpu_id %d irqnum %d found1 0x%lx ipnum %d\n", + __FUNCTION__, __LINE__, vcpu_id, irq_num, found1, ipnum); + if (found1 == EXTIOI_IRQS) { + irq.cpu = vcpu_id; + irq.irq = ipnum + 2; /* IP2~IP5 */ + kvm_debug("%s:%d --- vcpu_id %d ipnum %d raise\n", + __FUNCTION__, __LINE__, vcpu_id, ipnum); + if (likely(kvm->vcpus[vcpu_id])) + kvm_vcpu_ioctl_interrupt(kvm->vcpus[vcpu_id], &irq); + kvm->stat.trigger_ls3a_ext_irq++; + } + } else { + bitmap_clear((void *)state->ext_isr.reg_u8, irq_num, 1); + bitmap_clear((void *)state->ext_core_isr.reg_u8[vcpu_id], irq_num, 1); + + bitmap_clear((void *)state->ext_sw_ipisr[vcpu_id][ipnum + 2], irq_num, 1); + found1 = find_next_bit((void *)state->ext_sw_ipisr[vcpu_id][ipnum + 2], EXTIOI_IRQS, 0); + if (found1 == EXTIOI_IRQS) { + irq.cpu = vcpu_id; + irq.irq = -(ipnum + 2); /* IP2~IP5 */ + if (likely(kvm->vcpus[vcpu_id])) + kvm_vcpu_ioctl_interrupt(kvm->vcpus[vcpu_id], &irq); + kvm->stat.trigger_ls3a_ext_irq++; + } + + } +} + +void msi_irq_handler(struct kvm *kvm, int irq, int level) +{ + unsigned long flags; + struct ls3a_kvm_extirq *s = ls3a_ext_irqchip(kvm); + struct kvm_ls3a_extirq_state *state = &(s->ls3a_ext_irq); + + kvm_debug("ext_irq_handler:irq = %d,level = %d\n", irq, level); + + ls3a_ext_irq_lock(s, flags); + if (level == 1) { + if (test_bit(irq, (void *)&state->ext_isr)) + goto out; + __set_bit(irq, (void *)&state->ext_isr); + } else { + if 
(!test_bit(irq, (void *)&state->ext_isr)) + goto out; + __clear_bit(irq, (void *)&state->ext_isr); + } + + ext_irq_update_core(kvm, irq, level); +out: + ls3a_ext_irq_unlock(s, flags); +} + +static int ls3a_ext_intctl_readb(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, void *val) +{ + uint64_t offset, reg_count; + struct ls3a_kvm_extirq *s = NULL; + struct kvm_ls3a_extirq_state *state = NULL; + int vcpu_id; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + state = &(s->ls3a_ext_irq); + + offset = addr & 0xfffff; + + if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) { + reg_count = (offset - EXTIOI_ENABLE_START); + *(uint8_t *)val = state->ext_en.reg_u8[reg_count]; + } else if ((offset >= EXTIOI_BOUNCE_START) && (offset < EXTIOI_BOUNCE_END)) { + reg_count = (offset - EXTIOI_BOUNCE_START); + *(uint8_t *)val = state->bounce.reg_u8[reg_count]; + } else if ((offset >= EXTIOI_ISR_START) && (offset < EXTIOI_ISR_END)) { + reg_count = (offset - EXTIOI_ISR_START); + *(uint8_t *)val = state->ext_isr.reg_u8[reg_count]; + } else if ((offset >= EXTIOI_COREISR_START) && (offset < EXTIOI_COREISR_END)) { + /* percpu(32 bytes) coreisr reg_count is 0~31 */ + vcpu_id = (offset >> 8) & 0xff; + reg_count = offset & 0xff; + *(uint8_t *)val = state->ext_core_isr.reg_u8[vcpu_id][reg_count]; + } else if ((offset >= EXTIOI_IPMAP_START) && (offset < EXTIOI_IPMAP_END)) { + reg_count = (offset - EXTIOI_IPMAP_START); + *(uint8_t *)val = state->ip_map.reg_u8[reg_count]; + } else if ((offset >= EXTIOI_COREMAP_START) && (offset < EXTIOI_COREMAP_END)) { + reg_count = (offset - EXTIOI_COREMAP_START); + *(uint8_t *)val = state->core_map.reg_u8[reg_count]; + } else if ((offset >= EXTIOI_NODETYPE_START) && (offset < EXTIOI_NODETYPE_END)) { + reg_count = (offset - EXTIOI_NODETYPE_START); + *(uint8_t *)val = state->node_type.reg_u8[reg_count]; + } + kvm_debug("%s: addr=0x%llx,val=0x%x\n", + __FUNCTION__, addr, *(uint8_t *)val); + return 0; +} + +static int ls3a_ext_intctl_readw(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, void *val) +{ + uint64_t offset, reg_count; + struct ls3a_kvm_extirq *s = NULL; + struct kvm_ls3a_extirq_state *state = NULL; + int vcpu_id; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + state = &(s->ls3a_ext_irq); + + offset = addr & 0xfffff; + + if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) { + reg_count = (offset - EXTIOI_ENABLE_START) / 4; + *(uint32_t *)val = state->ext_en.reg_u32[reg_count]; + } else if ((offset >= EXTIOI_BOUNCE_START) && (offset < EXTIOI_BOUNCE_END)) { + reg_count = (offset - EXTIOI_BOUNCE_START) / 4; + *(uint32_t *)val = state->bounce.reg_u32[reg_count]; + } else if ((offset >= EXTIOI_ISR_START) && (offset < EXTIOI_ISR_END)) { + reg_count = (offset - EXTIOI_ISR_START) / 4; + *(uint32_t *)val = state->ext_isr.reg_u32[reg_count]; + } else if ((offset >= EXTIOI_COREISR_START) && (offset < EXTIOI_COREISR_END)) { + /* percpu(32 bytes) coreisr reg_count is 0~7*/ + vcpu_id = (offset >> 8) & 0xff; + reg_count = (offset & 0xff) / 4; + *(uint32_t *)val = state->ext_core_isr.reg_u32[vcpu_id][reg_count]; + } else if ((offset >= EXTIOI_IPMAP_START) && (offset < EXTIOI_IPMAP_END)) { + reg_count = (offset - EXTIOI_IPMAP_START) / 4; + *(uint32_t *)val = state->ip_map.reg_u32[reg_count]; + } else if ((offset >= EXTIOI_COREMAP_START) && (offset < EXTIOI_COREMAP_END)) { + reg_count = (offset - EXTIOI_COREMAP_START) / 4; + *(uint32_t *)val = state->core_map.reg_u32[reg_count]; + } else if ((offset 
>= EXTIOI_NODETYPE_START) && (offset < EXTIOI_NODETYPE_END)) {
+        reg_count = (offset - EXTIOI_NODETYPE_START) / 4;
+        *(uint32_t *)val = state->node_type.reg_u32[reg_count];
+    }
+    kvm_debug("%s: addr=0x%llx,val=0x%x\n",
+            __FUNCTION__, addr, *(uint32_t *)val);
+
+    return 0;
+}
+
+static int ls3a_ext_intctl_readl(struct kvm_vcpu *vcpu,
+                struct kvm_io_device *dev,
+                gpa_t addr, void *val)
+{
+    uint64_t offset, reg_count;
+    struct ls3a_kvm_extirq *s = NULL;
+    struct kvm_ls3a_extirq_state *state = NULL;
+    int vcpu_id;
+
+    s = container_of(dev, struct ls3a_kvm_extirq, device);
+
+    state = &(s->ls3a_ext_irq);
+
+    offset = addr & 0xfffff;
+
+    if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) {
+        reg_count = (offset - EXTIOI_ENABLE_START) / 8;
+        *(uint64_t *)val = state->ext_en.reg_u64[reg_count];
+    } else if ((offset >= EXTIOI_BOUNCE_START) && (offset < EXTIOI_BOUNCE_END)) {
+        reg_count = (offset - EXTIOI_BOUNCE_START) / 8;
+        *(uint64_t *)val = state->bounce.reg_u64[reg_count];
+    } else if ((offset >= EXTIOI_ISR_START) && (offset < EXTIOI_ISR_END)) {
+        reg_count = (offset - EXTIOI_ISR_START) / 8;
+        *(uint64_t *)val = state->ext_isr.reg_u64[reg_count];
+    } else if ((offset >= EXTIOI_COREISR_START) && (offset < EXTIOI_COREISR_END)) {
+        /* percpu(32 bytes) coreisr reg_count is 0~3 */
+        vcpu_id = (offset >> 8) & 0xff;
+        reg_count = (offset & 0xff) / 8;
+
+        *(uint64_t *)val = state->ext_core_isr.reg_u64[vcpu_id][reg_count];
+    } else if ((offset >= EXTIOI_IPMAP_START) && (offset < EXTIOI_IPMAP_END)) {
+        *(uint64_t *)val = state->ip_map.reg_u64;
+    } else if ((offset >= EXTIOI_COREMAP_START) && (offset < EXTIOI_COREMAP_END)) {
+        reg_count = (offset - EXTIOI_COREMAP_START) / 8;
+        *(uint64_t *)val = state->core_map.reg_u64[reg_count];
+    } else if ((offset >= EXTIOI_NODETYPE_START) && (offset < EXTIOI_NODETYPE_END)) {
+        reg_count = (offset - EXTIOI_NODETYPE_START) / 8;
+        *(uint64_t *)val = state->node_type.reg_u64[reg_count];
+    }
+    kvm_debug("%s: addr=0x%llx,val=0x%llx\n",
+            __FUNCTION__, addr, *(uint64_t *)val);
+    return 0;
+}
+/**
+ * ls3a_ext_intctl_read()
+ * @vcpu: Virtual CPU pointer
+ * @dev: The KVM IO device for this interrupt controller
+ * @addr: Register address
+ * @size: The width of the register to be read.
+ * @val: The pointer to the read result.
+ *
+ * Emulates reads of the extended-interrupt controller registers.
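+ * Reads are dispatched by access width to the byte/word/dword helpers above, under the per-irqchip spinlock.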
+ * + */ +static int ls3a_ext_intctl_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int size, void *val) +{ + struct ls3a_kvm_extirq *s = NULL; + unsigned long flags; + uint64_t offset; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + offset = addr & 0xfffff; + if (offset & (size - 1)) { + printk("%s:unaligned address access %llx size %d\n", + __FUNCTION__, addr, size); + return 0; + } + addr = (addr & 0xfffff) - EXTIOI_ADDR_OFF; + ls3a_ext_irq_lock(s, flags); + + switch (size) { + case 1: + ls3a_ext_intctl_readb(vcpu, dev, addr, val); + break; + case 4: + ls3a_ext_intctl_readw(vcpu, dev, addr, val); + break; + case 8: + ls3a_ext_intctl_readl(vcpu, dev, addr, val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx, size %d\n", + __FUNCTION__, addr, size); + } + ls3a_ext_irq_unlock(s, flags); + kvm_debug("%s(%d):address access %llx size %d\n", + __FUNCTION__, __LINE__, offset, size); + + return 0; +} + +static int ls3a_ext_intctl_writeb(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, const void *__val) +{ + uint64_t offset, reg_count; + uint8_t val_data_u8, old_data_u8; + struct ls3a_kvm_extirq *s = NULL; + struct kvm_ls3a_extirq_state *state = NULL; + struct kvm *kvm = NULL; + int mask, level, i, irqnum, ipnum; + int vcpu_id; + + unsigned long val = *(unsigned long *)__val; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + state = &(s->ls3a_ext_irq); + kvm = s->kvm; + + offset = addr & 0xfffff; + val_data_u8 = val & 0xffUL; + + kvm_debug("%s: addr=0x%llx,val=0x%lx\n", __FUNCTION__, addr, val); + + if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) { + reg_count = (offset - EXTIOI_ENABLE_START); + old_data_u8 = state->ext_en.reg_u8[reg_count]; + if (old_data_u8 != val_data_u8) { + state->ext_en.reg_u8[reg_count] = val_data_u8; + old_data_u8 = old_data_u8 ^ val_data_u8; + mask = 0x1; + for (i = 0; i < 8; i++) { + if (old_data_u8 & mask) { + level = !!(val_data_u8 & (0x1 << i)); + if (level) + ext_irq_update_core(kvm, i + reg_count * 8, level); + } + mask = mask << 1; + } + } + } else if ((offset >= EXTIOI_BOUNCE_START) && (offset < EXTIOI_BOUNCE_END)) { + reg_count = (offset - EXTIOI_BOUNCE_START); + state->bounce.reg_u8[reg_count] = val_data_u8; + } else if ((offset >= EXTIOI_ISR_START) && (offset < EXTIOI_ISR_END)) { + /*can not be writen*/ + reg_count = (offset - EXTIOI_ISR_START) & 0x1f; + old_data_u8 = state->ext_isr.reg_u8[reg_count]; + state->ext_isr.reg_u8[reg_count] = old_data_u8 & (~val_data_u8); + + mask = 0x1; + for (i = 0; i < 8; i++) { + if ((old_data_u8 & mask) && (val_data_u8 & mask)) { + ext_irq_update_core(kvm, i + reg_count * 8, 0); + } + mask = mask << 1; + } + + } else if ((offset >= EXTIOI_COREISR_START) && (offset < EXTIOI_COREISR_END)) { + int bits; + /* percpu(32 bytes) coreisr reg_count is 0~31 */ + vcpu_id = (offset >> 8) & 0xff; + reg_count = offset & 0xff; + + state->ext_core_isr.reg_u8[vcpu_id][reg_count] &= ~val_data_u8; + + bits = sizeof(val_data_u8) * 8; + i = find_first_bit((void *)&val_data_u8, bits); + while (i < bits) { + ext_deactive_core_isr(kvm, i + reg_count * bits, vcpu_id); + bitmap_clear((void *)&val_data_u8, i, 1); + i = find_first_bit((void *)&val_data_u8, bits); + } + } else if ((offset >= EXTIOI_IPMAP_START) && (offset < EXTIOI_IPMAP_END)) { + /*drop arch.core_ip_mask use state->ip_map*/ + reg_count = (offset - EXTIOI_IPMAP_START); + state->ip_map.reg_u8[reg_count] = val_data_u8; + + ipnum = 0; + + for (i = 0; i < 4; i++) { + if 
(val_data_u8 & (0x1 << i)) { + ipnum = i; + break; + } + } + + if (val_data_u8) { + for (i = 0; i < 32; i++) { + irqnum = reg_count * 32 + i; + state->ext_sw_ipmap[irqnum] = ipnum; + } + } else { + for (i = 0; i < 32; i++) { + irqnum = reg_count * 32 + i; + state->ext_sw_ipmap[irqnum] = 0; + } + } + } else if ((offset >= EXTIOI_COREMAP_START) && (offset < EXTIOI_COREMAP_END)) { + reg_count = (offset - EXTIOI_COREMAP_START); + state->core_map.reg_u8[reg_count] = val_data_u8; + state->ext_sw_coremap[reg_count] = val_data_u8; + } else if ((offset >= EXTIOI_NODETYPE_START) && (offset < EXTIOI_NODETYPE_END)) { + reg_count = (offset - EXTIOI_NODETYPE_START); + state->node_type.reg_u8[reg_count] = val_data_u8; + } else { + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx\n", + __FUNCTION__, addr); + } + + return 0; +} + +static int ls3a_ext_intctl_writew(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, const void *__val) +{ + uint64_t offset, reg_count; + uint32_t val_data_u32, old_data_u32, mask; + struct ls3a_kvm_extirq *s = NULL; + struct kvm_ls3a_extirq_state *state = NULL; + struct kvm *kvm = NULL; + uint8_t tmp_data_u8; + int i, level, vcpu_id; + unsigned long val; + + val = *(unsigned long *)__val; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + state = &(s->ls3a_ext_irq); + kvm = s->kvm; + + offset = addr & 0xfffff; + val_data_u32 = val & 0xffffffffUL; + + kvm_debug("%s: addr=0x%llx,val=0x%lx\n", __FUNCTION__, addr, val); + + if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) { + reg_count = (offset - EXTIOI_ENABLE_START) / 4; + old_data_u32 = state->ext_en.reg_u32[reg_count]; + if (old_data_u32 != val_data_u32) { + state->ext_en.reg_u32[reg_count] = val_data_u32; + old_data_u32 = old_data_u32 ^ val_data_u32; + + mask = 0x1; + for (i = 0; i < 8 * sizeof(old_data_u32); i++) { + if (old_data_u32 & mask) { + level = !!(val_data_u32 & (0x1 << i)); + if (level) + ext_irq_update_core(kvm, i + reg_count * 32, level); + } + mask = mask << 1; + } + } + } else if ((offset >= EXTIOI_BOUNCE_START) && (offset < EXTIOI_BOUNCE_END)) { + reg_count = (offset - EXTIOI_BOUNCE_START) / 4; + state->bounce.reg_u32[reg_count] = val_data_u32; + } else if ((offset >= EXTIOI_ISR_START) && (offset < EXTIOI_ISR_END)) { + /*can not be writen*/ + reg_count = (offset - EXTIOI_ISR_START) / 4; + old_data_u32 = state->ext_isr.reg_u32[reg_count]; + state->ext_isr.reg_u32[reg_count] = old_data_u32 & (~val_data_u32); + + mask = 0x1; + for (i = 0; i < 8 * sizeof(old_data_u32); i++) { + if ((old_data_u32 & mask) && (val_data_u32 & mask)) { + ext_irq_update_core(kvm, i + reg_count * 32, 0); + } + mask = mask << 1; + } + } else if ((offset >= EXTIOI_COREISR_START) && (offset < EXTIOI_COREISR_END)) { + int bits; + /* percpu(32 bytes) coreisr reg_count is 0~7*/ + vcpu_id = (offset >> 8) & 0xff; + reg_count = (offset & 0xff) / 4; + + /*ext_core_ioisr*/ + state->ext_core_isr.reg_u32[vcpu_id][reg_count] &= ~val_data_u32; + + bits = sizeof(val_data_u32) * 8; + i = find_first_bit((void *)&val_data_u32, bits); + while (i < bits) { + ext_deactive_core_isr(kvm, i + reg_count * bits, vcpu_id); + bitmap_clear((void *)&val_data_u32, i, 1); + i = find_first_bit((void *)&val_data_u32, bits); + } + } else if ((offset >= EXTIOI_IPMAP_START) && (offset < EXTIOI_IPMAP_END)) { + tmp_data_u8 = val_data_u32 & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr, &tmp_data_u8); + tmp_data_u8 = (val_data_u32 >> 8) & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr + 1, &tmp_data_u8); + tmp_data_u8 = 
(val_data_u32 >> 16) & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr + 2, &tmp_data_u8); + tmp_data_u8 = (val_data_u32 >> 24) & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr + 3, &tmp_data_u8); + } else if ((offset >= EXTIOI_COREMAP_START) && (offset < EXTIOI_COREMAP_END)) { + tmp_data_u8 = val_data_u32 & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr, &tmp_data_u8); + tmp_data_u8 = (val_data_u32 >> 8) & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr + 1, &tmp_data_u8); + tmp_data_u8 = (val_data_u32 >> 16) & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr + 2, &tmp_data_u8); + tmp_data_u8 = (val_data_u32 >> 24) & 0xff; + ls3a_ext_intctl_writeb(vcpu, dev, addr + 3, &tmp_data_u8); + kvm_debug("%s:id:%d addr=0x%llx, offset 0x%llx val 0x%x\n", + __FUNCTION__, vcpu->vcpu_id, addr, offset, val_data_u32); + + } else if ((offset >= EXTIOI_NODETYPE_START) && (offset < EXTIOI_NODETYPE_END)) { + reg_count = (offset - EXTIOI_NODETYPE_START) / 4; + state->node_type.reg_u32[reg_count] = val_data_u32; + } else { + WARN_ONCE(1, "%s:%d Abnormal address access:addr 0x%llx\n", + __FUNCTION__, __LINE__, addr); + } + + return 0; +} + +static int ls3a_ext_intctl_writel(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, const void *__val) +{ + uint64_t offset, val_data_u64, old_data_u64, reg_count, mask, i; + struct ls3a_kvm_extirq *s = NULL; + struct kvm_ls3a_extirq_state *state = NULL; + struct kvm *kvm = NULL; + uint8_t tmp_data_u8; + int level, vcpu_id; + + unsigned long val = *(unsigned long *)__val; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + state = &(s->ls3a_ext_irq); + kvm = s->kvm; + + offset = addr & 0xfffff; + val_data_u64 = val; + + kvm_debug("%s: addr=0x%llx,val=0x%lx\n", __FUNCTION__, addr, val); + + if ((offset >= EXTIOI_ENABLE_START) && (offset < EXTIOI_ENABLE_END)) { + reg_count = (offset - EXTIOI_ENABLE_START) / 8; + old_data_u64 = state->ext_en.reg_u64[reg_count]; + if (old_data_u64 != val_data_u64) { + state->ext_en.reg_u64[reg_count] = val_data_u64; + old_data_u64 = old_data_u64 ^ val_data_u64; + + mask = 0x1; + for (i = 0; i < 8 * sizeof(old_data_u64); i++) { + if (old_data_u64 & mask) { + level = !!(val_data_u64 & (0x1 << i)); + if (level) + ext_irq_update_core(kvm, i + reg_count * 64, level); + } + mask = mask << 1; + } + } + } else if ((offset >= EXTIOI_BOUNCE_START) && (offset < EXTIOI_BOUNCE_END)) { + reg_count = (offset - EXTIOI_BOUNCE_START) / 8; + state->bounce.reg_u64[reg_count] = val_data_u64; + } else if ((offset >= EXTIOI_ISR_START) && (offset < EXTIOI_ISR_END)) { + /*can not be writen*/ + reg_count = (offset - EXTIOI_ISR_START) / 8; + old_data_u64 = state->ext_isr.reg_u64[reg_count]; + state->ext_isr.reg_u64[reg_count] = old_data_u64 & (~val_data_u64); + + mask = 0x1; + for (i = 0; i < 8 * sizeof(old_data_u64); i++) { + if ((old_data_u64 & mask) && (val_data_u64 & mask)) { + ext_irq_update_core(kvm, i + reg_count * 64, 0); + } + mask = mask << 1; + } + } else if ((offset >= EXTIOI_COREISR_START) && (offset < EXTIOI_COREISR_END)) { + int bits; + vcpu_id = (offset >> 8) & 0xff; + reg_count = (offset & 0x1f) / 8; + + /*core_ext_ioisr*/ + state->ext_core_isr.reg_u64[vcpu_id][reg_count] &= ~val_data_u64; + + bits = sizeof(val_data_u64) * 8; + i = find_first_bit((void *)&val_data_u64, bits); + while (i < bits) { + ext_deactive_core_isr(kvm, i + reg_count * bits, vcpu_id); + bitmap_clear((void *)&val_data_u64, i, 1); + i = find_first_bit((void *)&val_data_u64, bits); + } + } else if ((offset >= EXTIOI_IPMAP_START) && (offset < EXTIOI_IPMAP_END)) { + 
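/* split the 64-bit ipmap write into byte writes so the cached ext_sw_ipmap state stays in sync */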
+        tmp_data_u8 = val_data_u64 & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 8) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 1, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 16) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 2, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 24) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 3, &tmp_data_u8);
+
+        tmp_data_u8 = (val_data_u64 >> 32) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 4, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 40) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 5, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 48) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 6, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 56) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 7, &tmp_data_u8);
+    } else if ((offset >= EXTIOI_COREMAP_START) && (offset < EXTIOI_COREMAP_END)) {
+        tmp_data_u8 = val_data_u64 & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 8) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 1, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 16) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 2, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 24) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 3, &tmp_data_u8);
+
+        tmp_data_u8 = (val_data_u64 >> 32) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 4, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 40) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 5, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 48) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 6, &tmp_data_u8);
+        tmp_data_u8 = (val_data_u64 >> 56) & 0xff;
+        ls3a_ext_intctl_writeb(vcpu, dev, addr + 7, &tmp_data_u8);
+    } else if ((offset >= EXTIOI_NODETYPE_START) && (offset < EXTIOI_NODETYPE_END)) {
+        reg_count = (offset - EXTIOI_NODETYPE_START) / 8;
+        state->node_type.reg_u64[reg_count] = val_data_u64;
+    } else {
+        WARN_ONCE(1, "%s:%d Abnormal address access:addr 0x%llx\n",
+            __FUNCTION__, __LINE__, addr);
+    }
+    return 0;
+}
+/**
+ * ls3a_ext_intctl_write()
+ * @vcpu: Virtual CPU pointer
+ * @dev: The KVM IO device for this interrupt controller
+ * @addr: Register address
+ * @size: The width of the register to be written.
+ * @val: Value to be written.
+ *
+ * Emulates writes to the extended-interrupt controller registers.
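+ * Writes are dispatched by access width under the per-irqchip spinlock; wide ipmap/coremap accesses are decomposed into byte writes.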
+ * + */ +static int ls3a_ext_intctl_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int size, const void *__val) +{ + struct ls3a_kvm_extirq *s = NULL; + unsigned long flags; + uint64_t offset; + + s = container_of(dev, struct ls3a_kvm_extirq, device); + + offset = addr & 0xfffff; + if (offset & (size - 1)) { + printk("%s(%d):unaligned address access %llx size %d\n", + __FUNCTION__, __LINE__, addr, size); + return 0; + } + + addr = (addr & 0xfffff) - EXTIOI_ADDR_OFF; + ls3a_ext_irq_lock(s, flags); + + switch (size) { + case 1: + ls3a_ext_intctl_writeb(vcpu, dev, addr, __val); + break; + case 4: + ls3a_ext_intctl_writew(vcpu, dev, addr, __val); + break; + case 8: + ls3a_ext_intctl_writel(vcpu, dev, addr, __val); + break; + default: + WARN_ONCE(1, "%s: Abnormal address access:addr 0x%llx,size %d\n", + __FUNCTION__, addr, size); + } + + ls3a_ext_irq_unlock(s, flags); + + kvm_debug("%s(%d):address access %llx size %d\n", + __FUNCTION__, __LINE__, offset, size); + return 0; +} + +static const struct kvm_io_device_ops kvm_ls3a_ext_irq_ops = { + .read = ls3a_ext_intctl_read, + .write = ls3a_ext_intctl_write, +}; + +void kvm_destroy_ls3a_ext_irq(struct kvm *kvm) +{ + struct ls3a_kvm_extirq *s = kvm->arch.v_extirq; + + if (!s) + return; + mutex_lock(&kvm->slots_lock); + kvm_io_bus_unregister_dev(s->kvm, KVM_MMIO_BUS, &s->device); + mutex_unlock(&kvm->slots_lock); + kfree(s); +} +/* + * kvm_create_ls3a_ext_irq() + * @kvm KVM structure pointer + * Create an extended interrupt resource instance for a virtual machine + * Returns: Extended interrupt structure pointer + */ +int kvm_create_ls3a_ext_irq(struct kvm *kvm) +{ + struct ls3a_kvm_extirq *s; + int ret; + + s = kzalloc(sizeof(struct ls3a_kvm_extirq), GFP_KERNEL); + if (!s) + return -ENOMEM; + + memset((void *)&s->ls3a_ext_irq, 0x0, sizeof(struct kvm_ls3a_extirq_state)); + + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize MMIO device + */ + kvm_iodevice_init(&s->device, &kvm_ls3a_ext_irq_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, + EXTIOI_REG_BASE, EXTIOI_ADDR_SIZE, &s->device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + printk("%s dev_ls3a_ext_irq register error ret %d\n", __FUNCTION__, ret); + goto err_register; + } + + kvm->arch.v_extirq = s; + + return 0; + +err_register: + kfree(s); + return -EFAULT; +} + +static int kvm_set_ext_sw_ipmap(struct kvm_ls3a_extirq_state *state) +{ + uint8_t val_data_u8; + int i, j, base_irq, irqnum, ipnum; + + ipnum = 0; + for (i = 0; i < EXTIOI_IRQS_IPMAP_SIZE; i++) { + val_data_u8 = state->ip_map.reg_u8[i]; + for (j = 0; j < 4; j++) { + if (val_data_u8 & (0x1 << j)) { + ipnum = j; + break; + } + } + kvm_debug("%s:%d ipnum:%d i:%d val_data_u8:0x%x\n", __FUNCTION__, __LINE__, + ipnum, i, val_data_u8); + + if (val_data_u8) { + for (base_irq = 0; base_irq < EXTIOI_IRQS_PER_GROUP; base_irq++) { + irqnum = i * EXTIOI_IRQS_PER_GROUP + base_irq; + state->ext_sw_ipmap[irqnum] = ipnum; + } + } else { + for (base_irq = 0; base_irq < EXTIOI_IRQS_PER_GROUP; base_irq++) { + irqnum = i * EXTIOI_IRQS_PER_GROUP + base_irq; + state->ext_sw_ipmap[irqnum] = 0; + } + } + } + + return 0; +} + +static int kvm_set_ext_sw_coremap(struct kvm *kvm, struct kvm_ls3a_extirq_state *state) +{ + int reg_count; + + for (reg_count = 0; reg_count < EXTIOI_IRQS; reg_count++) { + state->ext_sw_coremap[reg_count] = state->core_map.reg_u8[reg_count]; + + kvm_debug("%s:%d -- reg_count:%d vcpu %d\n", + __FUNCTION__, __LINE__, reg_count, 
state->core_map.reg_u8[reg_count]); + } + + return 0; +} + +static int kvm_set_ext_sw_ipisr(struct kvm *kvm, struct kvm_ls3a_extirq_state *state) +{ + int ipnum, core, irq_num; + + for (irq_num = 0; irq_num < EXTIOI_IRQS; irq_num++) { + core = state->ext_sw_coremap[irq_num]; + ipnum = state->ext_sw_ipmap[irq_num]; + + if (test_bit(irq_num, (void *)state->ext_core_isr.reg_u8[core]) == false) { + bitmap_clear((void *)state->ext_sw_ipisr[core][ipnum + 2], irq_num, 1); + } else { + bitmap_set((void *)state->ext_sw_ipisr[core][ipnum + 2], irq_num, 1); + } + + } + return 0; +} + +int kvm_get_ls3a_extirq(struct kvm *kvm, struct kvm_loongarch_ls3a_extirq_state *state) +{ + struct ls3a_kvm_extirq *v_extirq = ls3a_ext_irqchip(kvm); + struct kvm_ls3a_extirq_state *extirq_state = &(v_extirq->ls3a_ext_irq); + unsigned long flags; + if (!v_extirq) + return -EINVAL; + + ls3a_ext_irq_lock(v_extirq, flags); + memcpy(state, extirq_state, + sizeof(struct kvm_loongarch_ls3a_extirq_state)); + ls3a_ext_irq_unlock(v_extirq, flags); + kvm->stat.get_ls3a_ext_irq++; + + return 0; +} + +int kvm_set_ls3a_extirq(struct kvm *kvm, struct kvm_loongarch_ls3a_extirq_state *state) +{ + struct ls3a_kvm_extirq *v_extirq = ls3a_ext_irqchip(kvm); + struct kvm_ls3a_extirq_state *extirq_state = &(v_extirq->ls3a_ext_irq); + unsigned long flags; + if (!v_extirq) + return -EINVAL; + + ls3a_ext_irq_lock(v_extirq, flags); + memcpy(extirq_state, state, + sizeof(struct kvm_loongarch_ls3a_extirq_state)); + kvm_set_ext_sw_ipmap(extirq_state); + kvm_set_ext_sw_coremap(kvm, extirq_state); + kvm_set_ext_sw_ipisr(kvm, extirq_state); + + ls3a_ext_irq_unlock(v_extirq, flags); + kvm->stat.set_ls3a_ext_irq++; + + return 0; +} + +int kvm_setup_ls3a_extirq(struct kvm *kvm) +{ + struct ls3a_kvm_extirq *v_extirq = ls3a_ext_irqchip(kvm); + struct kvm_ls3a_extirq_state *extirq_state = &(v_extirq->ls3a_ext_irq); + unsigned long flags; + + if (!v_extirq) + return -EINVAL; + + ls3a_ext_irq_lock(v_extirq, flags); + memset(extirq_state, 0, sizeof(struct kvm_ls3a_extirq_state)); + ls3a_ext_irq_unlock(v_extirq, flags); + + return 0; +} + +void kvm_dump_ls3a_extirq_state(struct seq_file *s, + struct ls3a_kvm_extirq *irqchip) +{ + struct kvm_ls3a_extirq_state *extirq; + int i, j = 0; + unsigned long flags; + + seq_puts(s, "LS3A ext irqchip state:\n"); + + if (!irqchip) + return; + + extirq = &(irqchip->ls3a_ext_irq); + ls3a_ext_irq_lock(irqchip, flags); + seq_puts(s, "ext irq enabled"); + seq_puts(s, "\nenabled:(Not Enabled)"); + for (i = 0; i < EXTIOI_IRQS; i++) { + if (!test_bit(i, (void *)&extirq->ext_en)) + seq_printf(s, "%d ", i); + } + seq_puts(s, "\nbounce:(Not bounce)"); + for (i = 0; i < EXTIOI_IRQS; i++) { + if (!test_bit(i, (void *)&extirq->bounce)) + seq_printf(s, "%d ", i); + } + seq_puts(s, "\next_isr:"); + for (i = 0; i < EXTIOI_IRQS; i++) { + if (test_bit(i, (void *)&extirq->ext_isr)) + seq_printf(s, "%d ", i); + } + + seq_puts(s, "\ncore_isr:"); + for (i = 0; i < KVM_MAX_VCPUS && kvm_get_vcpu_by_id(irqchip->kvm, i); i++) { + seq_printf(s, "\n\t CPU%d:", i); + for (j = 0; j < EXTIOI_IRQS; j++) { + if (test_bit(j, (void *)&extirq->ext_core_isr.reg_u8[i])) + seq_printf(s, "%d ", j); + } + } + seq_printf(s, "\nip_map:%llx", extirq->ip_map.reg_u64); + seq_puts(s, "\ncore_map: (only display router to slave cpu)\n"); + for (i = 0; i < EXTIOI_IRQS_COREMAP_SIZE; i++) + if (extirq->core_map.reg_u8[i]) + seq_printf(s, "\tirq:%d -> cpu:%d\n", i, + extirq->core_map.reg_u8[i]); + ls3a_ext_irq_unlock(irqchip, flags); +} diff --git 
a/arch/loongarch/kvm/intc/ls3a_ext_irq.h b/arch/loongarch/kvm/intc/ls3a_ext_irq.h new file mode 100644 index 000000000000..075d78120b32 --- /dev/null +++ b/arch/loongarch/kvm/intc/ls3a_ext_irq.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#ifndef __LS3A_KVM_EXT_IRQ_H +#define __LS3A_KVM_EXT_IRQ_H + +#include +#include +#include +#include +#include + +#include + +#define IOCSR_EXTIOI_ADDR KVM_IOCSR_EXTIOI_NODEMAP_BASE + +#define EXTIOI_ADDR_OFF 0x10000 +#define EXTIOI_REG_BASE (LOONGSON_VIRT_REG_BASE + EXTIOI_ADDR_OFF) +#define EXTIOI_REG_END (EXTIOI_REG_BASE + 0x20000) +#define EXTIOI_ADDR_SIZE (EXTIOI_REG_END - EXTIOI_REG_BASE) +#define EXTIOI_PERCORE_REG_OFF 0x10000 +#define EXTIOI_PERCORE_REG_END (EXTIOI_PERCORE_REG_OFF + 0x10000) + +#define EXTIOI_ADDR(off) (EXTIOI_REG_BASE + (off) - IOCSR_EXTIOI_ADDR) +#define EXTIOI_PERCORE_ADDR(id, off) \ + (EXTIOI_REG_BASE + EXTIOI_PERCORE_REG_OFF + ((id) << 8) + (off)) + +#define EXTIOI_NODETYPE_START (KVM_IOCSR_EXTIOI_NODEMAP_BASE - IOCSR_EXTIOI_ADDR) +#define EXTIOI_NODETYPE_END (EXTIOI_NODETYPE_START + 0x20) +#define EXTIOI_IPMAP_START (KVM_IOCSR_EXTIOI_IPMAP_BASE - IOCSR_EXTIOI_ADDR) +#define EXTIOI_IPMAP_END (EXTIOI_IPMAP_START + 0x8) +#define EXTIOI_ENABLE_START (KVM_IOCSR_EXTIOI_EN_BASE - IOCSR_EXTIOI_ADDR) +#define EXTIOI_ENABLE_END (EXTIOI_ENABLE_START + 0x20) +#define EXTIOI_BOUNCE_START (KVM_IOCSR_EXTIOI_BOUNCE_BASE - IOCSR_EXTIOI_ADDR) +#define EXTIOI_BOUNCE_END (EXTIOI_BOUNCE_START + 0x20) +#define EXTIOI_ISR_START (0x1700 - IOCSR_EXTIOI_ADDR) +#define EXTIOI_ISR_END (EXTIOI_ISR_START + 0x20) +#define EXTIOI_COREMAP_START (KVM_IOCSR_EXTIOI_ROUTE_BASE - IOCSR_EXTIOI_ADDR) +#define EXTIOI_COREMAP_END (EXTIOI_COREMAP_START + 0x100) +#define EXTIOI_COREISR_START (EXTIOI_PERCORE_REG_OFF) +#define EXTIOI_COREISR_END (EXTIOI_PERCORE_REG_END) + +#define LS3A_INTC_IP 8 +#define EXTIOI_IRQS KVM_EXTIOI_IRQS +#define EXTIOI_IRQS_BITMAP_SIZE (EXTIOI_IRQS / 8) +/* map to ipnum per 32 irqs */ +#define EXTIOI_IRQS_IPMAP_SIZE (EXTIOI_IRQS / 32) +#define EXTIOI_IRQS_PER_GROUP KVM_EXTIOI_IRQS_PER_GROUP +#define EXTIOI_IRQS_COREMAP_SIZE (EXTIOI_IRQS) +#define EXTIOI_IRQS_NODETYPE_SIZE KVM_EXTIOI_IRQS_NODETYPE_SIZE + +typedef struct kvm_ls3a_extirq_state { + union ext_en { + uint64_t reg_u64[EXTIOI_IRQS_BITMAP_SIZE / 8]; + uint32_t reg_u32[EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[EXTIOI_IRQS_BITMAP_SIZE]; + } ext_en; + union bounce { + uint64_t reg_u64[EXTIOI_IRQS_BITMAP_SIZE / 8]; + uint32_t reg_u32[EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[EXTIOI_IRQS_BITMAP_SIZE]; + } bounce; + union ext_isr { + uint64_t reg_u64[EXTIOI_IRQS_BITMAP_SIZE / 8]; + uint32_t reg_u32[EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[EXTIOI_IRQS_BITMAP_SIZE]; + } ext_isr; + union ext_core_isr { + uint64_t reg_u64[KVM_MAX_VCPUS][EXTIOI_IRQS_BITMAP_SIZE / 8]; + uint32_t reg_u32[KVM_MAX_VCPUS][EXTIOI_IRQS_BITMAP_SIZE / 4]; + uint8_t reg_u8[KVM_MAX_VCPUS][EXTIOI_IRQS_BITMAP_SIZE]; + } ext_core_isr; + union ip_map { + uint64_t reg_u64; + uint32_t reg_u32[EXTIOI_IRQS_IPMAP_SIZE / 4]; + uint8_t reg_u8[EXTIOI_IRQS_IPMAP_SIZE]; + } ip_map; + union core_map { + uint64_t reg_u64[EXTIOI_IRQS_COREMAP_SIZE / 8]; + uint32_t reg_u32[EXTIOI_IRQS_COREMAP_SIZE / 4]; + uint8_t reg_u8[EXTIOI_IRQS_COREMAP_SIZE]; + } core_map; + union node_type { + uint64_t reg_u64[EXTIOI_IRQS_NODETYPE_SIZE / 4]; + uint32_t reg_u32[EXTIOI_IRQS_NODETYPE_SIZE / 2]; + uint16_t reg_u16[EXTIOI_IRQS_NODETYPE_SIZE]; + 
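+		/*
+		 * EXTIOI_IRQS_NODETYPE_SIZE is counted in 16-bit entries,
+		 * so the u64/u32/u16 views above and the u8 view below all
+		 * alias the same 2 * EXTIOI_IRQS_NODETYPE_SIZE bytes.
+		 */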
uint8_t reg_u8[EXTIOI_IRQS_NODETYPE_SIZE * 2];
+	} node_type;
+
+	/* software state */
+	uint8_t ext_sw_ipmap[EXTIOI_IRQS];
+	uint8_t ext_sw_coremap[EXTIOI_IRQS];
+	uint8_t ext_sw_ipisr[KVM_MAX_VCPUS][LS3A_INTC_IP][EXTIOI_IRQS_BITMAP_SIZE];
+} LS3AExtirqState;
+
+struct ls3a_kvm_extirq {
+	spinlock_t lock;
+	struct kvm *kvm;
+	struct kvm_io_device device;
+	struct kvm_ls3a_extirq_state ls3a_ext_irq;
+};
+
+static inline struct ls3a_kvm_extirq *ls3a_ext_irqchip(struct kvm *kvm)
+{
+	return kvm->arch.v_extirq;
+}
+
+static inline int ls3a_extirq_in_kernel(struct kvm *kvm)
+{
+	int ret;
+
+	ret = (ls3a_ext_irqchip(kvm) != NULL);
+	return ret;
+}
+
+
+void ext_irq_handler(struct kvm *kvm, int irq, int level);
+int kvm_create_ls3a_ext_irq(struct kvm *kvm);
+int kvm_get_ls3a_extirq(struct kvm *kvm,
+		struct kvm_loongarch_ls3a_extirq_state *state);
+int kvm_set_ls3a_extirq(struct kvm *kvm,
+		struct kvm_loongarch_ls3a_extirq_state *state);
+void kvm_destroy_ls3a_ext_irq(struct kvm *kvm);
+void msi_irq_handler(struct kvm *kvm, int irq, int level);
+int kvm_setup_ls3a_extirq(struct kvm *kvm);
+void kvm_dump_ls3a_extirq_state(struct seq_file *m, struct ls3a_kvm_extirq *irqchip);
+#endif
diff --git a/arch/loongarch/kvm/intc/ls3a_ipi.c b/arch/loongarch/kvm/intc/ls3a_ipi.c
new file mode 100644
index 000000000000..d5706b39172a
--- /dev/null
+++ b/arch/loongarch/kvm/intc/ls3a_ipi.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include "kvmcpu.h"
+#include "ls3a_ipi.h"
+#include "ls7a_irq.h"
+#include "ls3a_ext_irq.h"
+
+#define ls3a_gipi_lock(s, flags)	spin_lock_irqsave(&s->lock, flags)
+#define ls3a_gipi_unlock(s, flags)	spin_unlock_irqrestore(&s->lock, flags)
+
+extern int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
+			struct kvm_loongarch_interrupt *irq);
+int kvm_helper_send_ipi(struct kvm_vcpu *vcpu, unsigned int cpu, unsigned int action)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct ls3a_kvm_ipi *ipi = ls3a_ipi_irqchip(kvm);
+	gipiState *s = &(ipi->ls3a_gipistate);
+	unsigned long flags;
+	struct kvm_loongarch_interrupt irq;
+
+	kvm->stat.pip_write_exits++;
+
+	ls3a_gipi_lock(ipi, flags);
+	if (s->core[cpu].status == 0) {
+		irq.cpu = cpu;
+		irq.irq = LARCH_INT_IPI;
+		kvm_vcpu_ioctl_interrupt(kvm->vcpus[cpu], &irq);
+	}
+
+	s->core[cpu].status |= action;
+	ls3a_gipi_unlock(ipi, flags);
+
+	return 0;
+}
+
+static int ls3a_gipi_writel(struct ls3a_kvm_ipi *ipi, gpa_t addr,
+			int len, const void *val)
+{
+	uint64_t data, offset;
+	struct kvm_loongarch_interrupt irq;
+	gipiState *s = &(ipi->ls3a_gipistate);
+	uint32_t cpu, action_data;
+	struct kvm *kvm;
+	void *pbuf;
+	int mailbox, action;
+
+	kvm = ipi->kvm;
+	cpu = (addr >> 8) & 0xff;
+
+	data = *(uint64_t *)val;
+	offset = addr & 0xFF;
+
+	BUG_ON(offset & (len - 1));
+
+	switch (offset) {
+	case CORE0_STATUS_OFF:
+		printk("CORE0_STATUS_OFF can't be written\n");
+
+		break;
+	case CORE0_EN_OFF:
+		s->core[cpu].en = data;
+
+		break;
+	case CORE0_IPI_SEND:
+		cpu = ((data & 0xffffffff) >> 16) & 0x3ff;
+		action = (data & 0x1f);
+		action_data = (1 << action);
+
+		if (s->core[cpu].status == 0) {
+			irq.cpu = cpu;
+			irq.irq = LARCH_INT_IPI;
+
+			if (likely(kvm->vcpus[cpu])) {
+				kvm_vcpu_ioctl_interrupt(kvm->vcpus[cpu], &irq);
+			}
+		}
+		s->core[cpu].status |= action_data;
+		break;
+	case CORE0_SET_OFF:
+		pr_info("CORE0_SET_OFF simulation is required\n");
+		break;
+	case CORE0_CLEAR_OFF:
+		s->core[cpu].status &= ~data;
+		if (!s->core[cpu].status) {
+			irq.cpu = cpu;
+			irq.irq =
-LARCH_INT_IPI; + if (likely(kvm->vcpus[cpu])) + kvm_vcpu_ioctl_interrupt(kvm->vcpus[cpu], &irq); + else + kvm_err("Failed lower ipi irq target cpu:%d\n", cpu); + } + + break; + case CORE0_MAIL_SEND: + cpu = ((data & 0xffffffff) >> 16) & 0x3ff; + mailbox = ((data & 0xffffffff) >> 2) & 0x7; + pbuf = (void *)s->core[cpu].buf + mailbox * 4; + + *(unsigned int *)pbuf = (unsigned int)(data >> 32); + break; + case 0x20 ... 0x3c: + pbuf = (void *)s->core[cpu].buf + (offset - 0x20); + if (len == 1) + *(unsigned char *)pbuf = (unsigned char)data; + else if (len == 2) + *(unsigned short *)pbuf = (unsigned short)data; + else if (len == 4) + *(unsigned int *)pbuf = (unsigned int)data; + else if (len == 8) + *(unsigned long *)pbuf = (unsigned long)data; + + break; + default: + printk("ls3a_gipi_writel with unknown addr %llx \n", addr); + break; + } + return 0; +} + +static uint64_t ls3a_gipi_readl(struct ls3a_kvm_ipi *ipi, + gpa_t addr, int len, void *val) +{ + uint64_t offset; + uint64_t ret = 0; + + gipiState *s = &(ipi->ls3a_gipistate); + uint32_t cpu; + void *pbuf; + + cpu = (addr >> 8) & 0xff; + + offset = addr & 0xFF; + + BUG_ON(offset & (len - 1)); + switch (offset) { + case CORE0_STATUS_OFF: + ret = s->core[cpu].status; + break; + case CORE0_EN_OFF: + ret = s->core[cpu].en; + break; + case CORE0_SET_OFF: + ret = 0; + break; + case CORE0_CLEAR_OFF: + ret = 0; + break; + case 0x20 ... 0x3c: + pbuf = (void *)s->core[cpu].buf + (offset - 0x20); + if (len == 1) + ret = *(unsigned char *)pbuf; + else if (len == 2) + ret = *(unsigned short *)pbuf; + else if (len == 4) + ret = *(unsigned int *)pbuf; + else if (len == 8) + ret = *(unsigned long *)pbuf; + break; + default: + printk("ls3a_gipi_readl with unknown addr %llx \n", addr); + break; + } + + *(uint64_t *)val = ret; + + return ret; +} + +static int kvm_ls3a_ipi_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct ls3a_kvm_ipi *ipi; + ipi_io_device *ipi_device; + unsigned long flags; + + ipi_device = container_of(dev, ipi_io_device, device); + ipi = ipi_device->ipi; + ipi->kvm->stat.pip_write_exits++; + + ls3a_gipi_lock(ipi, flags); + ls3a_gipi_writel(ipi, addr, len, val); + ls3a_gipi_unlock(ipi, flags); + return 0; +} + + +static int kvm_ls3a_ipi_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + struct ls3a_kvm_ipi *ipi; + ipi_io_device *ipi_device; + unsigned long flags; + + ipi_device = container_of(dev, ipi_io_device, device); + ipi = ipi_device->ipi; + ipi->kvm->stat.pip_read_exits++; + + ls3a_gipi_lock(ipi, flags); + ls3a_gipi_readl(ipi, addr, len, val); + ls3a_gipi_unlock(ipi, flags); + return 0; +} + + +static const struct kvm_io_device_ops kvm_ls3a_ipi_ops = { + .read = kvm_ls3a_ipi_read, + .write = kvm_ls3a_ipi_write, +}; + +void kvm_destroy_ls3a_ipi(struct kvm *kvm) +{ + struct kvm_io_device *device; + struct ls3a_kvm_ipi *vipi = kvm->arch.v_gipi; + + if (!vipi) + return; + device = &vipi->dev_ls3a_ipi.device; + mutex_lock(&kvm->slots_lock); + kvm_io_bus_unregister_dev(vipi->kvm, KVM_MMIO_BUS, device); + mutex_unlock(&kvm->slots_lock); + kfree(vipi); +} + +int kvm_create_ls3a_ipi(struct kvm *kvm) +{ + struct ls3a_kvm_ipi *s; + unsigned long addr; + struct kvm_io_device *device; + int ret; + + s = kzalloc(sizeof(struct ls3a_kvm_ipi), GFP_KERNEL); + if (!s) + return -ENOMEM; + spin_lock_init(&s->lock); + s->kvm = kvm; + + /* + * Initialize MMIO device + */ + device = &s->dev_ls3a_ipi.device; + kvm_iodevice_init(device, &kvm_ls3a_ipi_ops); 
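+	/*
+	 * The mailbox region is laid out as one 256-byte window per core:
+	 * bits [15:8] of the guest offset select the target vCPU and bits
+	 * [7:0] select the CORE0_* register inside that window (see
+	 * ls3a_gipi_writel() and ls3a_gipi_readl() above, which derive
+	 * cpu from (addr >> 8) & 0xff).
+	 */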
+ addr = SMP_MAILBOX; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, + addr, KVM_IOCSR_IPI_ADDR_SIZE, device); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm_err("%s Initialize MMIO dev err ret:%d\n", __func__, ret); + goto err; + } else { + s->dev_ls3a_ipi.ipi = s; + } + + kvm->arch.v_gipi = s; + return 0; + +err: + kfree(s); + return -EFAULT; +} + +int kvm_get_ls3a_ipi(struct kvm *kvm, struct loongarch_gipiState *state) +{ + struct ls3a_kvm_ipi *ipi = ls3a_ipi_irqchip(kvm); + gipiState *ipi_state = &(ipi->ls3a_gipistate); + unsigned long flags; + + ls3a_gipi_lock(ipi, flags); + memcpy(state, ipi_state, sizeof(gipiState)); + ls3a_gipi_unlock(ipi, flags); + return 0; +} + +int kvm_set_ls3a_ipi(struct kvm *kvm, struct loongarch_gipiState *state) +{ + struct ls3a_kvm_ipi *ipi = ls3a_ipi_irqchip(kvm); + gipiState *ipi_state = &(ipi->ls3a_gipistate); + unsigned long flags; + + if (!ipi) + return -EINVAL; + + ls3a_gipi_lock(ipi, flags); + memcpy(ipi_state, state, sizeof(gipiState)); + ls3a_gipi_unlock(ipi, flags); + return 0; +} diff --git a/arch/loongarch/kvm/intc/ls3a_ipi.h b/arch/loongarch/kvm/intc/ls3a_ipi.h new file mode 100644 index 000000000000..ae65c7d1a458 --- /dev/null +++ b/arch/loongarch/kvm/intc/ls3a_ipi.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#ifndef __LS3A_KVM_IPI_H +#define __LS3A_KVM_IPI_H + +#include +#include +#include +#include + +#include + +typedef struct gipi_single { + uint32_t status; + uint32_t en; + uint32_t set; + uint32_t clear; + uint64_t buf[4]; +} gipi_single; + +typedef struct gipiState { + gipi_single core[KVM_MAX_VCPUS]; +} gipiState; + +struct ls3a_kvm_ipi; + +typedef struct ipi_io_device { + struct ls3a_kvm_ipi *ipi; + struct kvm_io_device device; + int nodeNum; +} ipi_io_device; + +struct ls3a_kvm_ipi { + spinlock_t lock; + struct kvm *kvm; + gipiState ls3a_gipistate; + int nodeNum; + ipi_io_device dev_ls3a_ipi; +}; + +#define SMP_MAILBOX (LOONGSON_VIRT_REG_BASE + 0x0000) +#define KVM_IPI_REG_ADDRESS(id, off) (SMP_MAILBOX | (id << 8) | off) +#define KVM_IOCSR_IPI_ADDR_SIZE 0x10000 + +#define CORE0_STATUS_OFF 0x000 +#define CORE0_EN_OFF 0x004 +#define CORE0_SET_OFF 0x008 +#define CORE0_CLEAR_OFF 0x00c +#define CORE0_BUF_20 0x020 +#define CORE0_BUF_28 0x028 +#define CORE0_BUF_30 0x030 +#define CORE0_BUF_38 0x038 +#define CORE0_IPI_SEND 0x040 +#define CORE0_MAIL_SEND 0x048 + +static inline struct ls3a_kvm_ipi *ls3a_ipi_irqchip(struct kvm *kvm) +{ + return kvm->arch.v_gipi; +} + +static inline int ls3a_ipi_in_kernel(struct kvm *kvm) +{ + int ret; + + ret = (ls3a_ipi_irqchip(kvm) != NULL); + return ret; +} + +int kvm_create_ls3a_ipi(struct kvm *kvm); +void kvm_destroy_ls3a_ipi(struct kvm *kvm); +int kvm_set_ls3a_ipi(struct kvm *kvm, struct loongarch_gipiState *state); +int kvm_get_ls3a_ipi(struct kvm *kvm, struct loongarch_gipiState *state); +int kvm_helper_send_ipi(struct kvm_vcpu *vcpu, unsigned int cpu, unsigned int action); +#endif diff --git a/arch/loongarch/kvm/intc/ls7a_irq.c b/arch/loongarch/kvm/intc/ls7a_irq.c new file mode 100644 index 000000000000..8a044159124a --- /dev/null +++ b/arch/loongarch/kvm/intc/ls7a_irq.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include "ls3a_ipi.h" +#include "ls7a_irq.h" +#include "ls3a_ext_irq.h" + +void ls7a_ioapic_lock(struct ls7a_kvm_ioapic *s, unsigned 
long *flags) +{ + unsigned long tmp; + spin_lock_irqsave(&s->lock, tmp); + *flags = tmp; +} + +void ls7a_ioapic_unlock(struct ls7a_kvm_ioapic *s, unsigned long *flags) +{ + unsigned long tmp; + tmp = *flags; + spin_unlock_irqrestore(&s->lock, tmp); +} + +static void kvm_ls7a_ioapic_raise(struct kvm *kvm, unsigned long mask) +{ + unsigned long irqnum, val; + struct ls7a_kvm_ioapic *s = ls7a_ioapic_irqchip(kvm); + struct kvm_ls7a_ioapic_state *state; + struct kvm_loongarch_interrupt irq; + int i; + + state = &s->ls7a_ioapic; + irq.cpu = -1; + val = mask & state->intirr & (~state->int_mask); + val &= ~state->intisr; + for_each_set_bit(i, &val, 64) { + state->intisr |= 0x1ULL << i; + irqnum = state->htmsi_vector[i]; + kvm_debug("msi_irq_handler,%ld,up\n", irqnum); + msi_irq_handler(kvm, irqnum, 1); + } + + kvm->stat.ls7a_ioapic_update++; +} + +static void kvm_ls7a_ioapic_lower(struct kvm *kvm, unsigned long mask) +{ + unsigned long irqnum, val; + struct ls7a_kvm_ioapic *s = ls7a_ioapic_irqchip(kvm); + struct kvm_ls7a_ioapic_state *state; + struct kvm_loongarch_interrupt irq; + int i; + + state = &s->ls7a_ioapic; + irq.cpu = -1; + val = mask & state->intisr; + for_each_set_bit(i, &val, 64) { + state->intisr &= ~(0x1ULL << i); + irqnum = state->htmsi_vector[i]; + kvm_debug("msi_irq_handler,%ld,down\n", irqnum); + msi_irq_handler(kvm, irqnum, 0); + } + + kvm->stat.ls7a_ioapic_update++; +} + +int kvm_ls7a_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + if (!level) + return -1; + + kvm_debug("msi data is 0x%x\n", e->msi.data); + msi_irq_handler(kvm, e->msi.data, 1); + return 0; +} + +int kvm_ls7a_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct kvm_kernel_irq_routing_entry route; + + if (msi->flags != 0) + return -EINVAL; + + kvm->stat.ls7a_msi_irq++; + route.msi.address_lo = msi->address_lo; + route.msi.address_hi = msi->address_hi; + route.msi.data = msi->data; + + kvm_debug("msi data is 0x%x\n", route.msi.data); + return kvm_ls7a_set_msi(&route, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); + +} + +int kvm_ls7a_ioapic_set_irq(struct kvm *kvm, int irq, int level) +{ + struct ls7a_kvm_ioapic *s; + struct kvm_ls7a_ioapic_state *state; + uint64_t mask = 1ULL << irq; + s = ls7a_ioapic_irqchip(kvm); + state = &s->ls7a_ioapic; + BUG_ON(irq < 0 || irq >= LS7A_IOAPIC_NUM_PINS); + + if (state->intedge & mask) { + /* edge triggered */ + if (level) { + if ((state->last_intirr & mask) == 0) { + state->intirr |= mask; + kvm_ls7a_ioapic_raise(kvm, mask); + } + state->last_intirr |= mask; + } else + state->last_intirr &= ~mask; + } else { + /* level triggered */ + if (!!level) { + if ((state->intirr & mask) == 0) { + state->intirr |= mask; + kvm_ls7a_ioapic_raise(kvm, mask); + } + } else { + if (state->intirr & mask) { + state->intirr &= ~mask; + kvm_ls7a_ioapic_lower(kvm, mask); + } + } + } + kvm->stat.ls7a_ioapic_set_irq++; + return 0; +} + +static int ls7a_ioapic_reg_write(struct ls7a_kvm_ioapic *s, + gpa_t addr, int len, const void *val) +{ + struct kvm *kvm; + struct kvm_ls7a_ioapic_state *state; + int64_t offset_tmp; + uint64_t offset; + uint64_t old, himask, lowmask; + unsigned long data, flags; + + offset = addr & 0xfff; + kvm = s->kvm; + state = &(s->ls7a_ioapic); + lowmask = 0xFFFFFFFFUL; + himask = lowmask << 32; + + if (offset & (len - 1)) { + printk("%s(%d):unaligned address access %llx size %d \n", + __FUNCTION__, __LINE__, addr, len); + return 0; + } + + if (8 == len) { + data = *(uint64_t *)val; + switch 
(offset) { + case LS7A_INT_MASK_OFFSET: + old = state->int_mask; + state->int_mask = data; + if (old & ~data) + kvm_ls7a_ioapic_raise(kvm, old & ~data); + if (~old & data) + kvm_ls7a_ioapic_lower(kvm, ~old & data); + break; + case LS7A_INT_STATUS_OFFSET: + state->intisr = data; + break; + case LS7A_INT_EDGE_OFFSET: + state->intedge = data; + break; + case LS7A_INT_CLEAR_OFFSET: + /* + * For emulated device, only clear edge triggered irq + * on writing INTCLR reg no effect on level triggered + * irq + * However for pass-through device with level-triggered + * intx, here need clear interrupt + */ + old = data; + data = data & state->intedge; + state->intirr &= ~data; + kvm_ls7a_ioapic_lower(kvm, data); + state->intisr &= ~data; + + data = old & ~state->intedge; + ls7a_ioapic_unlock(kvm->arch.v_ioapic, &flags); + for_each_set_bit(offset_tmp, &data, 64) + kvm_notify_acked_irq(kvm, 0, offset_tmp); + ls7a_ioapic_lock(kvm->arch.v_ioapic, &flags); + break; + case LS7A_INT_POL_OFFSET: + state->int_polarity = data; + break; + case LS7A_HTMSI_EN_OFFSET: + state->htmsi_en = data; + break; + case LS7A_AUTO_CTRL0_OFFSET: + case LS7A_AUTO_CTRL1_OFFSET: + break; + default: + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + break; + } + } else if (4 == len) { + data = *(uint32_t *)val; + switch (offset) { + case LS7A_INT_MASK_OFFSET: + old = state->int_mask & lowmask; + state->int_mask = (state->int_mask & himask) | data; + if (old & ~data) + kvm_ls7a_ioapic_raise(kvm, old & ~data); + if (~old & data) + kvm_ls7a_ioapic_lower(kvm, ~old & data); + break; + case LS7A_INT_MASK_OFFSET + 4: + data = data << 32; + old = state->int_mask & himask; + state->int_mask = (state->int_mask & lowmask) | data; + if (old & ~data) + kvm_ls7a_ioapic_raise(kvm, old & ~data); + if (~old & data) + kvm_ls7a_ioapic_lower(kvm, ~old & data); + break; + case LS7A_INT_STATUS_OFFSET: + state->intisr = (state->intisr & himask) | data; + break; + case LS7A_INT_STATUS_OFFSET + 4: + data = data << 32; + state->intisr = (state->intisr & lowmask) | data; + break; + case LS7A_INT_EDGE_OFFSET: + state->intedge = (state->intedge & himask) | data; + break; + case LS7A_INT_EDGE_OFFSET + 4: + data = data << 32; + state->intedge = (state->intedge & lowmask) | data; + break; + case LS7A_INT_CLEAR_OFFSET + 4: + data = data << 32; + fallthrough; + case LS7A_INT_CLEAR_OFFSET: + old = data; + data = data & state->intedge; + state->intirr &= ~data; + kvm_ls7a_ioapic_lower(kvm, data); + state->intisr &= ~data; + + data = old & ~state->intedge; + ls7a_ioapic_unlock(kvm->arch.v_ioapic, &flags); + for_each_set_bit(offset_tmp, &data, 64) + kvm_notify_acked_irq(kvm, 0, offset_tmp); + ls7a_ioapic_lock(kvm->arch.v_ioapic, &flags); + break; + case LS7A_INT_POL_OFFSET: + state->int_polarity = (state->int_polarity & himask) | data; + break; + case LS7A_INT_POL_OFFSET+4: + data = data << 32; + state->int_polarity = (state->int_polarity & lowmask) | data; + break; + case LS7A_HTMSI_EN_OFFSET: + state->htmsi_en = (state->htmsi_en & himask) | data; + break; + case LS7A_HTMSI_EN_OFFSET+4: + data = data << 32; + state->htmsi_en = (state->htmsi_en & lowmask) | data; + break; + case LS7A_AUTO_CTRL0_OFFSET: + case LS7A_AUTO_CTRL0_OFFSET+4: + case LS7A_AUTO_CTRL1_OFFSET: + case LS7A_AUTO_CTRL1_OFFSET+4: + break; + default: + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + break; + } + } else if (1 == len) { + data = *(unsigned char *)val; + if (offset >= LS7A_HTMSI_VEC_OFFSET) { + offset_tmp = offset - 
LS7A_HTMSI_VEC_OFFSET; + if (offset_tmp >= 0 && offset_tmp < 64) { + state->htmsi_vector[offset_tmp] = + (uint8_t)(data & 0xff); + } + } else if (offset >= LS7A_ROUTE_ENTRY_OFFSET) { + offset_tmp = offset - LS7A_ROUTE_ENTRY_OFFSET; + if (offset_tmp >= 0 && offset_tmp < 64) { + state->route_entry[offset_tmp] = + (uint8_t)(data & 0xff); + } + } else { + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + } + } else { + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + } + kvm->stat.ioapic_reg_write++; + return 0; +} + +static inline struct ls7a_kvm_ioapic *to_ioapic(struct kvm_io_device *dev) +{ + return container_of(dev, struct ls7a_kvm_ioapic, dev_ls7a_ioapic); +} + +static int kvm_ls7a_ioapic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + struct ls7a_kvm_ioapic *s = to_ioapic(this); + unsigned long flags; + + ls7a_ioapic_lock(s->kvm->arch.v_ioapic, &flags); + ls7a_ioapic_reg_write(s, addr, len, val); + ls7a_ioapic_unlock(s->kvm->arch.v_ioapic, &flags); + + return 0; +} + +static int ls7a_ioapic_reg_read(struct ls7a_kvm_ioapic *s, + gpa_t addr, int len, void *val) +{ + uint64_t offset, offset_tmp; + struct kvm *kvm; + struct kvm_ls7a_ioapic_state *state; + uint64_t result = 0, lowmask, himask; + + state = &(s->ls7a_ioapic); + kvm = s->kvm; + offset = addr & 0xfff; + lowmask = 0xFFFFFFFFUL; + himask = lowmask << 32; + if (offset & (len - 1)) { + printk("%s(%d):unaligned address access %llx size %d \n", + __FUNCTION__, __LINE__, addr, len); + return 0; + } + + if (8 == len) { + switch (offset) { + case LS7A_INT_MASK_OFFSET: + result = state->int_mask; + break; + case LS7A_INT_STATUS_OFFSET: + result = state->intisr & (~state->int_mask); + break; + case LS7A_INT_EDGE_OFFSET: + result = state->intedge; + break; + case LS7A_INT_POL_OFFSET: + result = state->int_polarity; + break; + case LS7A_HTMSI_EN_OFFSET: + result = state->htmsi_en; + break; + case LS7A_AUTO_CTRL0_OFFSET: + case LS7A_AUTO_CTRL1_OFFSET: + break; + case LS7A_INT_ID_OFFSET: + result = LS7A_INT_ID_VER; + result = (result << 32) + LS7A_INT_ID_VAL; + break; + default: + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + break; + } + if (val != NULL) + *(uint64_t *)val = result; + } else if (4 == len) { + switch (offset) { + case LS7A_INT_MASK_OFFSET: + result = state->int_mask & lowmask; + break; + case LS7A_INT_MASK_OFFSET + 4: + result = state->int_mask & himask; + result = result >> 32; + break; + case LS7A_INT_STATUS_OFFSET: + result = state->intisr & (~state->int_mask) & lowmask; + break; + case LS7A_INT_STATUS_OFFSET + 4: + result = state->intisr & (~state->int_mask) & himask; + result = result >> 32; + break; + case LS7A_INT_EDGE_OFFSET: + result = state->intedge & lowmask; + break; + case LS7A_INT_EDGE_OFFSET + 4: + result = state->intedge & himask; + result = result >> 32; + break; + case LS7A_INT_POL_OFFSET: + result = state->int_polarity & lowmask; + break; + case LS7A_INT_POL_OFFSET + 4: + result = state->int_polarity & himask; + result = result >> 32; + break; + case LS7A_HTMSI_EN_OFFSET: + result = state->htmsi_en & lowmask; + break; + case LS7A_HTMSI_EN_OFFSET + 4: + result = state->htmsi_en & himask; + result = result >> 32; + break; + case LS7A_AUTO_CTRL0_OFFSET: + case LS7A_AUTO_CTRL0_OFFSET + 4: + case LS7A_AUTO_CTRL1_OFFSET: + case LS7A_AUTO_CTRL1_OFFSET + 4: + break; + case LS7A_INT_ID_OFFSET: + result = LS7A_INT_ID_VAL; + break; + case LS7A_INT_ID_OFFSET + 4: + result = 
LS7A_INT_ID_VER; + break; + default: + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + break; + } + if (val != NULL) + *(uint32_t *)val = result; + } else if (1 == len) { + if (offset >= LS7A_HTMSI_VEC_OFFSET) { + offset_tmp = offset - LS7A_HTMSI_VEC_OFFSET; + if (offset_tmp >= 0 && offset_tmp < 64) { + result = state->htmsi_vector[offset_tmp]; + } + } else if (offset >= LS7A_ROUTE_ENTRY_OFFSET) { + offset_tmp = offset - LS7A_ROUTE_ENTRY_OFFSET; + if (offset_tmp >= 0 && offset_tmp < 64) { + result = state->route_entry[offset_tmp]; + } + } else { + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + } + if (val != NULL) + *(unsigned char *)val = result; + } else { + WARN_ONCE(1, "Abnormal address access:addr 0x%llx,len %d\n", addr, len); + } + kvm->stat.ioapic_reg_read++; + return result; +} + +static int kvm_ls7a_ioapic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + struct ls7a_kvm_ioapic *s = to_ioapic(this); + unsigned long flags; + uint64_t result = 0; + + ls7a_ioapic_lock(s->kvm->arch.v_ioapic, &flags); + result = ls7a_ioapic_reg_read(s, addr, len, val); + ls7a_ioapic_unlock(s->kvm->arch.v_ioapic, &flags); + return 0; +} + +static const struct kvm_io_device_ops kvm_ls7a_ioapic_ops = { + .read = kvm_ls7a_ioapic_read, + .write = kvm_ls7a_ioapic_write, +}; + +static int kvm_ls7a_ioapic_alias_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *val) +{ + struct ls7a_kvm_ioapic *s; + unsigned long flags; + + s = container_of(this, struct ls7a_kvm_ioapic, ls7a_ioapic_alias); + ls7a_ioapic_lock(s->kvm->arch.v_ioapic, &flags); + ls7a_ioapic_reg_read(s, addr, len, val); + ls7a_ioapic_unlock(s->kvm->arch.v_ioapic, &flags); + return 0; +} + +static int kvm_ls7a_ioapic_alias_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *val) +{ + struct ls7a_kvm_ioapic *s; + unsigned long flags; + + s = container_of(this, struct ls7a_kvm_ioapic, ls7a_ioapic_alias); + ls7a_ioapic_lock(s->kvm->arch.v_ioapic, &flags); + ls7a_ioapic_reg_write(s, addr, len, val); + ls7a_ioapic_unlock(s->kvm->arch.v_ioapic, &flags); + + return 0; +} + +static const struct kvm_io_device_ops kvm_ls7a_ioapic_ops_alias = { + .read = kvm_ls7a_ioapic_alias_read, + .write = kvm_ls7a_ioapic_alias_write, +}; + +int kvm_create_ls7a_ioapic(struct kvm *kvm) +{ + struct ls7a_kvm_ioapic *s; + int ret; + unsigned long ls7a_ioapic_reg_base; + + s = kzalloc(sizeof(struct ls7a_kvm_ioapic), GFP_KERNEL); + if (!s) + return -ENOMEM; + spin_lock_init(&s->lock); + s->kvm = kvm; + + ls7a_ioapic_reg_base = LS7A_IOAPIC_GUEST_REG_BASE; + + /* + * Initialize MMIO device + */ + kvm_iodevice_init(&s->dev_ls7a_ioapic, &kvm_ls7a_ioapic_ops); + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ls7a_ioapic_reg_base, + 0x1000, &s->dev_ls7a_ioapic); + if (ret < 0) { + kvm_err("Failed register ioapic, err:%d\n", ret); + goto fail_unlock; + } + + ls7a_ioapic_reg_base = LS7A_IOAPIC_GUEST_REG_BASE_ALIAS; + kvm_iodevice_init(&s->ls7a_ioapic_alias, &kvm_ls7a_ioapic_ops_alias); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ls7a_ioapic_reg_base, + 0x1000, &s->ls7a_ioapic_alias); + if (ret < 0) { + kvm_err("Failed register alias ioapic, err:%d\n", ret); + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &s->dev_ls7a_ioapic); + goto fail_unlock; + } + mutex_unlock(&kvm->slots_lock); + + kvm->arch.v_ioapic = s; + + return 0; + +fail_unlock: + mutex_unlock(&kvm->slots_lock); 
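+	/*
+	 * Both error paths land here with no device left registered: the
+	 * first path fails before registering anything and the second
+	 * unregisters dev_ls7a_ioapic before jumping, so freeing s is safe.
+	 */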
+ kfree(s); + + return -EFAULT; +} + + +int kvm_get_ls7a_ioapic(struct kvm *kvm, struct ls7a_ioapic_state *state) +{ + struct ls7a_kvm_ioapic *ls7a_ioapic = ls7a_ioapic_irqchip(kvm); + struct kvm_ls7a_ioapic_state *ioapic_state = + &(ls7a_ioapic->ls7a_ioapic); + unsigned long flags; + + ls7a_ioapic_lock(ls7a_ioapic, &flags); + memcpy(state, ioapic_state, sizeof(struct kvm_ls7a_ioapic_state)); + ls7a_ioapic_unlock(ls7a_ioapic, &flags); + kvm->stat.get_ls7a_ioapic++; + return 0; +} + +int kvm_set_ls7a_ioapic(struct kvm *kvm, struct ls7a_ioapic_state *state) +{ + struct ls7a_kvm_ioapic *ls7a_ioapic = ls7a_ioapic_irqchip(kvm); + struct kvm_ls7a_ioapic_state *ioapic_state = + &(ls7a_ioapic->ls7a_ioapic); + unsigned long flags; + + if (!ls7a_ioapic) + return -EINVAL; + + ls7a_ioapic_lock(ls7a_ioapic, &flags); + memcpy(ioapic_state, state, sizeof(struct kvm_ls7a_ioapic_state)); + ls7a_ioapic_unlock(ls7a_ioapic, &flags); + kvm->stat.set_ls7a_ioapic++; + return 0; +} + +void kvm_destroy_ls7a_ioapic(struct kvm *kvm) +{ + struct ls7a_kvm_ioapic *vpic = kvm->arch.v_ioapic; + if (!vpic) + return; + mutex_lock(&kvm->slots_lock); + kvm_io_bus_unregister_dev(vpic->kvm, KVM_MMIO_BUS, + &vpic->ls7a_ioapic_alias); + kvm_io_bus_unregister_dev(vpic->kvm, KVM_MMIO_BUS, + &vpic->dev_ls7a_ioapic); + mutex_unlock(&kvm->slots_lock); + kfree(vpic); +} + +void kvm_dump_ls7a_ioapic_state(struct seq_file *m, + struct ls7a_kvm_ioapic *ioapic) +{ + struct kvm_ls7a_ioapic_state *ioapic_state; + unsigned long flags; + int i = 0; + + if (!ioapic) + return; + + seq_puts(m, "\nIOAPIC state:\n"); + ioapic_state = &(ioapic->ls7a_ioapic); + + ls7a_ioapic_lock(ioapic, &flags); + seq_puts(m, "irq masked: "); + for (i = 0; i < 64; i++) { + if (!test_bit(i, (void *)&ioapic_state->int_mask)) + seq_printf(m, "%d ", i); + } + seq_printf(m, "\nhtmsi_en:0x%016llx\n" + "intedge:0x%016llx", + ioapic_state->htmsi_en, + ioapic_state->intedge); + + seq_puts(m, "\nroute_entry: "); + for (i = 0; i < 64; i++) + seq_printf(m, "%d ", ioapic_state->route_entry[i]); + + seq_puts(m, "\nhtmsi_vector: "); + for (i = 0; i < 64; i++) + seq_printf(m, "%d ", ioapic_state->htmsi_vector[i]); + + seq_printf(m, "\nintirr:%016llx\n" + "intisr:%016llx\n", + ioapic_state->intirr, + ioapic_state->intisr); + ls7a_ioapic_unlock(ioapic, &flags); +} diff --git a/arch/loongarch/kvm/intc/ls7a_irq.h b/arch/loongarch/kvm/intc/ls7a_irq.h new file mode 100644 index 000000000000..1bb7d3d8a74d --- /dev/null +++ b/arch/loongarch/kvm/intc/ls7a_irq.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#ifndef __LS7A_KVM_IRQ_H +#define __LS7A_KVM_IRQ_H + +#include +#include +#include +#include +#include +#include +#include "kvmcpu.h" + +#include + +#define LS7A_APIC_NUM_PINS 64 + +#define LS7A_ROUTE_ENTRY_OFFSET 0x100 +#define LS7A_INT_ID_OFFSET 0x0 +#define LS7A_INT_ID_VAL 0x7000000UL +#define LS7A_INT_ID_VER 0x1f0001UL +#define LS7A_INT_MASK_OFFSET 0x20 +#define LS7A_INT_EDGE_OFFSET 0x60 +#define LS7A_INT_CLEAR_OFFSET 0x80 +#define LS7A_INT_STATUS_OFFSET 0x3a0 +#define LS7A_INT_POL_OFFSET 0x3e0 +#define LS7A_HTMSI_EN_OFFSET 0x40 +#define LS7A_HTMSI_VEC_OFFSET 0x200 +#define LS7A_AUTO_CTRL0_OFFSET 0xc0 +#define LS7A_AUTO_CTRL1_OFFSET 0xe0 + +#define LS7A_IOAPIC_GUEST_REG_BASE 0x10000000UL +#define LS7A_IOAPIC_GUEST_REG_BASE_ALIAS 0xe0010000000UL + +#define LS7A_IOAPIC_NUM_PINS 32 + +typedef struct kvm_ls7a_ioapic_state { + u64 int_id; + /* 0x020 interrupt mask register */ + u64 int_mask; + /* 
0x040 1=msi */ + u64 htmsi_en; + /* 0x060 edge=1 level =0 */ + u64 intedge; + /* 0x080 for clean edge int,set 1 clean,set 0 is noused */ + u64 intclr; + /* 0x0c0 */ + u64 auto_crtl0; + /* 0x0e0 */ + u64 auto_crtl1; + /* 0x100 - 0x140 */ + u8 route_entry[64]; + /* 0x200 - 0x240 */ + u8 htmsi_vector[64]; + /* 0x300 */ + u64 intisr_chip0; + /* 0x320 */ + u64 intisr_chip1; + /* edge detection */ + u64 last_intirr; + /* 0x380 interrupt request register */ + u64 intirr; + /* 0x3a0 interrupt service register */ + u64 intisr; + /* 0x3e0 interrupt level polarity selection register, + * 0 for high level tirgger + */ + u64 int_polarity; +} LS7AApicState; + +struct ls7a_kvm_ioapic { + spinlock_t lock; + bool wakeup_needed; + unsigned pending_acks; + struct kvm *kvm; + struct kvm_ls7a_ioapic_state ls7a_ioapic; + struct kvm_io_device dev_ls7a_ioapic; + struct kvm_io_device ls7a_ioapic_alias; + void (*ack_notifier)(void *opaque, int irq); + unsigned long irq_states[LS7A_APIC_NUM_PINS]; +}; + +static inline struct ls7a_kvm_ioapic *ls7a_ioapic_irqchip(struct kvm *kvm) +{ + return kvm->arch.v_ioapic; +} + +static inline int ls7a_ioapic_in_kernel(struct kvm *kvm) +{ + int ret; + + ret = (ls7a_ioapic_irqchip(kvm) != NULL); + return ret; +} + +int kvm_set_ls7a_ioapic(struct kvm *kvm, struct ls7a_ioapic_state *state); +int kvm_get_ls7a_ioapic(struct kvm *kvm, struct ls7a_ioapic_state *state); +int kvm_create_ls7a_ioapic(struct kvm *kvm); +void kvm_destroy_ls7a_ioapic(struct kvm *kvm); +int kvm_ls7a_ioapic_set_irq(struct kvm *kvm, int irq, int level); + +void ls7a_ioapic_lock(struct ls7a_kvm_ioapic *s, unsigned long *flags); +void ls7a_ioapic_unlock(struct ls7a_kvm_ioapic *s, unsigned long *flags); +int kvm_ls7a_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); +int kvm_ls7a_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, + int irq_source_id, int level, bool line_status); + +void kvm_dump_ls7a_ioapic_state(struct seq_file *m, struct ls7a_kvm_ioapic *ioapic); +#endif diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c new file mode 100644 index 000000000000..fb7c4ef7f21f --- /dev/null +++ b/arch/loongarch/kvm/interrupt.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include "kvmcpu.h" +#include +#include "kvm_compat.h" + +static u32 int_to_coreint[LOONGARCH_EXC_MAX] = { + [LARCH_INT_TIMER] = CPU_TIMER, + [LARCH_INT_IPI] = CPU_IPI, + [LARCH_INT_SIP0] = CPU_SIP0, + [LARCH_INT_SIP1] = CPU_SIP1, + [LARCH_INT_IP0] = CPU_IP0, + [LARCH_INT_IP1] = CPU_IP1, + [LARCH_INT_IP2] = CPU_IP2, + [LARCH_INT_IP3] = CPU_IP3, + [LARCH_INT_IP4] = CPU_IP4, + [LARCH_INT_IP5] = CPU_IP5, + [LARCH_INT_IP6] = CPU_IP6, + [LARCH_INT_IP7] = CPU_IP7, +}; + +static int _kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority) +{ + unsigned int irq = 0; + + clear_bit(priority, &vcpu->arch.irq_pending); + if (priority < LOONGARCH_EXC_MAX) + irq = int_to_coreint[priority]; + + switch (priority) { + case LARCH_INT_TIMER: + case LARCH_INT_IPI: + case LARCH_INT_SIP0: + case LARCH_INT_SIP1: + kvm_set_gcsr_estat(irq); + break; + + case LARCH_INT_IP0: + case LARCH_INT_IP1: + case LARCH_INT_IP2: + case LARCH_INT_IP3: + case LARCH_INT_IP4: + case LARCH_INT_IP5: + case LARCH_INT_IP6: + case LARCH_INT_IP7: + kvm_set_csr_gintc(irq); + break; + + default: + break; + } + + return 1; +} + +static int _kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority) 
+{ + unsigned int irq = 0; + + clear_bit(priority, &vcpu->arch.irq_clear); + if (priority < LOONGARCH_EXC_MAX) + irq = int_to_coreint[priority]; + + switch (priority) { + case LARCH_INT_TIMER: + case LARCH_INT_IPI: + case LARCH_INT_SIP0: + case LARCH_INT_SIP1: + kvm_clear_gcsr_estat(irq); + break; + + case LARCH_INT_IP0: + case LARCH_INT_IP1: + case LARCH_INT_IP2: + case LARCH_INT_IP3: + case LARCH_INT_IP4: + case LARCH_INT_IP5: + case LARCH_INT_IP6: + case LARCH_INT_IP7: + kvm_clear_csr_gintc(irq); + break; + + default: + break; + } + + return 1; +} + +void _kvm_deliver_intr(struct kvm_vcpu *vcpu) +{ + unsigned long *pending = &vcpu->arch.irq_pending; + unsigned long *pending_clr = &vcpu->arch.irq_clear; + unsigned int priority; + + if (!(*pending) && !(*pending_clr)) + return; + + if (*pending_clr) { + priority = __ffs(*pending_clr); + while (priority <= LOONGARCH_EXC_IPNUM) { + _kvm_irq_clear(vcpu, priority); + priority = find_next_bit(pending_clr, + BITS_PER_BYTE * sizeof(*pending_clr), + priority + 1); + } + } + + if (*pending) { + priority = __ffs(*pending); + while (priority <= LOONGARCH_EXC_IPNUM) { + _kvm_irq_deliver(vcpu, priority); + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } + } + +} + +int _kvm_pending_timer(struct kvm_vcpu *vcpu) +{ + return test_bit(LARCH_INT_TIMER, &vcpu->arch.irq_pending); +} diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c new file mode 100644 index 000000000000..fef3af530969 --- /dev/null +++ b/arch/loongarch/kvm/irqfd.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include "intc/ls7a_irq.h" + +static int kvm_ls7a_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + /* ioapic pin (0 ~ 64) <---> gsi(0 ~ 64) */ + u32 irq = e->irqchip.pin; + unsigned long flags; + int ret; + + if (!ls7a_ioapic_in_kernel(kvm)) + return -ENXIO; + + ls7a_ioapic_lock(ls7a_ioapic_irqchip(kvm), &flags); + ret = kvm_ls7a_ioapic_set_irq(kvm, irq, level); + ls7a_ioapic_unlock(ls7a_ioapic_irqchip(kvm), &flags); + + return ret; +} + +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. 
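+ *
+ * A worked example (hypothetical values): a userspace entry with
+ * type == KVM_IRQ_ROUTING_IRQCHIP and u.irqchip.pin == 3 becomes a
+ * kernel entry whose ->set callback is kvm_ls7a_set_ioapic_irq, so
+ * triggering gsi 3 toggles ioapic pin 3; an entry with pin >=
+ * LS7A_APIC_NUM_PINS is rejected with -EINVAL.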
+ */
+int kvm_set_routing_entry(struct kvm *kvm,
+			struct kvm_kernel_irq_routing_entry *e,
+			const struct kvm_irq_routing_entry *ue)
+{
+	int r = -EINVAL;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		e->set = kvm_ls7a_set_ioapic_irq;
+
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+
+		if (e->irqchip.pin >= LS7A_APIC_NUM_PINS)
+			goto out;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		/*
+		 * The linux-4.19 kernel also carries flags and devid:
+		 * e->msi.flags = ue->flags;
+		 * e->msi.devid = ue->u.msi.devid;
+		 */
+		break;
+	default:
+		goto out;
+	}
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id,
+			int level, bool line_status)
+{
+	if (e->type == KVM_IRQ_ROUTING_MSI)
+		return kvm_ls7a_set_msi(e, kvm, irq_source_id, 1, false);
+
+	return -EWOULDBLOCK;
+}
+
+/**
+ * kvm_set_msi: inject the MSI corresponding to the
+ * MSI routing entry
+ *
+ * This is the entry point for irqfd MSI injection
+ * and userspace MSI injection.
+ */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id,
+		int level, bool line_status)
+{
+	int ret;
+
+	if (!level)
+		return -1;
+
+	ret = kvm_ls7a_set_msi(e, kvm, irq_source_id, 1, false);
+	return ret;
+}
+
+int kvm_ls7a_setup_default_irq_routing(struct kvm *kvm)
+{
+	struct kvm_irq_routing_entry *entries;
+
+	u32 nr = LS7A_APIC_NUM_PINS;
+	int i, ret;
+
+	entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		entries[i].gsi = i;
+		entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+		entries[i].u.irqchip.irqchip = 0;
+		entries[i].u.irqchip.pin = i;
+	}
+	ret = kvm_set_irq_routing(kvm, entries, nr, 0);
+	kfree(entries);
+
+	return ret;
+}
diff --git a/arch/loongarch/kvm/kvm_compat.h b/arch/loongarch/kvm/kvm_compat.h
new file mode 100644
index 000000000000..b497a1b74860
--- /dev/null
+++ b/arch/loongarch/kvm/kvm_compat.h
@@ -0,0 +1,762 @@
+#ifndef __LOONGARCH_KVM_COMPAT_H__
+#define __LOONGARCH_KVM_COMPAT_H__
+
+#ifdef __ASSEMBLY__
+#define _ULCAST_
+#else
+#define _ULCAST_ (unsigned long)
+#endif
+
+#define KVM_CSR_CRMD		0x0	/* Current mode info */
+#define KVM_CRMD_WE_SHIFT	9
+#define KVM_CRMD_WE		(_ULCAST_(0x1) << KVM_CRMD_WE_SHIFT)
+#define KVM_CRMD_DACM_SHIFT	7
+#define KVM_CRMD_DACM_WIDTH	2
+#define KVM_CRMD_DACM		(_ULCAST_(0x3) << KVM_CRMD_DACM_SHIFT)
+#define KVM_CRMD_DACF_SHIFT	5
+#define KVM_CRMD_DACF_WIDTH	2
+#define KVM_CRMD_DACF		(_ULCAST_(0x3) << KVM_CRMD_DACF_SHIFT)
+#define KVM_CRMD_PG_SHIFT	4
+#define KVM_CRMD_PG		(_ULCAST_(0x1) << KVM_CRMD_PG_SHIFT)
+#define KVM_CRMD_DA_SHIFT	3
+#define KVM_CRMD_DA		(_ULCAST_(0x1) << KVM_CRMD_DA_SHIFT)
+#define KVM_CRMD_IE_SHIFT	2
+#define KVM_CRMD_IE		(_ULCAST_(0x1) << KVM_CRMD_IE_SHIFT)
+#define KVM_CRMD_PLV_SHIFT	0
+#define KVM_CRMD_PLV_WIDTH	2
+#define KVM_CRMD_PLV		(_ULCAST_(0x3) << KVM_CRMD_PLV_SHIFT)
+
+#define KVM_CSR_PRMD		0x1	/* Prev-exception mode info */
+#define KVM_PRMD_PIE_SHIFT	2
+#define KVM_PRMD_PWE_SHIFT	3
+#define KVM_PRMD_PIE		(_ULCAST_(0x1) << KVM_PRMD_PIE_SHIFT)
+#define KVM_PRMD_PWE		(_ULCAST_(0x1) << KVM_PRMD_PWE_SHIFT)
+#define KVM_PRMD_PPLV_SHIFT	0
+#define KVM_PRMD_PPLV_WIDTH	2
+#define KVM_PRMD_PPLV		(_ULCAST_(0x3) << KVM_PRMD_PPLV_SHIFT)
+
+#define KVM_CSR_EUEN		0x2	/* Extended unit enable */
+#define KVM_EUEN_LBTEN_SHIFT	3
+#define
KVM_EUEN_LBTEN (_ULCAST_(0x1) << KVM_EUEN_LBTEN_SHIFT) +#define KVM_EUEN_LASXEN_SHIFT 2 +#define KVM_EUEN_LASXEN (_ULCAST_(0x1) << KVM_EUEN_LASXEN_SHIFT) +#define KVM_EUEN_LSXEN_SHIFT 1 +#define KVM_EUEN_LSXEN (_ULCAST_(0x1) << KVM_EUEN_LSXEN_SHIFT) +#define KVM_EUEN_FPEN_SHIFT 0 +#define KVM_EUEN_FPEN (_ULCAST_(0x1) << KVM_EUEN_FPEN_SHIFT) + +#define KVM_CSR_MISC 0x3 /* Misc config */ +#define KVM_CSR_ECFG 0x4 /* Exception config */ +#define KVM_ECFG_VS_SHIFT 16 +#define KVM_ECFG_VS_WIDTH 3 +#define KVM_ECFG_VS (_ULCAST_(0x7) << KVM_ECFG_VS_SHIFT) +#define KVM_ECFG_IM_SHIFT 0 +#define KVM_ECFG_IM_WIDTH 13 +#define KVM_ECFG_IM (_ULCAST_(0x1fff) << KVM_ECFG_IM_SHIFT) + +#define KVM_CSR_ESTAT 0x5 /* Exception status */ +#define KVM_ESTAT_ESUBCODE_SHIFT 22 +#define KVM_ESTAT_ESUBCODE_WIDTH 9 +#define KVM_ESTAT_ESUBCODE (_ULCAST_(0x1ff) << KVM_ESTAT_ESUBCODE_SHIFT) +#define KVM_ESTAT_EXC_SHIFT 16 +#define KVM_ESTAT_EXC_WIDTH 6 +#define KVM_ESTAT_EXC (_ULCAST_(0x3f) << KVM_ESTAT_EXC_SHIFT) +#define KVM_ESTAT_IS_SHIFT 0 +#define KVM_ESTAT_IS_WIDTH 15 +#define KVM_ESTAT_IS (_ULCAST_(0x7fff) << KVM_ESTAT_IS_SHIFT) + +#define KVM_CSR_ERA 0x6 /* ERA */ +#define KVM_CSR_BADV 0x7 /* Bad virtual address */ +#define KVM_CSR_BADI 0x8 /* Bad instruction */ +#define KVM_CSR_EENTRY 0xc /* Exception entry base address */ +#define KVM_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize, NP */ +#define KVM_CSR_TLBEHI 0x11 /* TLB EntryHi */ +#define KVM_CSR_TLBELO0 0x12 /* TLB EntryLo0 */ +#define KVM_CSR_TLBELO1 0x13 /* TLB EntryLo1 */ +#define KVM_CSR_GTLBC 0x15 /* Guest TLB control */ +#define KVM_GTLBC_TGID_SHIFT 16 +#define KVM_GTLBC_TGID_WIDTH 8 +#define KVM_GTLBC_TGID (_ULCAST_(0xff) << KVM_GTLBC_TGID_SHIFT) +#define KVM_GTLBC_TOTI_SHIFT 13 +#define KVM_GTLBC_TOTI (_ULCAST_(0x1) << KVM_GTLBC_TOTI_SHIFT) +#define KVM_GTLBC_USETGID_SHIFT 12 +#define KVM_GTLBC_USETGID (_ULCAST_(0x1) << KVM_GTLBC_USETGID_SHIFT) +#define KVM_GTLBC_GMTLBSZ_SHIFT 0 +#define KVM_GTLBC_GMTLBSZ_WIDTH 6 +#define KVM_GTLBC_GMTLBSZ (_ULCAST_(0x3f) << KVM_GTLBC_GMTLBSZ_SHIFT) + +#define KVM_CSR_TRGP 0x16 /* TLBR read guest info */ +#define KVM_CSR_ASID 0x18 /* ASID */ +#define KVM_CSR_PGDL 0x19 /* Page table base address when VA[47] = 0 */ +#define KVM_CSR_PGDH 0x1a /* Page table base address when VA[47] = 1 */ +#define KVM_CSR_PGD 0x1b /* Page table base */ +#define KVM_CSR_PWCTL0 0x1c /* PWCtl0 */ +#define KVM_CSR_PWCTL1 0x1d /* PWCtl1 */ +#define KVM_CSR_STLBPGSIZE 0x1e +#define KVM_CSR_RVACFG 0x1f +#define KVM_CSR_CPUID 0x20 /* CPU core number */ +#define KVM_CSR_PRCFG1 0x21 /* Config1 */ +#define KVM_CSR_PRCFG2 0x22 /* Config2 */ +#define KVM_CSR_PRCFG3 0x23 /* Config3 */ +#define KVM_CSR_KS0 0x30 +#define KVM_CSR_KS1 0x31 +#define KVM_CSR_KS2 0x32 +#define KVM_CSR_KS3 0x33 +#define KVM_CSR_KS4 0x34 +#define KVM_CSR_KS5 0x35 +#define KVM_CSR_KS6 0x36 +#define KVM_CSR_KS7 0x37 +#define KVM_CSR_KS8 0x38 +#define KVM_CSR_TMID 0x40 /* Timer ID */ +#define KVM_CSR_TCFG 0x41 /* Timer config */ +#define KVM_TCFG_VAL_SHIFT 2 +#define KVM_TCFG_VAL_WIDTH 48 +#define KVM_TCFG_VAL (_ULCAST_(0x3fffffffffff) << KVM_TCFG_VAL_SHIFT) +#define KVM_TCFG_PERIOD_SHIFT 1 +#define KVM_TCFG_PERIOD (_ULCAST_(0x1) << KVM_TCFG_PERIOD_SHIFT) +#define KVM_TCFG_EN (_ULCAST_(0x1)) + +#define KVM_CSR_TVAL 0x42 /* Timer value */ +#define KVM_CSR_CNTC 0x43 /* Timer offset */ +#define KVM_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define KVM_CSR_GSTAT 0x50 /* Guest status */ +#define KVM_GSTAT_GID_SHIFT 16 +#define KVM_GSTAT_GID_WIDTH 8 +#define KVM_GSTAT_GID 
(_ULCAST_(0xff) << KVM_GSTAT_GID_SHIFT) +#define KVM_GSTAT_GIDBIT_SHIFT 4 +#define KVM_GSTAT_GIDBIT_WIDTH 6 +#define KVM_GSTAT_GIDBIT (_ULCAST_(0x3f) << KVM_GSTAT_GIDBIT_SHIFT) +#define KVM_GSTAT_PVM_SHIFT 1 +#define KVM_GSTAT_PVM (_ULCAST_(0x1) << KVM_GSTAT_PVM_SHIFT) +#define KVM_GSTAT_VM_SHIFT 0 +#define KVM_GSTAT_VM (_ULCAST_(0x1) << KVM_GSTAT_VM_SHIFT) + +#define KVM_CSR_GCFG 0x51 /* Guest config */ +#define KVM_GCFG_GPERF_SHIFT 24 +#define KVM_GCFG_GPERF_WIDTH 3 +#define KVM_GCFG_GPERF (_ULCAST_(0x7) << KVM_GCFG_GPERF_SHIFT) +#define KVM_GCFG_GCI_SHIFT 20 +#define KVM_GCFG_GCI_WIDTH 2 +#define KVM_GCFG_GCI (_ULCAST_(0x3) << KVM_GCFG_GCI_SHIFT) +#define KVM_GCFG_GCI_ALL (_ULCAST_(0x0) << KVM_GCFG_GCI_SHIFT) +#define KVM_GCFG_GCI_HIT (_ULCAST_(0x1) << KVM_GCFG_GCI_SHIFT) +#define KVM_GCFG_GCI_SECURE (_ULCAST_(0x2) << KVM_GCFG_GCI_SHIFT) +#define KVM_GCFG_GCIP_SHIFT 16 +#define KVM_GCFG_GCIP (_ULCAST_(0xf) << KVM_GCFG_GCIP_SHIFT) +#define KVM_GCFG_GCIP_ALL (_ULCAST_(0x1) << KVM_GCFG_GCIP_SHIFT) +#define KVM_GCFG_GCIP_HIT (_ULCAST_(0x1) << (KVM_GCFG_GCIP_SHIFT + 1)) +#define KVM_GCFG_GCIP_SECURE (_ULCAST_(0x1) << (KVM_GCFG_GCIP_SHIFT + 2)) +#define KVM_GCFG_TORU_SHIFT 15 +#define KVM_GCFG_TORU (_ULCAST_(0x1) << KVM_GCFG_TORU_SHIFT) +#define KVM_GCFG_TORUP_SHIFT 14 +#define KVM_GCFG_TORUP (_ULCAST_(0x1) << KVM_GCFG_TORUP_SHIFT) +#define KVM_GCFG_TOP_SHIFT 13 +#define KVM_GCFG_TOP (_ULCAST_(0x1) << KVM_GCFG_TOP_SHIFT) +#define KVM_GCFG_TOPP_SHIFT 12 +#define KVM_GCFG_TOPP (_ULCAST_(0x1) << KVM_GCFG_TOPP_SHIFT) +#define KVM_GCFG_TOE_SHIFT 11 +#define KVM_GCFG_TOE (_ULCAST_(0x1) << KVM_GCFG_TOE_SHIFT) +#define KVM_GCFG_TOEP_SHIFT 10 +#define KVM_GCFG_TOEP (_ULCAST_(0x1) << KVM_GCFG_TOEP_SHIFT) +#define KVM_GCFG_TIT_SHIFT 9 +#define KVM_GCFG_TIT (_ULCAST_(0x1) << KVM_GCFG_TIT_SHIFT) +#define KVM_GCFG_TITP_SHIFT 8 +#define KVM_GCFG_TITP (_ULCAST_(0x1) << KVM_GCFG_TITP_SHIFT) +#define KVM_GCFG_SIT_SHIFT 7 +#define KVM_GCFG_SIT (_ULCAST_(0x1) << KVM_GCFG_SIT_SHIFT) +#define KVM_GCFG_SITP_SHIFT 6 +#define KVM_GCFG_SITP (_ULCAST_(0x1) << KVM_GCFG_SITP_SHIFT) +#define KVM_GCFG_MATC_SHITF 4 +#define KVM_GCFG_MATC_WIDTH 2 +#define KVM_GCFG_MATC_MASK (_ULCAST_(0x3) << KVM_GCFG_MATC_SHITF) +#define KVM_GCFG_MATC_GUEST (_ULCAST_(0x0) << KVM_GCFG_MATC_SHITF) +#define KVM_GCFG_MATC_ROOT (_ULCAST_(0x1) << KVM_GCFG_MATC_SHITF) +#define KVM_GCFG_MATC_NEST (_ULCAST_(0x2) << KVM_GCFG_MATC_SHITF) +#define KVM_GCFG_MATP_SHITF 0 +#define KVM_GCFG_MATP_WIDTH 4 +#define KVM_GCFG_MATR_MASK (_ULCAST_(0x3) << KVM_GCFG_MATP_SHITF) +#define KVM_GCFG_MATP_GUEST (_ULCAST_(0x0) << KVM_GCFG_MATP_SHITF) +#define KVM_GCFG_MATP_ROOT (_ULCAST_(0x1) << KVM_GCFG_MATP_SHITF) +#define KVM_GCFG_MATP_NEST (_ULCAST_(0x2) << KVM_GCFG_MATP_SHITF) + +#define KVM_CSR_GINTC 0x52 /* Guest interrupt control */ +#define KVM_CSR_GCNTC 0x53 /* Guest timer offset */ +#define KVM_CSR_LLBCTL 0x60 /* LLBit control */ +#define KVM_LLBCTL_ROLLB_SHIFT 0 +#define KVM_LLBCTL_ROLLB (_ULCAST_(1) << KVM_LLBCTL_ROLLB_SHIFT) +#define KVM_LLBCTL_WCLLB_SHIFT 1 +#define KVM_LLBCTL_WCLLB (_ULCAST_(1) << KVM_LLBCTL_WCLLB_SHIFT) +#define KVM_LLBCTL_KLO_SHIFT 2 +#define KVM_LLBCTL_KLO (_ULCAST_(1) << KVM_LLBCTL_KLO_SHIFT) + +#define KVM_CSR_IMPCTL1 0x80 /* Loongson config1 */ +#define KVM_CSR_IMPCTL2 0x81 /* Loongson config2 */ +#define KVM_CSR_GNMI 0x82 +#define KVM_CSR_TLBRENTRY 0x88 /* TLB refill exception base address */ +#define KVM_CSR_TLBRBADV 0x89 /* TLB refill badvaddr */ +#define KVM_CSR_TLBRERA 0x8a /* TLB refill ERA */ +#define KVM_CSR_TLBRSAVE 0x8b /* 
KScratch for TLB refill exception */ +#define KVM_CSR_TLBRELO0 0x8c /* TLB refill entrylo0 */ +#define KVM_CSR_TLBRELO1 0x8d /* TLB refill entrylo1 */ +#define KVM_CSR_TLBREHI 0x8e /* TLB refill entryhi */ +#define KVM_CSR_TLBRPRMD 0x8f /* TLB refill mode info */ +#define KVM_CSR_MERRCTL 0x90 /* ERRCTL */ +#define KVM_CSR_MERRINFO1 0x91 /* Error info1 */ +#define KVM_CSR_MERRINFO2 0x92 /* Error info2 */ +#define KVM_CSR_MERRENTRY 0x93 /* Error exception base address */ +#define KVM_CSR_MERRERA 0x94 /* Error exception PC */ +#define KVM_CSR_MERRSAVE 0x95 /* KScratch for machine error exception */ +#define KVM_CSR_CTAG 0x98 /* TagLo + TagHi */ +#define KVM_CSR_DMWIN0 0x180 /* 64 direct map win0: MEM & IF */ +#define KVM_CSR_DMWIN1 0x181 /* 64 direct map win1: MEM & IF */ +#define KVM_CSR_DMWIN2 0x182 /* 64 direct map win2: MEM */ +#define KVM_CSR_DMWIN3 0x183 /* 64 direct map win3: MEM */ +#define KVM_CSR_PERFCTRL0 0x200 /* 32 perf event 0 config */ +#define KVM_CSR_PERFCNTR0 0x201 /* 64 perf event 0 count value */ +#define KVM_CSR_PERFCTRL1 0x202 /* 32 perf event 1 config */ +#define KVM_CSR_PERFCNTR1 0x203 /* 64 perf event 1 count value */ +#define KVM_CSR_PERFCTRL2 0x204 /* 32 perf event 2 config */ +#define KVM_CSR_PERFCNTR2 0x205 /* 64 perf event 2 count value */ +#define KVM_CSR_PERFCTRL3 0x206 /* 32 perf event 3 config */ +#define KVM_CSR_PERFCNTR3 0x207 /* 64 perf event 3 count value */ +#define KVM_CSR_MWPC 0x300 /* data breakpoint config */ +#define KVM_CSR_MWPS 0x301 /* data breakpoint status */ +#define KVM_CSR_FWPC 0x380 /* instruction breakpoint config */ +#define KVM_CSR_FWPS 0x381 /* instruction breakpoint status */ +#define KVM_CSR_DEBUG 0x500 /* debug config */ +#define KVM_CSR_DERA 0x501 /* debug era */ +#define KVM_CSR_DESAVE 0x502 /* debug save */ + +#define KVM_IOCSR_FEATURES 0x8 +#define KVM_IOCSRF_TEMP BIT_ULL(0) +#define KVM_IOCSRF_NODECNT BIT_ULL(1) +#define KVM_IOCSRF_MSI BIT_ULL(2) +#define KVM_IOCSRF_EXTIOI BIT_ULL(3) +#define KVM_IOCSRF_CSRIPI BIT_ULL(4) +#define KVM_IOCSRF_FREQCSR BIT_ULL(5) +#define KVM_IOCSRF_FREQSCALE BIT_ULL(6) +#define KVM_IOCSRF_DVFSV1 BIT_ULL(7) +#define KVM_IOCSRF_EXTIOI_DECODE BIT_ULL(9) +#define KVM_IOCSRF_FLATMODE BIT_ULL(10) +#define KVM_IOCSRF_VM BIT_ULL(11) + +#define KVM_IOCSR_VENDOR 0x10 +#define KVM_IOCSR_CPUNAME 0x20 +#define KVM_IOCSR_NODECNT 0x408 + +#define KVM_IOCSR_MISC_FUNC 0x420 +#define KVM_IOCSRF_MISC_FUNC_EXT_IOI_EN BIT_ULL(48) + +/* PerCore CSR, only accessable by local cores */ +#define KVM_IOCSR_IPI_STATUS 0x1000 +#define KVM_IOCSR_IPI_SEND 0x1040 +#define KVM_IOCSR_MBUF_SEND 0x1048 +#define KVM_IOCSR_EXTIOI_NODEMAP_BASE 0x14a0 +#define KVM_IOCSR_EXTIOI_IPMAP_BASE 0x14c0 +#define KVM_IOCSR_EXTIOI_EN_BASE 0x1600 +#define KVM_IOCSR_EXTIOI_BOUNCE_BASE 0x1680 +#define KVM_IOCSR_EXTIOI_ISR_BASE 0x1800 +#define KVM_IOCSR_EXTIOI_ROUTE_BASE 0x1c00 + +#ifndef __ASSEMBLY__ + +/* CSR */ +static inline u32 kvm_csr_readl(u32 reg) +{ + u32 val; + + asm volatile ( + "csrrd %[val], %[reg] \n" + : [val] "=r" (val) + : [reg] "i" (reg) + : "memory"); + return val; +} + +static inline u64 kvm_csr_readq(u32 reg) +{ + u64 val; + + asm volatile ( + "csrrd %[val], %[reg] \n" + : [val] "=r" (val) + : [reg] "i" (reg) + : "memory"); + return val; +} + +static inline void kvm_csr_writel(u32 val, u32 reg) +{ + asm volatile ( + "csrwr %[val], %[reg] \n" + : [val] "+r" (val) + : [reg] "i" (reg) + : "memory"); +} + +static inline void kvm_csr_writeq(u64 val, u32 reg) +{ + asm volatile ( + "csrwr %[val], %[reg] \n" + : [val] "+r" (val) + : [reg] 
"i" (reg) + : "memory"); +} + +static inline u32 kvm_csr_xchgl(u32 val, u32 mask, u32 reg) +{ + asm volatile ( + "csrxchg %[val], %[mask], %[reg] \n" + : [val] "+r" (val) + : [mask] "r" (mask), [reg] "i" (reg) + : "memory"); + return val; +} + +static inline u64 kvm_csr_xchgq(u64 val, u64 mask, u32 reg) +{ + asm volatile ( + "csrxchg %[val], %[mask], %[reg] \n" + : [val] "+r" (val) + : [mask] "r" (mask), [reg] "i" (reg) + : "memory"); + return val; +} + + +/* IOCSR */ +static inline u32 kvm_iocsr_readl(u32 reg) +{ + u32 val; + + asm volatile ( + "iocsrrd.w %[val], %[reg] \n" + : [val] "=r" (val) + : [reg] "r" (reg) + : "memory"); + return val; +} + +static inline u64 kvm_iocsr_readq(u32 reg) +{ + u64 val; + + asm volatile ( + "iocsrrd.d %[val], %[reg] \n" + : [val] "=r" (val) + : [reg] "r" (reg) + : "memory"); + return val; +} + +static inline void kvm_iocsr_writeb(u8 val, u32 reg) +{ + asm volatile ( + "iocsrwr.b %[val], %[reg] \n" + : + : [val] "r" (val), [reg] "r" (reg) + : "memory"); +} + +static inline void kvm_iocsr_writel(u32 val, u32 reg) +{ + asm volatile ( + "iocsrwr.w %[val], %[reg] \n" + : + : [val] "r" (val), [reg] "r" (reg) + : "memory"); +} + +static inline void kvm_iocsr_writeq(u64 val, u32 reg) +{ + asm volatile ( + "iocsrwr.d %[val], %[reg] \n" + : + : [val] "r" (val), [reg] "r" (reg) + : "memory"); +} + + +/* GCSR */ +static inline u64 kvm_gcsr_read(u32 reg) +{ + u64 val = 0; + + asm volatile ( + "parse_r __reg, %[val] \n" + ".word 0x5 << 24 | %[reg] << 10 | 0 << 5 | __reg \n" + : [val] "+r" (val) + : [reg] "i" (reg) + : "memory"); + return val; +} + +static inline void kvm_gcsr_write(u64 val, u32 reg) +{ + asm volatile ( + "parse_r __reg, %[val] \n" + ".word 0x5 << 24 | %[reg] << 10 | 1 << 5 | __reg \n" + : [val] "+r" (val) + : [reg] "i" (reg) + : "memory"); +} + +static inline u64 kvm_gcsr_xchg(u64 val, u64 mask, u32 reg) +{ + asm volatile ( + "parse_r __rd, %[val] \n" + "parse_r __rj, %[mask] \n" + ".word 0x5 << 24 | %[reg] << 10 | __rj << 5 | __rd \n" + : [val] "+r" (val) + : [mask] "r" (mask), [reg] "i" (reg) + : "memory"); + return val; +} + +#endif /* !__ASSEMBLY__ */ + +#define kvm_read_csr_euen() kvm_csr_readq(KVM_CSR_EUEN) +#define kvm_write_csr_euen(val) kvm_csr_writeq(val, KVM_CSR_EUEN) +#define kvm_read_csr_ecfg() kvm_csr_readq(KVM_CSR_ECFG) +#define kvm_write_csr_ecfg(val) kvm_csr_writeq(val, KVM_CSR_ECFG) +#define kvm_write_csr_perfctrl0(val) kvm_csr_writeq(val, KVM_CSR_PERFCTRL0) +#define kvm_write_csr_perfcntr0(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCNTR0) +#define kvm_write_csr_perfctrl1(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCTRL1) +#define kvm_write_csr_perfcntr1(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCNTR1) +#define kvm_write_csr_perfctrl2(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCTRL2) +#define kvm_write_csr_perfcntr2(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCNTR2) +#define kvm_write_csr_perfctrl3(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCTRL3) +#define kvm_write_csr_perfcntr3(val) kvm_csr_writeq(val, LOONGARCH_CSR_PERFCNTR3) +#define kvm_read_csr_impctl1() kvm_csr_readq(LOONGARCH_CSR_IMPCTL1) +#define kvm_write_csr_impctl1(val) kvm_csr_writeq(val, LOONGARCH_CSR_IMPCTL1) + + +/* Guest related CSRS */ +#define kvm_read_csr_gtlbc() kvm_csr_readq(KVM_CSR_GTLBC) +#define kvm_write_csr_gtlbc(val) kvm_csr_writeq(val, KVM_CSR_GTLBC) +#define kvm_read_csr_trgp() kvm_csr_readq(KVM_CSR_TRGP) +#define kvm_read_csr_gcfg() kvm_csr_readq(KVM_CSR_GCFG) +#define kvm_write_csr_gcfg(val) kvm_csr_writeq(val, KVM_CSR_GCFG) +#define 
kvm_read_csr_gstat() kvm_csr_readq(KVM_CSR_GSTAT) +#define kvm_write_csr_gstat(val) kvm_csr_writeq(val, KVM_CSR_GSTAT) +#define kvm_read_csr_gintc() kvm_csr_readq(KVM_CSR_GINTC) +#define kvm_write_csr_gintc(val) kvm_csr_writeq(val, KVM_CSR_GINTC) +#define kvm_read_csr_gcntc() kvm_csr_readq(KVM_CSR_GCNTC) +#define kvm_write_csr_gcntc(val) kvm_csr_writeq(val, KVM_CSR_GCNTC) + +/* Guest CSRS read and write */ +#define kvm_read_gcsr_crmd() kvm_gcsr_read(KVM_CSR_CRMD) +#define kvm_write_gcsr_crmd(val) kvm_gcsr_write(val, KVM_CSR_CRMD) +#define kvm_read_gcsr_prmd() kvm_gcsr_read(KVM_CSR_PRMD) +#define kvm_write_gcsr_prmd(val) kvm_gcsr_write(val, KVM_CSR_PRMD) +#define kvm_read_gcsr_euen() kvm_gcsr_read(KVM_CSR_EUEN) +#define kvm_write_gcsr_euen(val) kvm_gcsr_write(val, KVM_CSR_EUEN) +#define kvm_read_gcsr_misc() kvm_gcsr_read(KVM_CSR_MISC) +#define kvm_write_gcsr_misc(val) kvm_gcsr_write(val, KVM_CSR_MISC) +#define kvm_read_gcsr_ecfg() kvm_gcsr_read(KVM_CSR_ECFG) +#define kvm_write_gcsr_ecfg(val) kvm_gcsr_write(val, KVM_CSR_ECFG) +#define kvm_read_gcsr_estat() kvm_gcsr_read(KVM_CSR_ESTAT) +#define kvm_write_gcsr_estat(val) kvm_gcsr_write(val, KVM_CSR_ESTAT) +#define kvm_read_gcsr_era() kvm_gcsr_read(KVM_CSR_ERA) +#define kvm_write_gcsr_era(val) kvm_gcsr_write(val, KVM_CSR_ERA) +#define kvm_read_gcsr_badv() kvm_gcsr_read(KVM_CSR_BADV) +#define kvm_write_gcsr_badv(val) kvm_gcsr_write(val, KVM_CSR_BADV) +#define kvm_read_gcsr_badi() kvm_gcsr_read(KVM_CSR_BADI) +#define kvm_write_gcsr_badi(val) kvm_gcsr_write(val, KVM_CSR_BADI) +#define kvm_read_gcsr_eentry() kvm_gcsr_read(KVM_CSR_EENTRY) +#define kvm_write_gcsr_eentry(val) kvm_gcsr_write(val, KVM_CSR_EENTRY) + +#define kvm_read_gcsr_tlbidx() kvm_gcsr_read(KVM_CSR_TLBIDX) +#define kvm_write_gcsr_tlbidx(val) kvm_gcsr_write(val, KVM_CSR_TLBIDX) +#define kvm_read_gcsr_tlbhi() kvm_gcsr_read(KVM_CSR_TLBEHI) +#define kvm_write_gcsr_tlbhi(val) kvm_gcsr_write(val, KVM_CSR_TLBEHI) +#define kvm_read_gcsr_tlblo0() kvm_gcsr_read(KVM_CSR_TLBELO0) +#define kvm_write_gcsr_tlblo0(val) kvm_gcsr_write(val, KVM_CSR_TLBELO0) +#define kvm_read_gcsr_tlblo1() kvm_gcsr_read(KVM_CSR_TLBELO1) +#define kvm_write_gcsr_tlblo1(val) kvm_gcsr_write(val, KVM_CSR_TLBELO1) + +#define kvm_read_gcsr_asid() kvm_gcsr_read(KVM_CSR_ASID) +#define kvm_write_gcsr_asid(val) kvm_gcsr_write(val, KVM_CSR_ASID) +#define kvm_read_gcsr_pgdl() kvm_gcsr_read(KVM_CSR_PGDL) +#define kvm_write_gcsr_pgdl(val) kvm_gcsr_write(val, KVM_CSR_PGDL) +#define kvm_read_gcsr_pgdh() kvm_gcsr_read(KVM_CSR_PGDH) +#define kvm_write_gcsr_pgdh(val) kvm_gcsr_write(val, KVM_CSR_PGDH) +#define kvm_write_gcsr_pgd(val) kvm_gcsr_write(val, KVM_CSR_PGD) +#define kvm_read_gcsr_pgd() kvm_gcsr_read(KVM_CSR_PGD) +#define kvm_read_gcsr_pwctl0() kvm_gcsr_read(KVM_CSR_PWCTL0) +#define kvm_write_gcsr_pwctl0(val) kvm_gcsr_write(val, KVM_CSR_PWCTL0) +#define kvm_read_gcsr_pwctl1() kvm_gcsr_read(KVM_CSR_PWCTL1) +#define kvm_write_gcsr_pwctl1(val) kvm_gcsr_write(val, KVM_CSR_PWCTL1) +#define kvm_read_gcsr_stlbpgsize() kvm_gcsr_read(KVM_CSR_STLBPGSIZE) +#define kvm_write_gcsr_stlbpgsize(val) kvm_gcsr_write(val, KVM_CSR_STLBPGSIZE) +#define kvm_read_gcsr_rvacfg() kvm_gcsr_read(KVM_CSR_RVACFG) +#define kvm_write_gcsr_rvacfg(val) kvm_gcsr_write(val, KVM_CSR_RVACFG) + +#define kvm_read_gcsr_cpuid() kvm_gcsr_read(KVM_CSR_CPUID) +#define kvm_write_gcsr_cpuid(val) kvm_gcsr_write(val, KVM_CSR_CPUID) +#define kvm_read_gcsr_prcfg1() kvm_gcsr_read(KVM_CSR_PRCFG1) +#define kvm_write_gcsr_prcfg1(val) kvm_gcsr_write(val, KVM_CSR_PRCFG1) +#define 
kvm_read_gcsr_prcfg2() kvm_gcsr_read(KVM_CSR_PRCFG2) +#define kvm_write_gcsr_prcfg2(val) kvm_gcsr_write(val, KVM_CSR_PRCFG2) +#define kvm_read_gcsr_prcfg3() kvm_gcsr_read(KVM_CSR_PRCFG3) +#define kvm_write_gcsr_prcfg3(val) kvm_gcsr_write(val, KVM_CSR_PRCFG3) + +#define kvm_read_gcsr_kscratch0() kvm_gcsr_read(KVM_CSR_KS0) +#define kvm_write_gcsr_kscratch0(val) kvm_gcsr_write(val, KVM_CSR_KS0) +#define kvm_read_gcsr_kscratch1() kvm_gcsr_read(KVM_CSR_KS1) +#define kvm_write_gcsr_kscratch1(val) kvm_gcsr_write(val, KVM_CSR_KS1) +#define kvm_read_gcsr_kscratch2() kvm_gcsr_read(KVM_CSR_KS2) +#define kvm_write_gcsr_kscratch2(val) kvm_gcsr_write(val, KVM_CSR_KS2) +#define kvm_read_gcsr_kscratch3() kvm_gcsr_read(KVM_CSR_KS3) +#define kvm_write_gcsr_kscratch3(val) kvm_gcsr_write(val, KVM_CSR_KS3) +#define kvm_read_gcsr_kscratch4() kvm_gcsr_read(KVM_CSR_KS4) +#define kvm_write_gcsr_kscratch4(val) kvm_gcsr_write(val, KVM_CSR_KS4) +#define kvm_read_gcsr_kscratch5() kvm_gcsr_read(KVM_CSR_KS5) +#define kvm_write_gcsr_kscratch5(val) kvm_gcsr_write(val, KVM_CSR_KS5) +#define kvm_read_gcsr_kscratch6() kvm_gcsr_read(KVM_CSR_KS6) +#define kvm_write_gcsr_kscratch6(val) kvm_gcsr_write(val, KVM_CSR_KS6) +#define kvm_read_gcsr_kscratch7() kvm_gcsr_read(KVM_CSR_KS7) +#define kvm_write_gcsr_kscratch7(val) kvm_gcsr_write(val, KVM_CSR_KS7) + +#define kvm_read_gcsr_timerid() kvm_gcsr_read(KVM_CSR_TMID) +#define kvm_write_gcsr_timerid(val) kvm_gcsr_write(val, KVM_CSR_TMID) +#define kvm_read_gcsr_timercfg() kvm_gcsr_read(KVM_CSR_TCFG) +#define kvm_write_gcsr_timercfg(val) kvm_gcsr_write(val, KVM_CSR_TCFG) +#define kvm_read_gcsr_timertick() kvm_gcsr_read(KVM_CSR_TVAL) +#define kvm_write_gcsr_timertick(val) kvm_gcsr_write(val, KVM_CSR_TVAL) +#define kvm_read_gcsr_timeroffset() kvm_gcsr_read(KVM_CSR_CNTC) +#define kvm_write_gcsr_timeroffset(val) kvm_gcsr_write(val, KVM_CSR_CNTC) + +#define kvm_read_gcsr_llbctl() kvm_gcsr_read(KVM_CSR_LLBCTL) +#define kvm_write_gcsr_llbctl(val) kvm_gcsr_write(val, KVM_CSR_LLBCTL) + +#define kvm_read_gcsr_tlbrentry() kvm_gcsr_read(KVM_CSR_TLBRENTRY) +#define kvm_write_gcsr_tlbrentry(val) kvm_gcsr_write(val, KVM_CSR_TLBRENTRY) +#define kvm_read_gcsr_tlbrbadv() kvm_gcsr_read(KVM_CSR_TLBRBADV) +#define kvm_write_gcsr_tlbrbadv(val) kvm_gcsr_write(val, KVM_CSR_TLBRBADV) +#define kvm_read_gcsr_tlbrera() kvm_gcsr_read(KVM_CSR_TLBRERA) +#define kvm_write_gcsr_tlbrera(val) kvm_gcsr_write(val, KVM_CSR_TLBRERA) +#define kvm_read_gcsr_tlbrsave() kvm_gcsr_read(KVM_CSR_TLBRSAVE) +#define kvm_write_gcsr_tlbrsave(val) kvm_gcsr_write(val, KVM_CSR_TLBRSAVE) +#define kvm_read_gcsr_tlbrelo0() kvm_gcsr_read(KVM_CSR_TLBRELO0) +#define kvm_write_gcsr_tlbrelo0(val) kvm_gcsr_write(val, KVM_CSR_TLBRELO0) +#define kvm_read_gcsr_tlbrelo1() kvm_gcsr_read(KVM_CSR_TLBRELO1) +#define kvm_write_gcsr_tlbrelo1(val) kvm_gcsr_write(val, KVM_CSR_TLBRELO1) +#define kvm_read_gcsr_tlbrehi() kvm_gcsr_read(KVM_CSR_TLBREHI) +#define kvm_write_gcsr_tlbrehi(val) kvm_gcsr_write(val, KVM_CSR_TLBREHI) +#define kvm_read_gcsr_tlbrprmd() kvm_gcsr_read(KVM_CSR_TLBRPRMD) +#define kvm_write_gcsr_tlbrprmd(val) kvm_gcsr_write(val, KVM_CSR_TLBRPRMD) + +#define kvm_read_gcsr_directwin0() kvm_gcsr_read(KVM_CSR_DMWIN0) +#define kvm_write_gcsr_directwin0(val) kvm_gcsr_write(val, KVM_CSR_DMWIN0) +#define kvm_read_gcsr_directwin1() kvm_gcsr_read(KVM_CSR_DMWIN1) +#define kvm_write_gcsr_directwin1(val) kvm_gcsr_write(val, KVM_CSR_DMWIN1) +#define kvm_read_gcsr_directwin2() kvm_gcsr_read(KVM_CSR_DMWIN2) +#define kvm_write_gcsr_directwin2(val) 
kvm_gcsr_write(val, KVM_CSR_DMWIN2)
+#define kvm_read_gcsr_directwin3()	kvm_gcsr_read(KVM_CSR_DMWIN3)
+#define kvm_write_gcsr_directwin3(val)	kvm_gcsr_write(val, KVM_CSR_DMWIN3)
+
+#ifndef __ASSEMBLY__
+
+static inline unsigned long
+kvm_set_csr_gtlbc(unsigned long set)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_gtlbc();
+	new = res | set;
+	kvm_write_csr_gtlbc(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_set_csr_euen(unsigned long set)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_euen();
+	new = res | set;
+	kvm_write_csr_euen(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_set_csr_gintc(unsigned long set)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_gintc();
+	new = res | set;
+	kvm_write_csr_gintc(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_set_gcsr_llbctl(unsigned long set)
+{
+	unsigned long res, new;
+
+	res = kvm_read_gcsr_llbctl();
+	new = res | set;
+	kvm_write_gcsr_llbctl(new);
+
+	return res;
+}
+
+
+static inline unsigned long
+kvm_clear_csr_gtlbc(unsigned long clear)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_gtlbc();
+	new = res & ~clear;
+	kvm_write_csr_gtlbc(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_clear_csr_euen(unsigned long clear)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_euen();
+	new = res & ~clear;
+	kvm_write_csr_euen(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_clear_csr_gintc(unsigned long clear)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_gintc();
+	new = res & ~clear;
+	kvm_write_csr_gintc(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_change_csr_gstat(unsigned long change, unsigned long val)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_gstat();
+	new = res & ~change;
+	new |= (val & change);
+	kvm_write_csr_gstat(new);
+
+	return res;
+}
+
+static inline unsigned long
+kvm_change_csr_gcfg(unsigned long change, unsigned long val)
+{
+	unsigned long res, new;
+
+	res = kvm_read_csr_gcfg();
+	new = res & ~change;
+	new |= (val & change);
+	kvm_write_csr_gcfg(new);
+
+	return res;
+}
+
+
+#define kvm_set_gcsr_estat(val)	\
+	kvm_gcsr_xchg(val, val, KVM_CSR_ESTAT)
+#define kvm_clear_gcsr_estat(val)	\
+	kvm_gcsr_xchg(~(val), val, KVM_CSR_ESTAT)
+
+#endif
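Each helper above follows the same read-modify-write shape and returns the old CSR value so a caller can restore it afterwards. A standalone sketch of that contract, with a plain variable standing in for the hardware register (illustrative only, not part of the patch):

	#include <stdio.h>

	static unsigned long fake_csr;	/* stand-in for a real CSR */

	/* Same contract as kvm_set_csr_gtlbc() and friends:
	 * OR in the requested bits, return the previous value.
	 */
	static unsigned long fake_set_csr(unsigned long set)
	{
		unsigned long old = fake_csr;

		fake_csr = old | set;
		return old;
	}

	int main(void)
	{
		unsigned long saved = fake_set_csr(0x4);	/* enable one bit */

		/* ... work with the bit set ... */
		fake_csr = saved;	/* put the previous state back */
		printf("restored to %#lx\n", fake_csr);
		return 0;
	}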
+
+
+#if (_LOONGARCH_SZLONG == 32)
+#define KVM_LONG_ADD	add.w
+#define KVM_LONG_ADDI	addi.w
+#define KVM_LONG_SUB	sub.w
+#define KVM_LONG_L	ld.w
+#define KVM_LONG_S	st.w
+#define KVM_LONG_SLL	slli.w
+#define KVM_LONG_SLLV	sll.w
+#define KVM_LONG_SRL	srli.w
+#define KVM_LONG_SRLV	srl.w
+#define KVM_LONG_SRA	srai.w
+#define KVM_LONG_SRAV	sra.w
+
+#define KVM_LONGSIZE	4
+#define KVM_LONGMASK	3
+#define KVM_LONGLOG	2
+
+/*
+ * How to add/sub/load/store/shift pointers.
+ */
+
+#define KVM_PTR_ADD	add.w
+#define KVM_PTR_ADDI	addi.w
+#define KVM_PTR_SUB	sub.w
+#define KVM_PTR_L	ld.w
+#define KVM_PTR_S	st.w
+#define KVM_PTR_LI	li.w
+#define KVM_PTR_SLL	slli.w
+#define KVM_PTR_SLLV	sll.w
+#define KVM_PTR_SRL	srli.w
+#define KVM_PTR_SRLV	srl.w
+#define KVM_PTR_SRA	srai.w
+#define KVM_PTR_SRAV	sra.w
+
+#define KVM_PTR_SCALESHIFT	2
+
+#define KVM_PTRSIZE	4
+#define KVM_PTRLOG	2
+
+#endif
+
+#if (_LOONGARCH_SZLONG == 64)
+#define KVM_LONG_ADD	add.d
+#define KVM_LONG_ADDI	addi.d
+#define KVM_LONG_SUB	sub.d
+#define KVM_LONG_L	ld.d
+#define KVM_LONG_S	st.d
+#define KVM_LONG_SLL	slli.d
+#define KVM_LONG_SLLV	sll.d
+#define KVM_LONG_SRL	srli.d
+#define KVM_LONG_SRLV	srl.d
+#define KVM_LONG_SRA	srai.d
+#define KVM_LONG_SRAV	sra.d
+
+#define KVM_LONGSIZE	8
+#define KVM_LONGMASK	7
+#define KVM_LONGLOG	3
+
+/*
+ * How to add/sub/load/store/shift pointers.
+ */
+
+#define KVM_PTR_ADD	add.d
+#define KVM_PTR_ADDI	addi.d
+#define KVM_PTR_SUB	sub.d
+#define KVM_PTR_L	ld.d
+#define KVM_PTR_S	st.d
+#define KVM_PTR_LI	li.d
+#define KVM_PTR_SLL	slli.d
+#define KVM_PTR_SLLV	sll.d
+#define KVM_PTR_SRL	srli.d
+#define KVM_PTR_SRLV	srl.d
+#define KVM_PTR_SRA	srai.d
+#define KVM_PTR_SRAV	sra.d
+
+#define KVM_PTR_SCALESHIFT	3
+
+#define KVM_PTRSIZE	8
+#define KVM_PTRLOG	3
+#endif
+
+#endif /* __LOONGARCH_KVM_COMPAT_H__ */
diff --git a/arch/loongarch/kvm/kvmcpu.h b/arch/loongarch/kvm/kvmcpu.h
new file mode 100644
index 000000000000..8cb411ba9f17
--- /dev/null
+++ b/arch/loongarch/kvm/kvmcpu.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef __ASM_LOONGARCH_KVMCPU_H__
+#define __ASM_LOONGARCH_KVMCPU_H__
+
+#include
+#include
+
+#define LARCH_INT_SIP0		0
+#define LARCH_INT_SIP1		1
+#define LARCH_INT_IP0		2
+#define LARCH_INT_IP1		3
+#define LARCH_INT_IP2		4
+#define LARCH_INT_IP3		5
+#define LARCH_INT_IP4		6
+#define LARCH_INT_IP5		7
+#define LARCH_INT_IP6		8
+#define LARCH_INT_IP7		9
+#define LARCH_INT_PMU		10
+#define LARCH_INT_TIMER		11
+#define LARCH_INT_IPI		12
+#define LOONGARCH_EXC_MAX	(LARCH_INT_IPI + 1)
+#define LOONGARCH_EXC_IPNUM	(LOONGARCH_EXC_MAX)
+
+/* Controlled by 0x5 guest exst */
+#define CPU_SIP0	(_ULCAST_(1))
+#define CPU_SIP1	(_ULCAST_(1) << 1)
+#define CPU_PMU		(_ULCAST_(1) << 10)
+#define CPU_TIMER	(_ULCAST_(1) << 11)
+#define CPU_IPI		(_ULCAST_(1) << 12)
+
+/* Controlled by 0x52 guest exception VIP
+ * aligned to exst bit 5~12
+ */
+#define CPU_IP0		(_ULCAST_(1))
+#define CPU_IP1		(_ULCAST_(1) << 1)
+#define CPU_IP2		(_ULCAST_(1) << 2)
+#define CPU_IP3		(_ULCAST_(1) << 3)
+#define CPU_IP4		(_ULCAST_(1) << 4)
+#define CPU_IP5		(_ULCAST_(1) << 5)
+#define CPU_IP6		(_ULCAST_(1) << 6)
+#define CPU_IP7		(_ULCAST_(1) << 7)
+
+#define MNSEC_PER_SEC	(NSEC_PER_SEC >> 20)
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_LOONGSON_IRQ_TYPE_SHIFT	24
+#define KVM_LOONGSON_IRQ_TYPE_MASK	0xff
+#define KVM_LOONGSON_IRQ_VCPU_SHIFT	16
+#define KVM_LOONGSON_IRQ_VCPU_MASK	0xff
+#define KVM_LOONGSON_IRQ_NUM_SHIFT	0
+#define KVM_LOONGSON_IRQ_NUM_MASK	0xffff
+
+/* irq_type field */
+#define KVM_LOONGSON_IRQ_TYPE_CPU_IP	0
+#define KVM_LOONGSON_IRQ_TYPE_CPU_IO	1
+#define KVM_LOONGSON_IRQ_TYPE_HT	2
+#define KVM_LOONGSON_IRQ_TYPE_MSI	3
+#define KVM_LOONGSON_IRQ_TYPE_IOAPIC	4
+#define KVM_LOONGSON_IRQ_TYPE_ROUTE	5
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_LOONGSON_IRQ_CPU_IRQ	0
+#define KVM_LOONGSON_IRQ_CPU_FIQ	1
+#define KVM_LOONGSON_CPU_IP_NUM		8
+
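The KVM_LOONGSON_IRQ_* shift/mask pairs above define how user space packs an interrupt into the 32-bit irq field of the KVM_IRQ_LINE ioctl; kvm_vm_ioctl_irq_line() in loongarch.c unpacks the same three fields. A standalone sketch of the encoding side (pack_loongson_irq() is our illustrative helper, not part of the ABI):

	#include <stdint.h>
	#include <stdio.h>

	#define IRQ_TYPE_SHIFT	24
	#define IRQ_VCPU_SHIFT	16
	#define IRQ_NUM_SHIFT	0
	#define IRQ_TYPE_IOAPIC	4	/* KVM_LOONGSON_IRQ_TYPE_IOAPIC */

	/* Pack type/vcpu/number the way kvm_vm_ioctl_irq_line() expects. */
	static uint32_t pack_loongson_irq(uint32_t type, uint32_t vcpu, uint32_t num)
	{
		return (type << IRQ_TYPE_SHIFT) |
		       (vcpu << IRQ_VCPU_SHIFT) |
		       (num << IRQ_NUM_SHIFT);
	}

	int main(void)
	{
		/* Line 3 of the LS7A I/O APIC, no specific target vcpu. */
		printf("irq field: %#x\n",
		       pack_loongson_irq(IRQ_TYPE_IOAPIC, 0, 3));
		return 0;
	}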
+typedef union loongarch_instruction larch_inst;
+typedef int (*exit_handle_fn)(struct kvm_vcpu *);
+
+int _kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst);
+int _kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst);
+int _kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int _kvm_complete_iocsr_read(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int _kvm_emu_idle(struct kvm_vcpu *vcpu);
+int _kvm_handle_pv_hcall(struct kvm_vcpu *vcpu);
+int _kvm_pending_timer(struct kvm_vcpu *vcpu);
+int _kvm_handle_fault(struct kvm_vcpu *vcpu, int fault);
+void _kvm_deliver_intr(struct kvm_vcpu *vcpu);
+void irqchip_debug_init(struct kvm *kvm);
+void irqchip_debug_destroy(struct kvm *kvm);
+
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_lsx(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_lasx(struct kvm_vcpu *vcpu);
+void kvm_own_lbt(struct kvm_vcpu *vcpu);
+void kvm_restore_lbt(struct kvm_vcpu *vcpu);
+void kvm_save_lbt(struct kvm_vcpu *vcpu);
+void kvm_save_fpu(struct kvm_vcpu *vcpu);
+void kvm_restore_fpu(struct kvm_vcpu *vcpu);
+void kvm_save_lsx(struct kvm_vcpu *vcpu);
+void kvm_restore_lsx(struct kvm_vcpu *vcpu);
+void kvm_restore_lsx_upper(struct kvm_vcpu *vcpu);
+void kvm_save_lasx(struct kvm_vcpu *vcpu);
+void kvm_restore_lasx(struct kvm_vcpu *vcpu);
+void kvm_restore_lasx_upper(struct kvm_vcpu *vcpu);
+
+void kvm_lose_hw_perf(struct kvm_vcpu *vcpu);
+void kvm_restore_hw_perf(struct kvm_vcpu *vcpu);
+
+void kvm_acquire_timer(struct kvm_vcpu *vcpu);
+void kvm_reset_timer(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu);
+void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz);
+void kvm_restore_timer(struct kvm_vcpu *vcpu);
+void kvm_save_timer(struct kvm_vcpu *vcpu);
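The queue/dequeue helpers that close this header keep two per-vcpu bitmaps in lockstep: queuing an interrupt sets its bit in irq_pending and clears it in irq_clear, and dequeuing does the reverse. The same handshake in standalone toy form (plain C with no atomics, so illustrative only; the kernel versions use set_bit()/clear_bit()):

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t irq_pending, irq_clear;

	static void queue_irq(unsigned int irq)
	{
		irq_pending |= 1ULL << irq;
		irq_clear &= ~(1ULL << irq);
	}

	static void dequeue_irq(unsigned int irq)
	{
		irq_pending &= ~(1ULL << irq);
		irq_clear |= 1ULL << irq;
	}

	int main(void)
	{
		queue_irq(11);	/* LARCH_INT_TIMER in the header above */
		dequeue_irq(11);
		printf("pending=%#llx clear=%#llx\n",
		       (unsigned long long)irq_pending,
		       (unsigned long long)irq_clear);
		return 0;
	}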
+
+/*
+ * LoongArch KVM guest interrupt handling.
+ */
+static inline void _kvm_queue_irq(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+	set_bit(irq, &vcpu->arch.irq_pending);
+	clear_bit(irq, &vcpu->arch.irq_clear);
+}
+
+static inline void _kvm_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int irq)
+{
+	clear_bit(irq, &vcpu->arch.irq_pending);
+	set_bit(irq, &vcpu->arch.irq_clear);
+}
+
+#endif /* __ASM_LOONGARCH_KVMCPU_H__ */
diff --git a/arch/loongarch/kvm/kvmcsr.h b/arch/loongarch/kvm/kvmcsr.h
new file mode 100644
index 000000000000..a93ed920402f
--- /dev/null
+++ b/arch/loongarch/kvm/kvmcsr.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef __LOONGARCH_KVM_CSR_H__
+#define __LOONGARCH_KVM_CSR_H__
+#include
+#include
+#include
+#include
+#include "kvmcpu.h"
+
+#define kvm_read_hw_gcsr(id)		gcsr_read(id)
+#define kvm_write_hw_gcsr(csr, id, val)	gcsr_write(val, id)
+
+int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *v, int force);
+int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *v, int force);
+unsigned long _kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid);
+void _kvm_emu_write_csr(struct kvm_vcpu *vcpu, int csrid, unsigned long val);
+void _kvm_emu_xchg_csr(struct kvm_vcpu *vcpu, int csrid,
+	unsigned long csr_mask, unsigned long val);
+int _kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+static inline void kvm_save_hw_gcsr(struct loongarch_csrs *csr, int gid)
+{
+	csr->csrs[gid] = gcsr_read(gid);
+}
+
+static inline void kvm_restore_hw_gcsr(struct loongarch_csrs *csr, int gid)
+{
+	gcsr_write(csr->csrs[gid], gid);
+}
+
+static inline unsigned long kvm_read_sw_gcsr(struct loongarch_csrs *csr, int gid)
+{
+	return csr->csrs[gid];
+}
+
+static inline void kvm_write_sw_gcsr(struct loongarch_csrs *csr, int gid, unsigned long val)
+{
+	csr->csrs[gid] = val;
+}
+
+static inline void kvm_set_sw_gcsr(struct loongarch_csrs *csr, int gid, unsigned long val)
+{
+	csr->csrs[gid] |= val;
+}
+
+static inline void kvm_change_sw_gcsr(struct loongarch_csrs *csr, int gid, unsigned long mask,
+	unsigned long val)
+{
+	csr->csrs[gid] &= ~mask;
+	csr->csrs[gid] |= val & mask;
+}
+
+
+#define GET_HW_GCSR(id, csrid, v)				\
+	do {							\
+		if (csrid == id) {				\
+			*v = (long)kvm_read_hw_gcsr(csrid);	\
+			return 0;				\
+		}						\
+	} while (0)
+
+#define GET_SW_GCSR(csr, id, csrid, v)			\
+	do {						\
+		if (csrid == id) {			\
+			*v = kvm_read_sw_gcsr(csr, id);	\
+			return 0;			\
+		}					\
+	} while (0)
+
+#define SET_HW_GCSR(csr, id, csrid, v)			\
+	do {						\
+		if (csrid == id) {			\
+			kvm_write_hw_gcsr(csr, csrid, *v);	\
+			return 0;			\
+		}					\
+	} while (0)
+
+#define SET_SW_GCSR(csr, id, csrid, v)			\
+	do {						\
+		if (csrid == id) {			\
+			kvm_write_sw_gcsr(csr, csrid, *v);	\
+			return 0;			\
+		}					\
+	} while (0)
+
+int _kvm_init_iocsr(struct kvm *kvm);
+int _kvm_set_iocsr(struct kvm *kvm, struct kvm_iocsr_entry *__user argp);
+int _kvm_get_iocsr(struct kvm *kvm, struct kvm_iocsr_entry *__user argp);
+
+#define KVM_PMU_PLV_ENABLE	(CSR_PERFCTRL_PLV0 |	\
+				CSR_PERFCTRL_PLV1 |	\
+				CSR_PERFCTRL_PLV2 |	\
+				CSR_PERFCTRL_PLV3)
+
+#define CASE_WRITE_HW_PMU(vcpu, csr, id, csrid, v)	\
+	do {						\
+		if (csrid == id) {			\
+			if (v & KVM_PMU_PLV_ENABLE) {	\
+				write_csr_gcfg(read_csr_gcfg() | CSR_GCFG_GPERF);	\
+				kvm_write_hw_gcsr(csr, csrid, v | CSR_PERFCTRL_GMOD);	\
+				vcpu->arch.aux_inuse |= KVM_LARCH_PERF;	\
+				return;			\
+			} else {			\
+				kvm_write_sw_gcsr(csr, csrid, v);	\
+				return;			\
+			}				\
+		}					\
+	} while (0)
+
+#endif /* __LOONGARCH_KVM_CSR_H__ */
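The GET_*/SET_* macros above hide an early return, so a CSR accessor built from them reads as a flat list of candidate ids that falls through to an error path. A standalone toy with the same macro contract (the names and the tiny csrs[] array are ours; the real _kvm_getcsr() body is not part of this hunk):

	#include <stdio.h>

	static unsigned long csrs[4];

	/* Same shape as GET_SW_GCSR(): early-return from the caller on a match. */
	#define GET_SW(id, csrid, v)		\
		do {				\
			if ((csrid) == (id)) {	\
				*(v) = csrs[id];\
				return 0;	\
			}			\
		} while (0)

	static int getcsr(unsigned int csrid, unsigned long *v)
	{
		GET_SW(0, csrid, v);
		GET_SW(1, csrid, v);
		return -1;	/* no backing storage for this id */
	}

	int main(void)
	{
		unsigned long v;

		csrs[1] = 0xabc;
		if (getcsr(1, &v) == 0)
			printf("csr1 = %#lx\n", v);
		return 0;
	}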
diff --git a/arch/loongarch/kvm/loongarch.c b/arch/loongarch/kvm/loongarch.c
new file mode 100644
index 000000000000..104f0e31d748
--- /dev/null
+++ b/arch/loongarch/kvm/loongarch.c
@@ -0,0 +1,2208 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "kvmcpu.h"
+#include
+#include
+#include
+
+#include "intc/ls3a_ipi.h"
+#include "intc/ls7a_irq.h"
+#include "intc/ls3a_ext_irq.h"
+#include "kvm_compat.h"
+#include "kvmcsr.h"
+#include "ls_irq.h"
+
+/*
+ * Define the LoongArch KVM version.
+ * Bump it whenever the qemu/kvm interface changes.
+ */
+#define KVM_LOONGARCH_VERSION 1
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+struct kvm_stats_debugfs_item vcpu_debugfs_entries[] = {
+	VCPU_STAT("idle", idle_exits),
+	VCPU_STAT("signal", signal_exits),
+	VCPU_STAT("interrupt", int_exits),
+	VCPU_STAT("tlbmiss_ld", excep_exits[EXCCODE_TLBL]),
+	VCPU_STAT("tlbmiss_st", excep_exits[EXCCODE_TLBS]),
+	VCPU_STAT("tlb_ifetch", excep_exits[EXCCODE_TLBI]),
+	VCPU_STAT("tlbmod", excep_exits[EXCCODE_TLBM]),
+	VCPU_STAT("tlbri", excep_exits[EXCCODE_TLBNR]),
+	VCPU_STAT("tlbxi", excep_exits[EXCCODE_TLBNX]),
+	VCPU_STAT("fp_disabled", excep_exits[EXCCODE_FPDIS]),
+	VCPU_STAT("lsx_disabled", excep_exits[EXCCODE_LSXDIS]),
+	VCPU_STAT("lasx_disabled", excep_exits[EXCCODE_LASXDIS]),
+	VCPU_STAT("fpe", excep_exits[EXCCODE_FPE]),
+	VCPU_STAT("watch", excep_exits[EXCCODE_WATCH]),
+	VCPU_STAT("gspr", excep_exits[EXCCODE_GSPR]),
+	VCPU_STAT("vz_gsfc", excep_exits[EXCCODE_GCM]),
+	VCPU_STAT("vz_hc", excep_exits[EXCCODE_HVC]),
+
+	VCPU_STAT("rdcsr_cpu_feature", rdcsr_cpu_feature_exits),
+	VCPU_STAT("rdcsr_misc_func", rdcsr_misc_func_exits),
+	VCPU_STAT("rdcsr_ipi_access", rdcsr_ipi_access_exits),
+	VCPU_STAT("cpucfg", cpucfg_exits),
+	VCPU_STAT("huge_dec", huge_dec_exits),
+	VCPU_STAT("huge_thp", huge_thp_exits),
+	VCPU_STAT("huge_adj", huge_adjust_exits),
+	VCPU_STAT("huge_set", huge_set_exits),
+	VCPU_STAT("huge_merg", huge_merge_exits),
+
+	VCPU_STAT("halt_successful_poll", halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+	VCPU_STAT("halt_wakeup", halt_wakeup),
+	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	{NULL}
+};
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VM_STAT("remote_tlb_flush", remote_tlb_flush),
+	VM_STAT("pip_read_exits", pip_read_exits),
+	VM_STAT("pip_write_exits", pip_write_exits),
+	VM_STAT("vm_ioctl_irq_line", vm_ioctl_irq_line),
+	VM_STAT("ls7a_ioapic_update", ls7a_ioapic_update),
+	VM_STAT("ls7a_ioapic_set_irq", ls7a_ioapic_set_irq),
+	VM_STAT("ls7a_msi_irq", ls7a_msi_irq),
+	VM_STAT("ioapic_reg_write", ioapic_reg_write),
+	VM_STAT("ioapic_reg_read", ioapic_reg_read),
+	VM_STAT("set_ls7a_ioapic", set_ls7a_ioapic),
+	VM_STAT("get_ls7a_ioapic", get_ls7a_ioapic),
+	VM_STAT("set_ls3a_ext_irq", set_ls3a_ext_irq),
+	VM_STAT("get_ls3a_ext_irq", get_ls3a_ext_irq),
+	VM_STAT("ls3a_ext_irq", trigger_ls3a_ext_irq),
+	{NULL}
+};
+
+static int lvcpu_stat_get(void *address, u64 *val)
+{
+	*val = *(u64 *)address;
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(lvcpu_stat_fops, lvcpu_stat_get, NULL, "%llu\n");
+
+static int vcpu_pid_get(void *arg, u64 *val)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+
+	if (vcpu)
+		*val = pid_vnr(vcpu->pid);
+	return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_pid_fops, vcpu_pid_get, NULL, "%llu\n");
+
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+	struct kvm_stats_debugfs_item *p;
+
+	debugfs_create_file("pid", 0444, debugfs_dentry, vcpu, &vcpu_pid_fops);
+	for (p = vcpu_debugfs_entries; p->name && p->kind == KVM_STAT_VCPU; ++p) {
+		debugfs_create_file(p->name, 0444, debugfs_dentry,
+				(void *)vcpu + p->offset, &lvcpu_stat_fops);
+	}
+}
+
+bool kvm_trace_guest_mode_change;
+static struct kvm_context __percpu *vmcs;
+
+int kvm_guest_mode_change_trace_reg(void)
+{
+	kvm_trace_guest_mode_change = 1;
+	return 0;
+}
+
+void kvm_guest_mode_change_trace_unreg(void)
+{
+	kvm_trace_guest_mode_change = 0;
+}
+
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.pv.pv_unhalted)
+		return true;
+
+	return false;
+}
+
+/*
+ * XXXKYMA: We are simulating a processor that has the WII bit set in
+ * Config7, so we are "runnable" if interrupts are pending
+ */
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	return !!(vcpu->arch.irq_pending) || kvm_vcpu_has_events(vcpu);
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+#ifdef CONFIG_PARAVIRT
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+	struct kvm_host_map map;
+	struct kvm_steal_time *st;
+	int ret = 0;
+
+	if (vcpu->arch.st.guest_addr == 0)
+		return;
+
+	ret = kvm_map_gfn(vcpu, vcpu->arch.st.guest_addr >> PAGE_SHIFT,
+			&map, &vcpu->arch.st.cache, false);
+	if (ret) {
+		kvm_info("%s ret:%d\n", __func__, ret);
+		return;
+	}
+	st = map.hva + offset_in_page(vcpu->arch.st.guest_addr);
+	if (st->version & 1)
+		st->version += 1;	/* first time write, random junk */
+	st->version += 1;
+	smp_wmb();
+	st->steal += current->sched_info.run_delay -
+		vcpu->arch.st.last_steal;
+	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+	smp_wmb();
+	st->version += 1;
+
+	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+}
+
+bool _kvm_pvtime_supported(void)
+{
+	return !!sched_info_on();
+}
+
+int _kvm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+			struct kvm_device_attr *attr)
+{
+	u64 __user *user = (u64 __user *)attr->addr;
+	struct kvm *kvm = vcpu->kvm;
+	u64 ipa;
+	int ret = 0;
+	int idx;
+
+	if (!_kvm_pvtime_supported() ||
+			attr->attr != KVM_LARCH_VCPU_PVTIME_IPA)
+		return -ENXIO;
+
+	if (get_user(ipa, user))
+		return -EFAULT;
+	if (!IS_ALIGNED(ipa, 64))
+		return -EINVAL;
+
+	/* Check the address is in a valid memslot */
+	idx = srcu_read_lock(&kvm->srcu);
+	if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT)))
+		ret = -EINVAL;
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	if (!ret)
+		vcpu->arch.st.guest_addr = ipa;
+
+	return ret;
+}
+
+int _kvm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+			struct kvm_device_attr *attr)
+{
+	u64 __user *user = (u64 __user *)attr->addr;
+	u64 ipa;
+
+	if (!_kvm_pvtime_supported() ||
+			attr->attr != KVM_LARCH_VCPU_PVTIME_IPA)
+		return -ENXIO;
+
+	ipa = vcpu->arch.st.guest_addr;
+
+	if (put_user(ipa, user))
+		return -EFAULT;
+
+	return 0;
+}
+
+int _kvm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+			struct kvm_device_attr *attr)
+{
+	switch (attr->attr) {
+	case KVM_LARCH_VCPU_PVTIME_IPA:
+		if (_kvm_pvtime_supported())
+			return 0;
+	}
+
+	return -ENXIO;
+}
+
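kvm_update_stolen_time() above, like the two preempted helpers below, bumps st->version to an odd value for the duration of an update, seqlock style, so a guest-side reader must retry until it sees an even, unchanged version. A hedged standalone sketch of such a reader (field names are illustrative, and real code would also need read barriers between the loads):

	#include <stdint.h>

	struct steal_shared {
		volatile uint32_t version;
		volatile uint64_t steal;
	};

	/* Spin until an even, stable version brackets the payload read. */
	static uint64_t read_steal(const struct steal_shared *st)
	{
		uint32_t v;
		uint64_t steal;

		do {
			v = st->version;	/* odd means writer mid-update */
			steal = st->steal;
		} while ((v & 1) || v != st->version);

		return steal;
	}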
+static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+{
+	struct kvm_host_map map;
+	struct kvm_steal_time *st;
+	int ret = 0;
+
+	if (vcpu->arch.st.guest_addr == 0)
+		return;
+
+	ret = kvm_map_gfn(vcpu, vcpu->arch.st.guest_addr >> PAGE_SHIFT,
+			&map, &vcpu->arch.st.cache, false);
+	if (ret) {
+		kvm_info("%s ret:%d\n", __func__, ret);
+		return;
+	}
+	st = map.hva + offset_in_page(vcpu->arch.st.guest_addr);
+	if (st->version & 1)
+		st->version += 1;	/* first time write, random junk */
+	st->version += 1;
+	smp_wmb();
+	st->preempted = KVM_VCPU_PREEMPTED;
+	smp_wmb();
+	st->version += 1;
+
+	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+}
+
+static void kvm_steal_time_clear_preempted(struct kvm_vcpu *vcpu)
+{
+	struct kvm_host_map map;
+	struct kvm_steal_time *st;
+	int ret = 0;
+
+	if (vcpu->arch.st.guest_addr == 0)
+		return;
+
+	ret = kvm_map_gfn(vcpu, vcpu->arch.st.guest_addr >> PAGE_SHIFT,
+			&map, &vcpu->arch.st.cache, false);
+	if (ret) {
+		kvm_info("%s ret:%d\n", __func__, ret);
+		return;
+	}
+	st = map.hva + offset_in_page(vcpu->arch.st.guest_addr);
+	if (st->version & 1)
+		st->version += 1;	/* first time write, random junk */
+	st->version += 1;
+	smp_wmb();
+	st->preempted = 0;
+	smp_wmb();
+	st->version += 1;
+
+	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+}
+#endif
+
+int kvm_arch_hardware_enable(void)
+{
+	unsigned long gcfg = 0;
+
+	/* First init gtlbc, gcfg, gstat, gintc. All guests use the same config */
+	kvm_clear_csr_gtlbc(KVM_GTLBC_USETGID | KVM_GTLBC_TOTI);
+	kvm_write_csr_gcfg(0);
+	kvm_write_csr_gstat(0);
+	kvm_write_csr_gintc(0);
+
+	/*
+	 * Enable virtualization features granting guest direct control of
+	 * certain features:
+	 * GCI=2:    Trap on init or unimplemented cache instructions.
+	 * TORU=0:   Trap on Root Unimplemented.
+	 * CACTRL=1: Root controls the cache.
+	 * TOP=0:    Trap on Privilege.
+	 * TOE=0:    Trap on Exception.
+	 * TIT=0:    Trap on Timer.
+ */ + if (cpu_has_gcip_all) + gcfg |= KVM_GCFG_GCI_SECURE; + if (cpu_has_matc_root) + gcfg |= KVM_GCFG_MATC_ROOT; + + gcfg |= KVM_GCFG_TIT; + kvm_write_csr_gcfg(gcfg); + + kvm_flush_tlb_all(); + + /* Enable using TGID */ + kvm_set_csr_gtlbc(KVM_GTLBC_USETGID); + kvm_debug("gtlbc:%llx gintc:%llx gstat:%llx gcfg:%llx", + kvm_read_csr_gtlbc(), kvm_read_csr_gintc(), + kvm_read_csr_gstat(), kvm_read_csr_gcfg()); + return 0; +} + +void kvm_arch_hardware_disable(void) +{ + kvm_clear_csr_gtlbc(KVM_GTLBC_USETGID | KVM_GTLBC_TOTI); + kvm_write_csr_gcfg(0); + kvm_write_csr_gstat(0); + kvm_write_csr_gintc(0); + + /* Flush any remaining guest TLB entries */ + kvm_flush_tlb_all(); +} + +int kvm_arch_hardware_setup(void *opaque) +{ + return 0; +} + +int kvm_arch_check_processor_compat(void *rtn) +{ + return 0; +} + +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + /* Allocate page table to map GPA -> RPA */ + kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); + if (!kvm->arch.gpa_mm.pgd) + return -ENOMEM; + + kvm->arch.cpucfg_lasx = (read_cpucfg(LOONGARCH_CPUCFG2) & + CPUCFG2_LASX); + + _kvm_init_iocsr(kvm); + kvm->arch.vmcs = vmcs; + + return 0; +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_vcpu_destroy(vcpu); + } + + mutex_lock(&kvm->lock); + + for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) + kvm->vcpus[i] = NULL; + + atomic_set(&kvm->online_vcpus, 0); + + mutex_unlock(&kvm->lock); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvm_destroy_ls3a_ipi(kvm); + kvm_destroy_ls7a_ioapic(kvm); + kvm_destroy_ls3a_ext_irq(kvm); + kvm_free_vcpus(kvm); + _kvm_destroy_mm(kvm); +} + +long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + return 0; +} + +static void _kvm_new_vpid(unsigned long cpu, struct kvm_vcpu *vcpu) +{ + struct kvm_context *context; + unsigned long vpid; + + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu); + vpid = context->vpid_cache; + if (!(++vpid & context->gid_mask)) { + if (!vpid) /* fix version if needed */ + vpid = context->gid_fisrt_ver; + + ++vpid; /* vpid 0 reserved for root */ + + /* start new vpid cycle */ + kvm_flush_tlb_all(); + } + + context->vpid_cache = vpid; + vcpu->arch.vpid[cpu] = vpid; +} + +/* Returns 1 if the guest TLB may be clobbered */ +static int _kvm_check_requests(struct kvm_vcpu *vcpu, int cpu) +{ + int ret = 0; + int i; + + if (!kvm_request_pending(vcpu)) + return 0; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + /* Drop all vpids for this VCPU */ + for_each_possible_cpu(i) + vcpu->arch.vpid[i] = 0; + /* This will clobber guest TLB contents too */ + ret = 1; + } + + return ret; +} + +static void _kvm_update_vmid(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm_context *context; + bool migrated; + unsigned int gstinfo_gidmask, gstinfo_gid = 0; + + /* + * Are we entering guest context on a different CPU to last time? + * If so, the VCPU's guest TLB state on this CPU may be stale. + */ + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu); + migrated = (vcpu->arch.last_exec_cpu != cpu); + vcpu->arch.last_exec_cpu = cpu; + + /* + * Check if our vpid is of an older version and thus invalid. 
+ * + * We also discard the stored vpid if we've executed on + * another CPU, as the guest mappings may have changed without + * hypervisor knowledge. + */ + gstinfo_gidmask = context->gid_mask << KVM_GSTAT_GID_SHIFT; + if (migrated || + (vcpu->arch.vpid[cpu] ^ context->vpid_cache) & + context->gid_ver_mask) { + _kvm_new_vpid(cpu, vcpu); + trace_kvm_vpid_change(vcpu, vcpu->arch.vpid[cpu]); + } + gstinfo_gid = (vcpu->arch.vpid[cpu] & context->gid_mask) << + KVM_GSTAT_GID_SHIFT; + + /* Restore GSTAT(0x50).vpid */ + kvm_change_csr_gstat(gstinfo_gidmask, gstinfo_gid); +} + +/* + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +static int _kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + unsigned long exst = vcpu->arch.host_estat; + u32 intr = exst & 0x1fff; /* ignore NMI */ + u32 exccode = (exst & KVM_ESTAT_EXC) >> KVM_ESTAT_EXC_SHIFT; + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + int ret = RESUME_GUEST, cpu; + + vcpu->mode = OUTSIDE_GUEST_MODE; + + /* Set a default exit reason */ + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + /* + * Set the appropriate status bits based on host CPU features, + * before we hit the scheduler + */ + + local_irq_enable(); + + kvm_debug("%s: exst: %lx, PC: %p, kvm_run: %p, kvm_vcpu: %p\n", + __func__, exst, opc, run, vcpu); + trace_kvm_exit(vcpu, exccode); + if (exccode) { + vcpu->stat.excep_exits[exccode]++; + ret = _kvm_handle_fault(vcpu, exccode); + } else { + WARN(!intr, "suspicious vm exiting"); + ++vcpu->stat.int_exits; + + if (need_resched()) + cond_resched(); + + ret = RESUME_GUEST; + } + +#ifdef CONFIG_PARAVIRT + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); +#endif + + cond_resched(); + + local_irq_disable(); + + if (ret == RESUME_GUEST) { + /* Only check for signals if not already exiting to userspace */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + ret = (-EINTR << 2) | RESUME_HOST; + ++vcpu->stat.signal_exits; + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL); + return ret; + } + + trace_kvm_reenter(vcpu); + + kvm_acquire_timer(vcpu); + _kvm_deliver_intr(vcpu); + + /* + * Make sure the read of VCPU requests in vcpu_reenter() + * callback is not reordered ahead of the write to vcpu->mode, + * or we could miss a TLB flush request while the requester sees + * the VCPU as outside of guest mode and not needing an IPI. 
+	 */
+	smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+
+		cpu = smp_processor_id();
+		_kvm_check_requests(vcpu, cpu);
+		_kvm_update_vmid(vcpu, cpu);
+	}
+
+	return ret;
+}
+
+/* low level hrtimer wake routine */
+static enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = container_of(timer, struct kvm_vcpu, arch.swtimer);
+
+	_kvm_queue_irq(vcpu, LARCH_INT_TIMER);
+
+	rcuwait_wake_up(&vcpu->wait);
+
+	return kvm_count_timeout(vcpu);
+}
+
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	int i;
+	unsigned long timer_hz;
+	struct loongarch_csrs *csr;
+
+	for_each_possible_cpu(i)
+		vcpu->arch.vpid[i] = 0;
+
+	hrtimer_init(&vcpu->arch.swtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	vcpu->arch.swtimer.function = kvm_swtimer_wakeup;
+	vcpu->arch.fpu_enabled = true;
+	vcpu->arch.lsx_enabled = true;
+
+	vcpu->kvm->arch.online_vcpus = vcpu->vcpu_id + 1;
+
+	vcpu->arch.host_eentry = kvm_csr_readq(KVM_CSR_EENTRY);
+	vcpu->arch.guest_eentry = (unsigned long)kvm_exception_entry;
+	vcpu->arch.vcpu_run = kvm_enter_guest;
+	vcpu->arch.handle_exit = _kvm_handle_exit;
+	vcpu->arch.csr = kzalloc(sizeof(struct loongarch_csrs), GFP_KERNEL);
+	/*
+	 * All KVM exceptions share one exception entry, and host <-> guest
+	 * switches also switch the ECFG.VS field, so keep the host ECFG.VS
+	 * value here.
+	 */
+	vcpu->arch.host_ecfg = (kvm_read_csr_ecfg() & KVM_ECFG_VS);
+
+	if (!vcpu->arch.csr)
+		return -ENOMEM;
+	/* Only read the csr pointer once the allocation above has succeeded */
+	csr = vcpu->arch.csr;
+
+	/* Init */
+	vcpu->arch.last_sched_cpu = -1;
+	vcpu->arch.last_exec_cpu = -1;
+
+	/*
+	 * Initialize guest register state to valid architectural reset state.
+	 */
+	timer_hz = calc_const_freq();
+	kvm_init_timer(vcpu, timer_hz);
+
+	/* Set initialization mode for the guest */
+	kvm_write_sw_gcsr(csr, KVM_CSR_CRMD, KVM_CRMD_DA);
+
+	/* Set cpuid */
+	kvm_write_sw_gcsr(csr, KVM_CSR_TMID, vcpu->vcpu_id);
+
+	/* start with no pending virtual guest interrupts */
+	csr->csrs[KVM_CSR_GINTC] = 0;
+
+	return 0;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	int cpu;
+	struct kvm_context *context;
+	struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
+
+	hrtimer_cancel(&vcpu->arch.swtimer);
+	kvm_mmu_free_memory_caches(vcpu);
+	if (vcpu->arch.st.guest_addr)
+		kvm_release_pfn(cache->pfn, cache->dirty, cache);
+	kfree(vcpu->arch.csr);
+
+	/*
+	 * If the VCPU is freed and reused as another VCPU, we don't want the
+	 * matching pointer wrongly hanging around in last_vcpu.
+ */ + for_each_possible_cpu(cpu) { + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu); + if (context->last_vcpu == vcpu) + context->last_vcpu = NULL; + } +} + +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ + KVM_GUESTDBG_USE_SW_BP | KVM_GUESTDBG_SINGLESTEP) +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg) +{ + int ret = 0; + + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) { + ret = -EINVAL; + goto out; + } + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + /* No hardware breakpoint */ + } else { + vcpu->guest_debug = 0; + } +out: + return ret; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +{ + int r = -EINTR; + int cpu; + + vcpu_load(vcpu); + + kvm_sigset_activate(vcpu); + + if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + _kvm_complete_mmio_read(vcpu, vcpu->run); + vcpu->mmio_needed = 0; + } else if (vcpu->arch.is_hypcall) { + /* set return value for hypercall v0 register */ + vcpu->arch.gprs[REG_A0] = vcpu->run->hypercall.ret; + vcpu->arch.is_hypcall = 0; + } + + if (vcpu->run->exit_reason == KVM_EXIT_LOONGARCH_IOCSR) { + if (!vcpu->run->iocsr_io.is_write) + _kvm_complete_iocsr_read(vcpu, vcpu->run); + } + + /* clear exit_reason */ + vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; + if (vcpu->run->immediate_exit) + goto out; + + lose_fpu(1); + lose_lbt(1); + +#ifdef CONFIG_PARAVIRT + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); +#endif + local_irq_disable(); + guest_enter_irqoff(); + trace_kvm_enter(vcpu); + + /* + * Make sure the read of VCPU requests in vcpu_run() callback is not + * reordered ahead of the write to vcpu->mode, or we could miss a TLB + * flush request while the requester sees the VCPU as outside of guest + * mode and not needing an IPI. + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + cpu = smp_processor_id(); + kvm_acquire_timer(vcpu); + /* Check if we have any exceptions/interrupts pending */ + _kvm_deliver_intr(vcpu); + + _kvm_check_requests(vcpu, cpu); + _kvm_update_vmid(vcpu, cpu); + r = kvm_enter_guest(vcpu->run, vcpu); + + trace_kvm_out(vcpu); + guest_exit_irqoff(); + local_irq_enable(); + +out: + kvm_sigset_deactivate(vcpu); + + vcpu_put(vcpu); + return r; +} + +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, + struct kvm_loongarch_interrupt *irq) +{ + int intr = (int)irq->irq; + + if (intr < 0) { + _kvm_dequeue_irq(vcpu, -intr); + return 0; + } + + _kvm_queue_irq(vcpu, intr); + kvm_vcpu_kick(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -ENOIOCTLCMD; +} + +/** + * kvm_migrate_count() - Migrate timer. + * @vcpu: Virtual CPU. + * + * Migrate hrtimer to the current CPU by cancelling and restarting it + * if it was running prior to being cancelled. + * + * Must be called when the VCPU is migrated to a different CPU to ensure that + * timer expiry during guest execution interrupts the guest and causes the + * interrupt to be delivered in a timely manner. + */ +static void kvm_migrate_count(struct kvm_vcpu *vcpu) +{ + if (hrtimer_cancel(&vcpu->arch.swtimer)) + hrtimer_restart(&vcpu->arch.swtimer); +} + +static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm_context *context; + struct loongarch_csrs *csr = vcpu->arch.csr; + bool migrated, all; + + /* + * Have we migrated to a different CPU? 
+ * If so, any old guest TLB state may be stale. + */ + migrated = (vcpu->arch.last_sched_cpu != cpu); + + /* + * Was this the last VCPU to run on this CPU? + * If not, any old guest state from this VCPU will have been clobbered. + */ + context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu); + all = migrated || (context->last_vcpu != vcpu); + context->last_vcpu = vcpu; + + /* + * Restore timer state regardless + */ + kvm_restore_timer(vcpu); + + /* Control guest page CCA attribute */ + kvm_change_csr_gcfg(KVM_GCFG_MATC_MASK, KVM_GCFG_MATC_ROOT); + /* Restore hardware perf csr */ + kvm_restore_hw_perf(vcpu); + +#ifdef CONFIG_PARAVIRT + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); +#endif + /* Don't bother restoring registers multiple times unless necessary */ + if (!all) + return 0; + + kvm_write_csr_gcntc((ulong)vcpu->kvm->arch.stablecounter_gftoffset); + /* + * Restore guest CSR registers + */ + kvm_restore_hw_gcsr(csr, KVM_CSR_CRMD); + kvm_restore_hw_gcsr(csr, KVM_CSR_PRMD); + kvm_restore_hw_gcsr(csr, KVM_CSR_EUEN); + kvm_restore_hw_gcsr(csr, KVM_CSR_MISC); + kvm_restore_hw_gcsr(csr, KVM_CSR_ECFG); + kvm_restore_hw_gcsr(csr, KVM_CSR_ERA); + kvm_restore_hw_gcsr(csr, KVM_CSR_BADV); + kvm_restore_hw_gcsr(csr, KVM_CSR_BADI); + kvm_restore_hw_gcsr(csr, KVM_CSR_EENTRY); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBIDX); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBEHI); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBELO0); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBELO1); + kvm_restore_hw_gcsr(csr, KVM_CSR_ASID); + kvm_restore_hw_gcsr(csr, KVM_CSR_PGDL); + kvm_restore_hw_gcsr(csr, KVM_CSR_PGDH); + kvm_restore_hw_gcsr(csr, KVM_CSR_PWCTL0); + kvm_restore_hw_gcsr(csr, KVM_CSR_PWCTL1); + kvm_restore_hw_gcsr(csr, KVM_CSR_STLBPGSIZE); + kvm_restore_hw_gcsr(csr, KVM_CSR_RVACFG); + kvm_restore_hw_gcsr(csr, KVM_CSR_CPUID); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS0); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS1); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS2); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS3); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS4); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS5); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS6); + kvm_restore_hw_gcsr(csr, KVM_CSR_KS7); + kvm_restore_hw_gcsr(csr, KVM_CSR_TMID); + kvm_restore_hw_gcsr(csr, KVM_CSR_CNTC); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRENTRY); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRBADV); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRERA); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRSAVE); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRELO0); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRELO1); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBREHI); + kvm_restore_hw_gcsr(csr, KVM_CSR_TLBRPRMD); + kvm_restore_hw_gcsr(csr, KVM_CSR_DMWIN0); + kvm_restore_hw_gcsr(csr, KVM_CSR_DMWIN1); + kvm_restore_hw_gcsr(csr, KVM_CSR_DMWIN2); + kvm_restore_hw_gcsr(csr, KVM_CSR_DMWIN3); + kvm_restore_hw_gcsr(csr, KVM_CSR_LLBCTL); + + /* restore Root.Guestexcept from unused Guest guestexcept register */ + kvm_write_csr_gintc(csr->csrs[KVM_CSR_GINTC]); + + /* + * We should clear linked load bit to break interrupted atomics. This + * prevents a SC on the next VCPU from succeeding by matching a LL on + * the previous VCPU. 
+ */ + if (vcpu->kvm->created_vcpus > 1) + kvm_set_gcsr_llbctl(KVM_LLBCTL_WCLLB); + + return 0; +} + +/* Restore ASID once we are scheduled back after preemption */ +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + vcpu->cpu = cpu; + if (vcpu->arch.last_sched_cpu != cpu) { + kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", + vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); + /* + * Migrate the timer interrupt to the current CPU so that it + * always interrupts the guest and synchronously triggers a + * guest timer interrupt. + */ + kvm_migrate_count(vcpu); + } + + /* restore guest state to registers */ + _kvm_vcpu_load(vcpu, cpu); + kvm_steal_time_clear_preempted(vcpu); + local_irq_restore(flags); +} + +static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + + kvm_lose_fpu(vcpu); + kvm_lose_hw_perf(vcpu); + + kvm_save_hw_gcsr(csr, KVM_CSR_CRMD); + kvm_save_hw_gcsr(csr, KVM_CSR_PRMD); + kvm_save_hw_gcsr(csr, KVM_CSR_EUEN); + kvm_save_hw_gcsr(csr, KVM_CSR_MISC); + kvm_save_hw_gcsr(csr, KVM_CSR_ECFG); + kvm_save_hw_gcsr(csr, KVM_CSR_ERA); + kvm_save_hw_gcsr(csr, KVM_CSR_BADV); + kvm_save_hw_gcsr(csr, KVM_CSR_BADI); + kvm_save_hw_gcsr(csr, KVM_CSR_EENTRY); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBIDX); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBEHI); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBELO0); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBELO1); + kvm_save_hw_gcsr(csr, KVM_CSR_ASID); + kvm_save_hw_gcsr(csr, KVM_CSR_PGDL); + kvm_save_hw_gcsr(csr, KVM_CSR_PGDH); + kvm_save_hw_gcsr(csr, KVM_CSR_PGD); + kvm_save_hw_gcsr(csr, KVM_CSR_PWCTL0); + kvm_save_hw_gcsr(csr, KVM_CSR_PWCTL1); + kvm_save_hw_gcsr(csr, KVM_CSR_STLBPGSIZE); + kvm_save_hw_gcsr(csr, KVM_CSR_RVACFG); + kvm_save_hw_gcsr(csr, KVM_CSR_CPUID); + kvm_save_hw_gcsr(csr, KVM_CSR_PRCFG1); + kvm_save_hw_gcsr(csr, KVM_CSR_PRCFG2); + kvm_save_hw_gcsr(csr, KVM_CSR_PRCFG3); + kvm_save_hw_gcsr(csr, KVM_CSR_KS0); + kvm_save_hw_gcsr(csr, KVM_CSR_KS1); + kvm_save_hw_gcsr(csr, KVM_CSR_KS2); + kvm_save_hw_gcsr(csr, KVM_CSR_KS3); + kvm_save_hw_gcsr(csr, KVM_CSR_KS4); + kvm_save_hw_gcsr(csr, KVM_CSR_KS5); + kvm_save_hw_gcsr(csr, KVM_CSR_KS6); + kvm_save_hw_gcsr(csr, KVM_CSR_KS7); + kvm_save_hw_gcsr(csr, KVM_CSR_TMID); + kvm_save_hw_gcsr(csr, KVM_CSR_CNTC); + kvm_save_hw_gcsr(csr, KVM_CSR_LLBCTL); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRENTRY); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRBADV); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRERA); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRSAVE); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRELO0); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRELO1); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBREHI); + kvm_save_hw_gcsr(csr, KVM_CSR_TLBRPRMD); + kvm_save_hw_gcsr(csr, KVM_CSR_DMWIN0); + kvm_save_hw_gcsr(csr, KVM_CSR_DMWIN1); + kvm_save_hw_gcsr(csr, KVM_CSR_DMWIN2); + kvm_save_hw_gcsr(csr, KVM_CSR_DMWIN3); + + /* save Root.Guestexcept in unused Guest guestexcept register */ + kvm_save_timer(vcpu); + csr->csrs[KVM_CSR_GINTC] = kvm_read_csr_gintc(); + return 0; +} + +/* ASID can change if another task is scheduled during preemption */ +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + int cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + vcpu->arch.last_sched_cpu = cpu; + vcpu->cpu = -1; + + /* save guest state in registers */ + _kvm_vcpu_put(vcpu, cpu); + kvm_steal_time_set_preempted(vcpu); + local_irq_restore(flags); +} + +static int _kvm_get_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, s64 *v) +{ + struct loongarch_csrs *csr = 
vcpu->arch.csr;
+	int reg_idx, ret;
+
+	if ((reg->id & KVM_IOC_CSRID(0)) == KVM_IOC_CSRID(0)) {
+		reg_idx = KVM_GET_IOC_CSRIDX(reg->id);
+		ret = _kvm_getcsr(vcpu, reg_idx, v, 0);
+		if (ret == 0)
+			return ret;
+	}
+
+	switch (reg->id) {
+	case KVM_REG_LBT_SCR0:
+		*v = vcpu->arch.lbt.scr0;
+		break;
+	case KVM_REG_LBT_SCR1:
+		*v = vcpu->arch.lbt.scr1;
+		break;
+	case KVM_REG_LBT_SCR2:
+		*v = vcpu->arch.lbt.scr2;
+		break;
+	case KVM_REG_LBT_SCR3:
+		*v = vcpu->arch.lbt.scr3;
+		break;
+	case KVM_REG_LBT_FLAGS:
+		*v = vcpu->arch.lbt.eflags;
+		break;
+	case KVM_REG_LBT_FTOP:
+		*v = vcpu->arch.fpu.ftop;
+		break;
+
+	case KVM_REG_LOONGARCH_COUNTER:
+		*v = drdtime() + vcpu->kvm->arch.stablecounter_gftoffset;
+		break;
+	default:
+		if ((reg->id & KVM_REG_LOONGARCH_MASK) != KVM_REG_LOONGARCH_CSR)
+			return -EINVAL;
+
+		reg_idx = KVM_GET_IOC_CSRIDX(reg->id);
+		if (reg_idx < CSR_ALL_SIZE)
+			*v = kvm_read_sw_gcsr(csr, reg_idx);
+		else
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int _kvm_set_one_reg(struct kvm_vcpu *vcpu,
+			const struct kvm_one_reg *reg,
+			s64 v)
+{
+	struct loongarch_csrs *csr = vcpu->arch.csr;
+	struct gfn_to_pfn_cache *cache;
+	int ret = 0;
+	unsigned long flags;
+	u64 val;
+	int reg_idx;
+
+	val = v;
+	if ((reg->id & KVM_IOC_CSRID(0)) == KVM_IOC_CSRID(0)) {
+		reg_idx = KVM_GET_IOC_CSRIDX(reg->id);
+		ret = _kvm_setcsr(vcpu, reg_idx, &val, 0);
+		if (ret == 0)
+			return ret;
+	}
+
+	switch (reg->id) {
+	case KVM_REG_LBT_SCR0:
+		vcpu->arch.lbt.scr0 = val;
+		break;
+	case KVM_REG_LBT_SCR1:
+		vcpu->arch.lbt.scr1 = val;
+		break;
+	case KVM_REG_LBT_SCR2:
+		vcpu->arch.lbt.scr2 = val;
+		break;
+	case KVM_REG_LBT_SCR3:
+		vcpu->arch.lbt.scr3 = val;
+		break;
+	case KVM_REG_LBT_FLAGS:
+		vcpu->arch.lbt.eflags = val;
+		break;
+	case KVM_REG_LBT_FTOP:
+		vcpu->arch.fpu.ftop = val;
+		break;
+
+	case KVM_REG_LOONGARCH_COUNTER:
+		local_irq_save(flags);
+		/*
+		 * gftoffset is relative to the board, not to a single vcpu;
+		 * on an SMP system it is only set once, by the first vcpu.
+		 */
+		if (!vcpu->kvm->arch.stablecounter_gftoffset)
+			vcpu->kvm->arch.stablecounter_gftoffset = (signed long)(v - drdtime());
+		kvm_write_csr_gcntc((ulong)vcpu->kvm->arch.stablecounter_gftoffset);
+		local_irq_restore(flags);
+		break;
+	case KVM_REG_LOONGARCH_VCPU_RESET:
+		cache = &vcpu->arch.st.cache;
+		kvm_reset_timer(vcpu);
+		if (vcpu->vcpu_id == 0)
+			kvm_setup_ls3a_extirq(vcpu->kvm);
+		memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending));
+		memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear));
+
+		if (vcpu->arch.st.guest_addr) {
+			kvm_release_pfn(cache->pfn, cache->dirty, cache);
+			/* disable the pv timer while the cpu is resetting */
+			vcpu->arch.st.guest_addr = 0;
+		}
+		vcpu->kvm->arch.stablecounter_gftoffset = 0;
+		break;
+	default:
+		if ((reg->id & KVM_REG_LOONGARCH_MASK) != KVM_REG_LOONGARCH_CSR)
+			return -EINVAL;
+
+		reg_idx = KVM_GET_IOC_CSRIDX(reg->id);
+		if (reg_idx < CSR_ALL_SIZE)
+			kvm_write_sw_gcsr(csr, reg_idx, v);
+		else
+			return -EINVAL;
+	}
+	return ret;
+}
+
+static int _kvm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	int ret;
+	s64 v;
+
+	ret = _kvm_get_one_reg(vcpu, reg, &v);
+	if (ret)
+		return ret;
+
+	ret = -EINVAL;
+	if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
+		u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
+
+		ret = put_user(v, uaddr64);
+	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) {
+		u32 __user *uaddr32 = (u32 __user *)(long)reg->addr;
+		u32 v32 = (u32)v;
+
+		ret = put_user(v32, uaddr32);
+	}
+
+	return ret;
+}
+
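_kvm_get_reg() above picks the put_user() width from KVM_REG_SIZE_MASK embedded in the register id, mirroring how user space drives it through the generic one-reg API. A minimal user-space counterpart (standard KVM ioctl usage; the vcpu fd and the concrete register id are supplied by the caller, nothing here is specific to this patch):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Fetch one 64-bit register from a vcpu fd; returns 0 on success. */
	static int get_reg_u64(int vcpu_fd, uint64_t id, uint64_t *val)
	{
		struct kvm_one_reg reg = {
			.id   = id,	/* id must carry KVM_REG_SIZE_U64 */
			.addr = (uint64_t)(uintptr_t)val,
		};

		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
	}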
+static int _kvm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	s64 v;
+	int ret;
+
+	ret = -EINVAL;
+	if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
+		u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
+
+		ret = get_user(v, uaddr64);
+	} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) {
+		u32 __user *uaddr32 = (u32 __user *)(long)reg->addr;
+		s32 v32;
+
+		ret = get_user(v32, uaddr32);
+		v = (s64)v32;
+	}
+
+	if (ret)
+		return -EFAULT;
+
+	return _kvm_set_one_reg(vcpu, reg, v);
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				struct kvm_enable_cap *cap)
+{
+	int r = 0;
+
+	if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap))
+		return -EINVAL;
+	if (cap->flags)
+		return -EINVAL;
+	if (cap->args[0])
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_LOONGARCH_FPU:
+	case KVM_CAP_LOONGARCH_LSX:
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl,
+			unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	if (ioctl == KVM_INTERRUPT) {
+		struct kvm_loongarch_interrupt irq;
+
+		if (copy_from_user(&irq, argp, sizeof(irq)))
+			return -EFAULT;
+		kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__,
+				irq.irq);
+
+		return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+	}
+
+	return -ENOIOCTLCMD;
+}
+
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
+			bool line_status)
+{
+	u32 irq = irq_level->irq;
+	unsigned int irq_type, vcpu_idx, irq_num, ret;
+	int nrcpus = atomic_read(&kvm->online_vcpus);
+	bool level = irq_level->level;
+	unsigned long flags;
+
+	irq_type = (irq >> KVM_LOONGSON_IRQ_TYPE_SHIFT) & KVM_LOONGSON_IRQ_TYPE_MASK;
+	vcpu_idx = (irq >> KVM_LOONGSON_IRQ_VCPU_SHIFT) & KVM_LOONGSON_IRQ_VCPU_MASK;
+	irq_num = (irq >> KVM_LOONGSON_IRQ_NUM_SHIFT) & KVM_LOONGSON_IRQ_NUM_MASK;
+
+	switch (irq_type) {
+	case KVM_LOONGSON_IRQ_TYPE_IOAPIC:
+		if (!ls7a_ioapic_in_kernel(kvm))
+			return -ENXIO;
+
+		if (vcpu_idx >= nrcpus)
+			return -EINVAL;
+
+		ls7a_ioapic_lock(ls7a_ioapic_irqchip(kvm), &flags);
+		ret = kvm_ls7a_ioapic_set_irq(kvm, irq_num, level);
+		ls7a_ioapic_unlock(ls7a_ioapic_irqchip(kvm), &flags);
+		return ret;
+	}
+	kvm->stat.vm_ioctl_irq_line++;
+
+	return -EINVAL;
+}
+
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct loongarch_kvm_irqchip *chip)
+{
+	int r, dlen;
+
+	r = 0;
+	dlen = chip->len - sizeof(struct loongarch_kvm_irqchip);
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_LS7A_IOAPIC:
+		if (dlen != sizeof(struct kvm_ls7a_ioapic_state)) {
+			kvm_err("get ls7a state err dlen:%d\n", dlen);
+			goto dlen_err;
+		}
+		r = kvm_get_ls7a_ioapic(kvm, (void *)chip->data);
+		break;
+	case KVM_IRQCHIP_LS3A_GIPI:
+		if (dlen != sizeof(gipiState)) {
+			kvm_err("get gipi state err dlen:%d\n", dlen);
+			goto dlen_err;
+		}
+		r = kvm_get_ls3a_ipi(kvm, (void *)chip->data);
+		break;
+	case KVM_IRQCHIP_LS3A_HT_IRQ:
+	case KVM_IRQCHIP_LS3A_ROUTE:
+		break;
+	case KVM_IRQCHIP_LS3A_EXTIRQ:
+		if (dlen != sizeof(struct kvm_loongarch_ls3a_extirq_state)) {
+			kvm_err("get extioi state err dlen:%d\n", dlen);
+			goto dlen_err;
+		}
+		r = kvm_get_ls3a_extirq(kvm, (void *)chip->data);
+		break;
+	case KVM_IRQCHIP_LS3A_IPMASK:
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+dlen_err:
+	r = -EINVAL;
+	return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct loongarch_kvm_irqchip *chip)
+{
+	int r, dlen;
+
+	r = 0;
+	dlen = chip->len - sizeof(struct loongarch_kvm_irqchip);
+	switch (chip->chip_id) {
+
case KVM_IRQCHIP_LS7A_IOAPIC: + if (dlen != sizeof(struct kvm_ls7a_ioapic_state)) { + kvm_err("set ls7a state err dlen:%d\n", dlen); + goto dlen_err; + } + r = kvm_set_ls7a_ioapic(kvm, (void *)chip->data); + break; + case KVM_IRQCHIP_LS3A_GIPI: + if (dlen != sizeof(gipiState)) { + kvm_err("set gipi state err dlen:%d\n", dlen); + goto dlen_err; + } + r = kvm_set_ls3a_ipi(kvm, (void *)chip->data); + break; + case KVM_IRQCHIP_LS3A_HT_IRQ: + case KVM_IRQCHIP_LS3A_ROUTE: + break; + case KVM_IRQCHIP_LS3A_EXTIRQ: + if (dlen != sizeof(struct kvm_loongarch_ls3a_extirq_state)) { + kvm_err("set extioi state err dlen:%d\n", dlen); + goto dlen_err; + } + r = kvm_set_ls3a_extirq(kvm, (void *)chip->data); + break; + case KVM_IRQCHIP_LS3A_IPMASK: + break; + default: + r = -EINVAL; + break; + } + return r; +dlen_err: + r = -EINVAL; + return r; +} + +/* + * Read or write a bunch of msrs. All parameters are kernel addresses. + * + * @return number of msrs set successfully. + */ +static int _kvm_csr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, + struct kvm_csr_entry *entries, + int (*do_csr)(struct kvm_vcpu *vcpu, + unsigned index, u64 *data, int force)) +{ + int i; + + for (i = 0; i < msrs->ncsrs; ++i) + if (do_csr(vcpu, entries[i].index, &entries[i].data, 1)) + break; + + return i; +} + +static int kvm_csr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, + int (*do_csr)(struct kvm_vcpu *vcpu, + unsigned index, u64 *data, int force)) +{ + struct kvm_msrs msrs; + struct kvm_csr_entry *entries; + int r, n; + unsigned size; + + r = -EFAULT; + if (copy_from_user(&msrs, user_msrs, sizeof msrs)) + goto out; + + r = -E2BIG; + if (msrs.ncsrs >= CSR_ALL_SIZE) + goto out; + + size = sizeof(struct kvm_csr_entry) * msrs.ncsrs; + entries = memdup_user(user_msrs->entries, size); + if (IS_ERR(entries)) { + r = PTR_ERR(entries); + goto out; + } + + r = n = _kvm_csr_io(vcpu, &msrs, entries, do_csr); + if (r < 0) + goto out_free; + + r = -EFAULT; + if (copy_to_user(user_msrs->entries, entries, size)) + goto out_free; + + r = n; + +out_free: + kfree(entries); +out: + return r; +} + +static int _kvm_vcpu_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { +#ifdef CONFIG_PARAVIRT + case KVM_LARCH_VCPU_PVTIME_CTRL: + ret = _kvm_pvtime_set_attr(vcpu, attr); + break; +#endif + default: + ret = -ENXIO; + break; + } + + return ret; +} + +static int _kvm_vcpu_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { +#ifdef CONFIG_PARAVIRT + case KVM_LARCH_VCPU_PVTIME_CTRL: + ret = _kvm_pvtime_get_attr(vcpu, attr); + break; +#endif + default: + ret = -ENXIO; + break; + } + + return ret; +} + +static int _kvm_vcpu_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { +#ifdef CONFIG_PARAVIRT + case KVM_LARCH_VCPU_PVTIME_CTRL: + ret = _kvm_pvtime_has_attr(vcpu, attr); + break; +#endif + default: + ret = -ENXIO; + break; + } + + return ret; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; + long r; + + vcpu_load(vcpu); + + switch (ioctl) { + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + if (ioctl == KVM_SET_ONE_REG) + r = _kvm_set_reg(vcpu, ®); + else + r = _kvm_get_reg(vcpu, ®); + break; 
+	}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			break;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
+	case KVM_CHECK_EXTENSION: {
+		unsigned int ext;
+
+		r = -EFAULT;
+		if (copy_from_user(&ext, argp, sizeof(ext)))
+			break;	/* don't return directly: vcpu_put() below must still run */
+		switch (ext) {
+		case KVM_CAP_LOONGARCH_FPU:
+			r = !!cpu_has_fpu;
+			break;
+		case KVM_CAP_LOONGARCH_LSX:
+			r = !!cpu_has_lsx;
+			break;
+		default:
+			r = 0;	/* unknown extension: not supported */
+			break;
+		}
+		break;
+	}
+
+	case KVM_LOONGARCH_GET_VCPU_STATE:
+	{
+		int i;
+		struct kvm_loongarch_vcpu_state vcpu_state;
+
+		r = -EFAULT;
+		vcpu_state.online_vcpus = vcpu->kvm->arch.online_vcpus;
+		vcpu_state.is_migrate = 1;
+		for (i = 0; i < 4; i++)
+			vcpu_state.core_ext_ioisr[i] = vcpu->arch.core_ext_ioisr[i];
+
+		vcpu_state.irq_pending = vcpu->arch.irq_pending;
+		vcpu_state.irq_clear = vcpu->arch.irq_clear;
+
+		if (copy_to_user(argp, &vcpu_state, sizeof(struct kvm_loongarch_vcpu_state)))
+			break;
+		r = 0;
+		break;
+	}
+
+	case KVM_LOONGARCH_SET_VCPU_STATE:
+	{
+		int i;
+		struct kvm_loongarch_vcpu_state vcpu_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&vcpu_state, argp, sizeof(struct kvm_loongarch_vcpu_state)))
+			break;	/* don't return directly: vcpu_put() below must still run */
+
+		vcpu->kvm->arch.online_vcpus = vcpu_state.online_vcpus;
+		vcpu->kvm->arch.is_migrate = vcpu_state.is_migrate;
+		for (i = 0; i < 4; i++)
+			vcpu->arch.core_ext_ioisr[i] = vcpu_state.core_ext_ioisr[i];
+
+		vcpu->arch.irq_pending = vcpu_state.irq_pending;
+		vcpu->arch.irq_clear = vcpu_state.irq_clear;
+		r = 0;
+		break;
+	}
+	case KVM_GET_MSRS: {
+		r = kvm_csr_io(vcpu, argp, _kvm_getcsr);
+		break;
+	}
+	case KVM_SET_MSRS: {
+		r = kvm_csr_io(vcpu, argp, _kvm_setcsr);
+		break;
+	}
+	case KVM_SET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			break;
+		r = _kvm_vcpu_set_attr(vcpu, &attr);
+		break;
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			break;
+		r = _kvm_vcpu_get_attr(vcpu, &attr);
+		break;
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			break;
+		r = _kvm_vcpu_has_attr(vcpu, &attr);
+		break;
+	}
+	default:
+		r = -ENOIOCTLCMD;
+	}
+
+	vcpu_put(vcpu);
+	return r;
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+
+}
+
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+					struct kvm_memory_slot *memslot)
+{
+	/*
+	 * FIXME: THP is disabled here to improve the VM migration success
+	 * ratio; there is currently no way to detect a failed migration
+	 * and re-enable THP afterwards.
+	 */
+	memslot->arch.flags |= KVM_MEMSLOT_DISABLE_THP;
+
+	/* Let implementation handle TLB/GVA invalidation */
+	kvm_flush_remote_tlbs(kvm);
+}
+
+long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	long r;
+
+	switch (ioctl) {
+	case KVM_CREATE_IRQCHIP:
+	{
+		mutex_lock(&kvm->lock);
+		r = -EEXIST;
+		if (kvm->arch.v_ioapic)
+			goto create_irqchip_unlock;
+
+		r = kvm_create_ls7a_ioapic(kvm);
+		if (r < 0)
+			goto create_irqchip_unlock;
+		r = kvm_create_ls3a_ipi(kvm);
+		if (r < 0) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_destroy_ls7a_ioapic(kvm);
+			mutex_unlock(&kvm->slots_lock);
+			goto create_irqchip_unlock;
+		}
+		r = kvm_create_ls3a_ext_irq(kvm);
+		if (r < 0) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_destroy_ls3a_ipi(kvm);
+			kvm_destroy_ls7a_ioapic(kvm);
+			mutex_unlock(&kvm->slots_lock);
+			/* skip routing setup on a torn-down irqchip */
+			goto create_irqchip_unlock;
+		}
+		kvm_ls7a_setup_default_irq_routing(kvm);
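+		/* The default GSI routing installed above can be replaced by
+		 * userspace later through the generic KVM_SET_GSI_ROUTING ioctl. */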
+		irqchip_debug_init(kvm);
+		/* Write kvm->irq_routing before kvm->arch.v_ioapic. */
+		smp_wmb();
+create_irqchip_unlock:
+		mutex_unlock(&kvm->lock);
+		break;
+	}
+	case KVM_GET_IRQCHIP: {
+		struct loongarch_kvm_irqchip *kchip;
+		struct loongarch_kvm_irqchip uchip;
+
+		r = -EFAULT;
+		if (copy_from_user(&uchip, argp, sizeof(struct loongarch_kvm_irqchip)))
+			goto out;
+		kchip = memdup_user(argp, uchip.len);
+		if (IS_ERR(kchip)) {
+			r = PTR_ERR(kchip);
+			goto out;
+		}
+
+		r = -ENXIO;
+		if (!ls7a_ioapic_in_kernel(kvm))
+			goto get_irqchip_out;
+		r = kvm_vm_ioctl_get_irqchip(kvm, kchip);
+		if (r)
+			goto get_irqchip_out;
+		r = -EFAULT;
+		if (copy_to_user(argp, kchip, kchip->len))
+			goto get_irqchip_out;
+		r = 0;
+get_irqchip_out:
+		kfree(kchip);
+		break;
+	}
+	case KVM_SET_IRQCHIP: {
+		struct loongarch_kvm_irqchip *kchip;
+		struct loongarch_kvm_irqchip uchip;
+
+		r = -EFAULT;
+		if (copy_from_user(&uchip, argp, sizeof(struct loongarch_kvm_irqchip)))
+			goto out;
+
+		kchip = memdup_user(argp, uchip.len);
+		if (IS_ERR(kchip)) {
+			r = PTR_ERR(kchip);
+			goto out;
+		}
+
+		r = -ENXIO;
+		if (!ls7a_ioapic_in_kernel(kvm))
+			goto set_irqchip_out;
+		r = kvm_vm_ioctl_set_irqchip(kvm, kchip);
+		if (r)
+			goto set_irqchip_out;
+		r = 0;
+set_irqchip_out:
+		kfree(kchip);
+		break;
+	}
+	case KVM_LOONGARCH_GET_IOCSR:
+	{
+		r = _kvm_get_iocsr(kvm, argp);
+		break;
+	}
+	case KVM_LOONGARCH_SET_IOCSR:
+	{
+		r = _kvm_set_iocsr(kvm, argp);
+		break;
+	}
+	case KVM_LOONGARCH_SET_CPUCFG:
+	{
+		r = 0;
+		if (copy_from_user(&kvm->arch.cpucfgs, argp, sizeof(struct kvm_cpucfg)))
+			r = -EFAULT;
+		break;
+	}
+	case KVM_LOONGARCH_GET_CPUCFG:
+	{
+		r = 0;
+		if (copy_to_user(argp, &kvm->arch.cpucfgs, sizeof(struct kvm_cpucfg)))
+			r = -EFAULT;
+		break;
+	}
+	default:
+		r = -ENOIOCTLCMD;
+	}
+out:
+	return r;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	struct kvm_context *context;
+	unsigned long vpid_mask;
+	int cpu;
+
+	vmcs = alloc_percpu(struct kvm_context);
+	if (!vmcs) {
+		printk(KERN_ERR "kvm: failed to allocate percpu kvm_context\n");
+		return -ENOMEM;
+	}
+
+	vpid_mask = kvm_read_csr_gstat();
+	vpid_mask = (vpid_mask & KVM_GSTAT_GIDBIT) >> KVM_GSTAT_GIDBIT_SHIFT;
+	if (vpid_mask)
+		vpid_mask = GENMASK(vpid_mask - 1, 0);
+
+	for_each_possible_cpu(cpu) {
+		context = per_cpu_ptr(vmcs, cpu);
+		context->gid_mask = vpid_mask;
+		context->gid_ver_mask = ~context->gid_mask;
+		context->gid_fisrt_ver = context->gid_mask + 1;
+		context->vpid_cache = context->gid_mask + 1;
+		context->last_vcpu = NULL;
+	}
+
+	_kvm_init_fault();
+	return 0;
+}
+
+void kvm_arch_exit(void)
+{
+	free_percpu(vmcs);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -ENOIOCTLCMD;
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	int i = 0;
+
+	/* no need for vcpu_load()/vcpu_put() here */
+	fpu->fcsr = vcpu->arch.fpu.fcsr;
+	fpu->fcc = vcpu->arch.fpu.fcc;
+	for (i = 0; i < NUM_FPU_REGS; i++)
+		memcpy(&fpu->fpr[i], &vcpu->arch.fpu.fpr[i], FPU_REG_WIDTH / 64);
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	int i = 0;
+
+	/* no need for vcpu_load()/vcpu_put() here */
+	vcpu->arch.fpu.fcsr = fpu->fcsr;
+	vcpu->arch.fpu.fcc = fpu->fcc;
+	for (i = 0; i < NUM_FPU_REGS; i++)
+		memcpy(&vcpu->arch.fpu.fpr[i], &fpu->fpr[i], FPU_REG_WIDTH / 64);
+
+	return 0;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;
+}
+
+int
kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_HAVE_LS_KVM_MSI
+	case KVM_CAP_SIGNAL_MSI:
+#endif
+	case KVM_CAP_IMMEDIATE_EXIT:
+		r = 1;
+		break;
+	case KVM_CAP_NR_VCPUS:
+		r = num_online_cpus();
+		break;
+	case KVM_CAP_MAX_VCPUS:
+		r = KVM_MAX_VCPUS;
+		break;
+	case KVM_CAP_MAX_VCPU_ID:
+		r = KVM_MAX_VCPU_ID;
+		break;
+	case KVM_CAP_LOONGARCH_FPU:
+		/* We don't handle systems with inconsistent cpu_has_fpu */
+		r = !!cpu_has_fpu;
+		break;
+	case KVM_CAP_LOONGARCH_LSX:
+		/*
+		 * We don't support LSX vector partitioning yet:
+		 * 1) It would require explicit support which can't be tested
+		 *    yet due to lack of support in current hardware.
+		 * 2) It extends the state that would need to be saved/restored
+		 *    by e.g. QEMU for migration.
+		 *
+		 * When vector partitioning hardware becomes available, support
+		 * could be added by requiring a flag when enabling
+		 * KVM_CAP_LOONGARCH_LSX capability to indicate that userland knows
+		 * to save/restore the appropriate extra state.
+		 */
+		r = cpu_has_lsx;
+		break;
+	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_IOEVENTFD:
+		/* we wouldn't be here unless cpu_has_lvz */
+		r = 1;
+		break;
+	case KVM_CAP_LOONGARCH_VZ:
+		/* get user defined kvm version */
+		r = KVM_LOONGARCH_VERSION;
+		break;
+	default:
+		r = 0;
+		break;
+	}
+	return r;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return _kvm_pending_timer(vcpu) ||
+		kvm_read_hw_gcsr(KVM_CSR_ESTAT) & (1 << INT_TI);
+}
+
+int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct loongarch_csrs *csr;
+
+	if (!vcpu)
+		return -1;
+
+	kvm_debug("VCPU Register Dump:\n");
+	kvm_debug("\tpc = 0x%08lx\n", vcpu->arch.pc);
+	kvm_debug("\texceptions: %08lx\n", vcpu->arch.irq_pending);
+
+	for (i = 0; i < 32; i += 4) {
+		kvm_debug("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i,
+			  vcpu->arch.gprs[i],
+			  vcpu->arch.gprs[i + 1],
+			  vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]);
+	}
+
+	csr = vcpu->arch.csr;
+	kvm_debug("\tCRMD: 0x%08lx, ESTAT: 0x%08lx\n",
+		  kvm_read_hw_gcsr(KVM_CSR_CRMD),
+		  kvm_read_hw_gcsr(KVM_CSR_ESTAT));
+
+	kvm_debug("\tERA: 0x%08lx\n", kvm_read_hw_gcsr(KVM_CSR_ERA));
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	vcpu_load(vcpu);
+
+	for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
+		vcpu->arch.gprs[i] = regs->gpr[i];
+	vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set.
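($r0 is hard-wired to zero by the hardware, which is also why the copy loop above starts at index 1.)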
*/
+	vcpu->arch.pc = regs->pc;
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	vcpu_load(vcpu);
+
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
+		regs->gpr[i] = vcpu->arch.gprs[i];
+
+	regs->pc = vcpu->arch.pc;
+
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return 0;
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+/* Enable LBT for guest and restore context */
+void kvm_own_lbt(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
+		kvm_set_csr_euen(KVM_EUEN_LBTEN);
+
+		/* If guest LBT state is not active, restore it now */
+		kvm_restore_lbt(vcpu);
+		vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
+	}
+	preempt_enable();
+}
+
+static void kvm_enable_lbt_fpu(struct kvm_vcpu *vcpu, unsigned long fcsr)
+{
+	/*
+	 * If TM is enabled, TOP register save/restore will trigger an LBT
+	 * exception, so enable LBT in advance.
+	 */
+	if (fcsr & FPU_CSR_TM)
+		kvm_own_lbt(vcpu);
+}
+
+static void kvm_lose_lbt(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) {
+		kvm_save_lbt(vcpu);
+		kvm_clear_csr_euen(KVM_EUEN_LBTEN);
+		vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT;
+	}
+	preempt_enable();
+}
+
+#else
+void kvm_own_lbt(struct kvm_vcpu *vcpu) { }
+static void kvm_enable_lbt_fpu(struct kvm_vcpu *vcpu, unsigned long fcsr) { }
+static void kvm_lose_lbt(struct kvm_vcpu *vcpu) { }
+#endif
+
+/* Enable FPU for guest and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+
+	preempt_disable();
+
+	/*
+	 * Enable FPU for guest; LBT may need enabling first if the guest
+	 * FCSR has TM set (see kvm_enable_lbt_fpu())
+	 */
+	kvm_enable_lbt_fpu(vcpu, vcpu->arch.fpu.fcsr);
+	kvm_set_csr_euen(KVM_EUEN_FPEN);
+
+	/* If guest FPU state is not active, restore it now */
+	kvm_restore_fpu(vcpu);
+	vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
+	trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
+
+	preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_LSX
+/* Enable LSX for guest and restore context */
+void kvm_own_lsx(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+
+	/* Enable LSX for guest */
+	kvm_enable_lbt_fpu(vcpu, vcpu->arch.fpu.fcsr);
+	kvm_set_csr_euen(KVM_EUEN_LSXEN | KVM_EUEN_FPEN);
+	switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+	case KVM_LARCH_FPU:
+		/*
+		 * Guest FPU state already loaded,
+		 * only restore upper LSX state
+		 */
+		kvm_restore_lsx_upper(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE,
+			      KVM_TRACE_AUX_LSX);
+		break;
+	default:
+		/*
+		 * Neither FPU nor LSX is active,
+		 * so restore the full LSX state
+		 */
+		kvm_restore_lsx(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE,
+			      KVM_TRACE_AUX_FPU_LSX);
+		break;
+	}
+
+	vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
+	preempt_enable();
+}
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* Enable LASX for guest and restore context */
+void kvm_own_lasx(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+
+	kvm_enable_lbt_fpu(vcpu, vcpu->arch.fpu.fcsr);
+	kvm_set_csr_euen(KVM_EUEN_FPEN | KVM_EUEN_LSXEN | KVM_EUEN_LASXEN);
+	switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
+	case KVM_LARCH_LSX | KVM_LARCH_FPU:
+	case KVM_LARCH_LSX:
+		/*
+		 * Guest LSX state already loaded, only restore upper LASX state
+		 */
+		kvm_restore_lasx_upper(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
+		break;
+	case KVM_LARCH_FPU:
+		/*
+		 * Guest FPU state already loaded, only restore bits 64-255
+		 * (the LSX and LASX upper halves)
+		 */
+		kvm_restore_lsx_upper(vcpu);
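+		/* bits 64-127 (LSX upper) restored above; bits 128-255 (LASX upper) below */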
+		kvm_restore_lasx_upper(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
+		break;
+	default:
+		/* Neither FPU nor LSX is active, so restore the full LASX state */
+		kvm_restore_lasx(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE,
+			      KVM_TRACE_AUX_FPU_LSX_LASX);
+		break;
+	}
+
+	vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
+	preempt_enable();
+}
+#endif
+
+/* Save and disable FPU & LSX & LASX */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_CPU_HAS_LBT
+	unsigned long fcsr;
+#endif
+
+	preempt_disable();
+#ifdef CONFIG_CPU_HAS_LBT
+	if (vcpu->arch.aux_inuse & KVM_LARCH_FP_ALL) {
+		if (!(vcpu->arch.aux_inuse & KVM_LARCH_LBT)) {
+			fcsr = read_fcsr(LOONGARCH_FCSR0);
+			kvm_enable_lbt_fpu(vcpu, fcsr);
+		}
+	}
+#endif
+	if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
+		kvm_save_lasx(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_LSX_LASX);
+		/* Disable LASX, LSX & FPU */
+		kvm_clear_csr_euen(KVM_EUEN_FPEN | KVM_EUEN_LSXEN | KVM_EUEN_LASXEN);
+	} else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) {
+		kvm_save_lsx(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_LSX);
+		/* Disable LSX & FPU */
+		kvm_clear_csr_euen(KVM_EUEN_FPEN | KVM_EUEN_LSXEN);
+	} else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+		kvm_save_fpu(vcpu);
+		trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
+		/* Disable FPU */
+		kvm_clear_csr_euen(KVM_EUEN_FPEN);
+	}
+	vcpu->arch.aux_inuse &= ~KVM_LARCH_FP_ALL;
+
+	kvm_lose_lbt(vcpu);
+	preempt_enable();
+}
+
+void kvm_lose_hw_perf(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.aux_inuse & KVM_LARCH_PERF) {
+		struct loongarch_csrs *csr = vcpu->arch.csr;
+		/* save guest pmu csr */
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCTRL0);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCNTR0);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCTRL1);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCNTR1);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCTRL2);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCNTR2);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCTRL3);
+		kvm_save_hw_gcsr(csr, KVM_CSR_PERFCNTR3);
+		if (((kvm_read_sw_gcsr(csr, KVM_CSR_PERFCTRL0) |
+		      kvm_read_sw_gcsr(csr, KVM_CSR_PERFCTRL1) |
+		      kvm_read_sw_gcsr(csr, KVM_CSR_PERFCTRL2) |
+		      kvm_read_sw_gcsr(csr, KVM_CSR_PERFCTRL3))
+		     & KVM_PMU_PLV_ENABLE) == 0)
+			vcpu->arch.aux_inuse &= ~KVM_LARCH_PERF;
+		/* config host pmu csr */
+		kvm_write_csr_gcfg(kvm_read_csr_gcfg() & ~KVM_GCFG_GPERF);
+		/* TODO: pmu csr used by host and guest at the same time */
+		kvm_write_csr_perfctrl0(0);
+		kvm_write_csr_perfcntr0(0);
+		kvm_write_csr_perfctrl1(0);
+		kvm_write_csr_perfcntr1(0);
+		kvm_write_csr_perfctrl2(0);
+		kvm_write_csr_perfcntr2(0);
+		kvm_write_csr_perfctrl3(0);
+		kvm_write_csr_perfcntr3(0);
+	}
+}
+
+void kvm_restore_hw_perf(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.aux_inuse & KVM_LARCH_PERF) {
+		struct loongarch_csrs *csr = vcpu->arch.csr;
+		/* enable guest pmu */
+		kvm_write_csr_gcfg(kvm_read_csr_gcfg() | KVM_GCFG_GPERF);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCTRL0);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCNTR0);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCTRL1);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCNTR1);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCTRL2);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCNTR2);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCTRL3);
+		kvm_restore_hw_gcsr(csr, KVM_CSR_PERFCNTR3);
+	}
+}
+
+static int __init kvm_loongarch_init(void)
+{
+	int ret;
+
+	if (!cpu_has_lvz)
+		return 0;
+
+	ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
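+/* Module unload: unregister this architecture from the KVM core. */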
+static void __exit kvm_loongarch_exit(void)
+{
+	kvm_exit();
+}
+
+module_init(kvm_loongarch_init);
+module_exit(kvm_loongarch_exit);
+
+static const struct cpu_feature loongarch_kvm_feature[] = {
+	{ .feature = cpu_feature(LOONGARCH_LVZ) },
+	{},
+};
+MODULE_DEVICE_TABLE(cpu, loongarch_kvm_feature);
+
+EXPORT_TRACEPOINT_SYMBOL(kvm_exit);
diff --git a/arch/loongarch/kvm/ls_irq.h b/arch/loongarch/kvm/ls_irq.h
new file mode 100644
index 000000000000..ef0af6032a69
--- /dev/null
+++ b/arch/loongarch/kvm/ls_irq.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef __LOONGARCH_KVM_IRQ_H__
+#define __LOONGARCH_KVM_IRQ_H__
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return kvm->arch.v_ioapic ? 1 : 0;
+}
+
+int kvm_ls7a_setup_default_irq_routing(struct kvm *kvm);
+#endif
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
new file mode 100644
index 000000000000..2fdaa05f15a6
--- /dev/null
+++ b/arch/loongarch/kvm/mmu.c
@@ -0,0 +1,1316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "kvm_compat.h"
+
+/*
+ * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels
+ * for which pages need to be cached.
+ */
+#if defined(__PAGETABLE_PMD_FOLDED)
+#define KVM_MMU_CACHE_MIN_PAGES 1
+#else
+#define KVM_MMU_CACHE_MIN_PAGES 2
+#endif
+
+static inline int kvm_pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; }
+static inline pte_t kvm_pte_mksmall(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_HUGE;
+	return pte;
+}
+static inline void kvm_set_pte(pte_t *ptep, pte_t val)
+{
+	WRITE_ONCE(*ptep, val);
+}
+
+static int kvm_tlb_flush_gpa(struct kvm_vcpu *vcpu, unsigned long gpa)
+{
+	preempt_disable();
+	gpa &= (PAGE_MASK << 1);
+	invtlb(INVTLB_GID_ADDR, kvm_read_csr_gstat() & KVM_GSTAT_GID, gpa);
+	preempt_enable();
+	return 0;
+}
+
+static inline pmd_t kvm_pmd_mkhuge(pmd_t pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	return pmd_mkhuge(pmd);
+#elif defined(CONFIG_HUGETLB_PAGE)
+	pte_t entry;
+
+	pte_val(entry) = pmd_val(pmd);
+	entry = pte_mkhuge(entry);
+	pmd_val(pmd) = pte_val(entry);
+#endif
+	return pmd;
+}
+
+static inline pmd_t kvm_pmd_mkclean(pmd_t pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	return pmd_mkclean(pmd);
+#elif defined(CONFIG_HUGETLB_PAGE)
+	pte_t entry;
+
+	pte_val(entry) = pmd_val(pmd);
+	entry = pte_mkclean(entry);
+	pmd_val(pmd) = pte_val(entry);
+#endif
+	return pmd;
+}
+
+static inline pmd_t kvm_pmd_mkold(pmd_t pmd)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	return pmd_mkold(pmd);
+#elif defined(CONFIG_HUGETLB_PAGE)
+	pte_t entry;
+
+	pte_val(entry) = pmd_val(pmd);
+	entry = pte_mkold(entry);
+	pmd_val(pmd) = pte_val(entry);
+#endif
+	return pmd;
+}
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
+{
+	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+}
+
+/**
+ * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
+ *
+ * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
+ * to host physical page mappings.
+ *
+ * Returns:	Pointer to new KVM GPA page directory.
+ *		NULL on allocation failure.
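+ *
+ * (A single page backs the whole PGD here; pgd_init() pre-fills every slot
+ * with a safe empty entry before the table is put to use.)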
+ */ +pgd_t *kvm_pgd_alloc(void) +{ + pgd_t *ret; + struct page *page; + + page = alloc_pages(GFP_KERNEL, 0); + if (!page) + return NULL; + ret = (pgd_t *) page_address(page); + if (ret) + pgd_init((unsigned long)ret); + + return ret; +} + +/** + * kvm_walk_pgd() - Walk page table with optional allocation. + * @pgd: Page directory pointer. + * @addr: Address to index page table using. + * @cache: MMU page cache to allocate new page tables from, or NULL. + * + * Walk the page tables pointed to by @pgd to find the PTE corresponding to the + * address @addr. If page tables don't exist for @addr, they will be created + * from the MMU cache if @cache is not NULL. + * + * Returns: Pointer to pte_t corresponding to @addr. + * NULL if a page table doesn't exist for @addr and !@cache. + * NULL if a page table allocation failed. + */ +static pte_t *kvm_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd += pgd_index(addr); + if (pgd_none(*pgd)) { + /* Not used yet */ + BUG(); + return NULL; + } + p4d = p4d_offset(pgd, addr); + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) { + pmd_t *new_pmd; + + if (!cache) + return NULL; + new_pmd = kvm_mmu_memory_cache_alloc(cache); + pmd_init((unsigned long)new_pmd, + (unsigned long)invalid_pte_table); + pud_populate(NULL, pud, new_pmd); + } + pmd = pmd_offset(pud, addr); +#ifdef CONFIG_HUGETLB_PAGE + if (pmd_huge(*pmd)) { + return (pte_t *)pmd; + } +#endif + if (pmd_none(*pmd)) { + pte_t *new_pte; + + if (!cache) + return NULL; + new_pte = kvm_mmu_memory_cache_alloc(cache); + clear_page(new_pte); + pmd_populate_kernel(NULL, pmd, new_pte); + } + return pte_offset_kernel(pmd, addr); +} + +/* Caller must hold kvm->mm_lock */ +static pte_t *kvm_pte_for_gpa(struct kvm *kvm, + struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + return kvm_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); +} + +/* + * kvm_flush_gpa_{pte,pmd,pud,pgd,pt}. + * Flush a range of guest physical address space from the VM's GPA page tables. 
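+ * Each level helper returns true when its entire table has been emptied, so
+ * the caller can free the table page and clear the entry that pointed to it.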
+ */ + +static bool kvm_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, + unsigned long end_gpa, unsigned long *data) +{ + int i_min = pte_index(start_gpa); + int i_max = pte_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); + int i; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + set_pte(pte + i, __pte(0)); + if (data) + *data = *data + 1; + } + return safe_to_remove; +} + +static bool kvm_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, + unsigned long end_gpa, unsigned long *data) +{ + pte_t *pte; + unsigned long end = ~0ul; + int i_min = pmd_index(start_gpa); + int i_max = pmd_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pmd_present(pmd[i])) + continue; + + if (pmd_huge(pmd[i]) && pmd_present(pmd[i])) { + pmd_clear(pmd + i); + if (data) + *data += PTRS_PER_PMD; + continue; + } + + pte = pte_offset_kernel(pmd + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_flush_gpa_pte(pte, start_gpa, end, data)) { + pmd_clear(pmd + i); + pte_free_kernel(NULL, pte); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, + unsigned long end_gpa, unsigned long *data) +{ + pmd_t *pmd; + unsigned long end = ~0ul; + int i_min = pud_index(start_gpa); + int i_max = pud_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_flush_gpa_pmd(pmd, start_gpa, end, data)) { + pud_clear(pud + i); + pmd_free(NULL, pmd); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, + unsigned long end_gpa, unsigned long *data) +{ + p4d_t *p4d; + pud_t *pud; + unsigned long end = ~0ul; + int i_min = pgd_index(start_gpa); + int i_max = pgd_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pgd_present(pgd[i])) + continue; + + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_flush_gpa_pud(pud, start_gpa, end, data)) { + pgd_clear(pgd + i); + pud_free(NULL, pud); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +/** + * kvm_flush_gpa_pt() - Flush a range of guest physical addresses. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Flushes a range of GPA mappings from the GPA page tables. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether its safe to remove the top level page directory because + * all lower levels have been removed. + */ +static bool kvm_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn, void *data) +{ + return kvm_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT, (unsigned long *)data); +} + +/* + * kvm_mkclean_gpa_pt. + * Mark a range of guest physical address space clean (writes fault) in the VM's + * GPA page table to allow dirty page tracking. 
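+ * A cleaned mapping write-faults on the next guest store; that fault is what
+ * feeds the dirty page log.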
+ */
+
+static int kvm_mkclean_pte(pte_t *pte, unsigned long start, unsigned long end)
+{
+	int ret = 0;
+	int i_min = pte_index(start);
+	int i_max = pte_index(end);
+	int i;
+	pte_t val;
+
+	for (i = i_min; i <= i_max; ++i) {
+		val = pte[i];
+		if (pte_present(val) && pte_dirty(val)) {
+			set_pte(pte + i, pte_mkclean(val));
+			ret = 1;
+		}
+	}
+	return ret;
+}
+
+static int kvm_mkclean_pmd(pmd_t *pmd, unsigned long start, unsigned long end)
+{
+	int ret = 0;
+	pte_t *pte;
+	unsigned long cur_end = ~0ul;
+	int i_min = pmd_index(start);
+	int i_max = pmd_index(end);
+	int i;
+	pmd_t old, new;
+
+	for (i = i_min; i <= i_max; ++i, start = 0) {
+		if (!pmd_present(pmd[i]))
+			continue;
+
+		if (pmd_huge(pmd[i])) {
+			old = pmd[i];
+			new = kvm_pmd_mkclean(old);
+			if (pmd_val(new) == pmd_val(old))
+				continue;
+			set_pmd(pmd + i, new);
+			ret = 1;
+			continue;
+		}
+
+		pte = pte_offset_kernel(pmd + i, 0);
+		if (i == i_max)
+			cur_end = end;
+
+		ret |= kvm_mkclean_pte(pte, start, cur_end);
+	}
+
+	return ret;
+}
+
+static int kvm_mkclean_pud(pud_t *pud, unsigned long start, unsigned long end)
+{
+	int ret = 0;
+	pmd_t *pmd;
+	unsigned long cur_end = ~0ul;
+	int i_min = pud_index(start);
+	int i_max = pud_index(end);
+	int i;
+
+	for (i = i_min; i <= i_max; ++i, start = 0) {
+		if (!pud_present(pud[i]))
+			continue;
+
+		pmd = pmd_offset(pud + i, 0);
+		if (i == i_max)
+			cur_end = end;
+
+		ret |= kvm_mkclean_pmd(pmd, start, cur_end);
+	}
+	return ret;
+}
+
+static int kvm_mkclean_pgd(pgd_t *pgd, unsigned long start, unsigned long end)
+{
+	int ret = 0;
+	p4d_t *p4d;
+	pud_t *pud;
+	unsigned long cur_end = ~0ul;
+	int i_min = pgd_index(start);
+	int i_max = pgd_index(end);
+	int i;
+
+	for (i = i_min; i <= i_max; ++i, start = 0) {
+		if (!pgd_present(pgd[i]))
+			continue;
+
+		p4d = p4d_offset(pgd, 0);
+		pud = pud_offset(p4d + i, 0);
+		if (i == i_max)
+			cur_end = end;
+
+		ret |= kvm_mkclean_pud(pud, start, cur_end);
+	}
+	return ret;
+}
+
+/**
+ * kvm_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
+ * @kvm:	KVM pointer.
+ * @start_gfn:	Guest frame number of first page in GPA range to flush.
+ * @end_gfn:	Guest frame number of last page in GPA range to flush.
+ *
+ * Make a range of GPA mappings clean so that guest writes will fault and
+ * trigger dirty page logging.
+ *
+ * The caller must hold the @kvm->mmu_lock spinlock.
+ *
+ * Returns:	Whether any GPA mappings were modified, which would require
+ *		derived mappings (GVA page tables & TLB entries) to be
+ *		invalidated.
+ */
+static int kvm_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
+{
+	return kvm_mkclean_pgd(kvm->arch.gpa_mm.pgd, start_gfn << PAGE_SHIFT,
+			       end_gfn << PAGE_SHIFT);
+}
+
+/**
+ * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
+ * @kvm:	The KVM pointer
+ * @slot:	The memory slot associated with mask
+ * @gfn_offset:	The gfn offset in memory slot
+ * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
+ *		slot to be write protected
+ *
+ * Walks the bits set in @mask and write-protects the associated PTEs.
+ * The caller must acquire @kvm->mmu_lock.
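+ *
+ * The walk covers the span from the lowest to the highest set bit of @mask
+ * in one pass rather than visiting each set bit individually.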
+ */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + gfn_t base_gfn = slot->base_gfn + gfn_offset; + gfn_t start = base_gfn + __ffs(mask); + gfn_t end = base_gfn + __fls(mask); + + kvm_mkclean_gpa_pt(kvm, start, end); +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + int needs_flush; + + kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", + __func__, kvm, mem->slot, mem->guest_phys_addr, + mem->memory_size, mem->userspace_addr); + + /* + * If dirty page logging is enabled, write protect all pages in the slot + * ready for dirty logging. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. + * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() + */ + if (change == KVM_MR_FLAGS_ONLY && + (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + spin_lock(&kvm->mmu_lock); + /* Write protect GPA page table entries */ + needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, + new->base_gfn + new->npages - 1); + /* Let implementation do the rest */ + if (needs_flush) + kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); + } +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + /* Flush whole GPA */ + kvm_flush_gpa_pt(kvm, 0, ~0UL, NULL); + + /* Flush vpid for each VCPU individually */ + kvm_flush_remote_tlbs(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + unsigned long npages; + + /* + * The slot has been made invalid (ready for moving or deletion), so we + * need to ensure that it can no longer be accessed by any guest VCPUs. + */ + + npages = 0; + spin_lock(&kvm->mmu_lock); + /* Flush slot from GPA */ + kvm_flush_gpa_pt(kvm, slot->base_gfn, + slot->base_gfn + slot->npages - 1, &npages); + /* Let implementation do the rest */ + if (npages) + kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); +} + +void _kvm_destroy_mm(struct kvm *kvm) +{ + /* It should always be safe to remove after flushing the whole range */ + WARN_ON(!kvm_flush_gpa_pt(kvm, 0, ~0UL, NULL)); + pgd_free(NULL, kvm->arch.gpa_mm.pgd); + kvm->arch.gpa_mm.pgd = NULL; +} + +/* + * Mark a range of guest physical address space old (all accesses fault) in the + * VM's GPA page table to allow detection of commonly used pages. 
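+ * An old mapping faults on its next guest access and is marked young again,
+ * which is how kvm_age_hva() below can tell which ranges are still in use.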
+ */ + +static int kvm_mkold_pte(pte_t *pte, unsigned long start, + unsigned long end) +{ + int ret = 0; + int i_min = pte_index(start); + int i_max = pte_index(end); + int i; + pte_t old, new; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + old = pte[i]; + new = pte_mkold(old); + if (pte_val(new) == pte_val(old)) + continue; + set_pte(pte + i, new); + ret = 1; + } + + return ret; +} + +static int kvm_mkold_pmd(pmd_t *pmd, unsigned long start, unsigned long end) +{ + int ret = 0; + pte_t *pte; + unsigned long cur_end = ~0ul; + int i_min = pmd_index(start); + int i_max = pmd_index(end); + int i; + pmd_t old, new; + + for (i = i_min; i <= i_max; ++i, start = 0) { + if (!pmd_present(pmd[i])) + continue; + + if (pmd_huge(pmd[i])) { + old = pmd[i]; + new = kvm_pmd_mkold(old); + if (pmd_val(new) == pmd_val(old)) + continue; + set_pmd(pmd + i, new); + ret = 1; + continue; + } + + pte = pte_offset_kernel(pmd + i, 0); + if (i == i_max) + cur_end = end; + + ret |= kvm_mkold_pte(pte, start, cur_end); + } + + return ret; +} + +static int kvm_mkold_pud(pud_t *pud, unsigned long start, unsigned long end) +{ + int ret = 0; + pmd_t *pmd; + unsigned long cur_end = ~0ul; + int i_min = pud_index(start); + int i_max = pud_index(end); + int i; + + for (i = i_min; i <= i_max; ++i, start = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + cur_end = end; + + ret |= kvm_mkold_pmd(pmd, start, cur_end); + } + + return ret; +} + +static int kvm_mkold_pgd(pgd_t *pgd, unsigned long start, unsigned long end) +{ + int ret = 0; + p4d_t *p4d; + pud_t *pud; + unsigned long cur_end = ~0ul; + int i_min = pgd_index(start); + int i_max = pgd_index(end); + int i; + + for (i = i_min; i <= i_max; ++i, start = 0) { + if (!pgd_present(pgd[i])) + continue; + + p4d = p4d_offset(pgd, 0); + pud = pud_offset(p4d + i, 0); + if (i == i_max) + cur_end = end; + + ret |= kvm_mkold_pud(pud, start, cur_end); + } + + return ret; +} + +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, gfn_t gfn, + gpa_t gfn_end, + struct kvm_memory_slot *memslot, + void *data), + void *data) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int ret = 0; + + slots = kvm_memslots(kvm); + + /* we only care about the pages that the guest sees */ + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gfn, gfn_end; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn_start, gfn_start+1, ..., gfn_end-1}. 
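+ *
+ * (hva_end is rounded up by PAGE_SIZE - 1 below so a partially covered
+ * final page still resolves to its gfn.)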
+ */ + gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); + ret |= handler(kvm, gfn, gfn_end, memslot, data); + } + + return ret; +} + + +static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + unsigned long npages; + + npages = 0; + kvm_flush_gpa_pt(kvm, gfn, gfn_end - 1, &npages); + *(unsigned long *)data = *(unsigned long *)data + npages; + + return npages > 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, bool blockable) +{ + unsigned long npages; + + npages = 0; + return handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &npages); +} + +static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + gpa_t gpa = gfn << PAGE_SHIFT; + pte_t hva_pte = *(pte_t *)data; + pte_t *gpa_pte = kvm_pte_for_gpa(kvm, NULL, gpa); + pte_t old_pte; + + if (!gpa_pte) + return 0; + + /* Mapping may need adjusting depending on memslot flags */ + old_pte = *gpa_pte; + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) + hva_pte = pte_mkclean(hva_pte); + else if (memslot->flags & KVM_MEM_READONLY) + hva_pte = pte_wrprotect(hva_pte); + + set_pte(gpa_pte, hva_pte); + + /* Replacing an absent or old page doesn't need flushes */ + if (!pte_present(old_pte) || !pte_young(old_pte)) + return 0; + + /* Pages swapped, aged, moved, or cleaned require flushes */ + return !pte_present(hva_pte) || + !pte_young(hva_pte) || + pte_pfn(old_pte) != pte_pfn(hva_pte) || + (pte_dirty(old_pte) && !pte_dirty(hva_pte)); +} + +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + int ret; + + ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte); + if (ret) + /* Flush vpid for each VCPU individually */ + kvm_flush_remote_tlbs(kvm); + return 0; +} + +static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + return kvm_mkold_pgd(kvm->arch.gpa_mm.pgd, gfn << PAGE_SHIFT, + gfn_end << PAGE_SHIFT); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + gpa_t gpa = gfn << PAGE_SHIFT; + pte_t *gpa_pte = kvm_pte_for_gpa(kvm, NULL, gpa); + + if (!gpa_pte) + return 0; + return pte_young(*gpa_pte); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + +static pud_t *kvm_get_pud(struct kvm *kvm, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr) +{ + pgd_t *pgd; + + pgd = kvm->arch.gpa_mm.pgd + pgd_index(addr); + if (pgd_none(*pgd)) { + /* Not used yet */ + BUG(); + return NULL; + } + + return pud_offset(p4d_offset(pgd, addr), addr); +} + +static pmd_t *kvm_get_pmd(struct kvm *kvm, + struct kvm_mmu_memory_cache *cache, phys_addr_t addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = kvm_get_pud(kvm, cache, addr); + if (!pud || pud_huge(*pud)) + return NULL; + + if (pud_none(*pud)) { + if (!cache) + return NULL; + pmd = kvm_mmu_memory_cache_alloc(cache); + pmd_init((unsigned long)pmd, + (unsigned long)invalid_pte_table); + pud_populate(NULL, pud, pmd); + } + + return pmd_offset(pud, addr); +} + +static int kvm_set_pmd_huge(struct 
kvm_vcpu *vcpu, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + +retry: + pmd = kvm_get_pmd(vcpu->kvm, cache, addr); + VM_BUG_ON(!pmd); + + old_pmd = *pmd; + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + + if (pmd_present(old_pmd)) { + /* + * If we already have PTE level mapping for this block, + * we must unmap it to avoid inconsistent TLB state and + * leaking the table page. We could end up in this situation + * if the memory slot was marked for dirty logging and was + * reverted, leaving PTE level mappings for the pages accessed + * during the period. So, unmap the PTE level mapping for this + * block and retry, as we could have released the upper level + * table in the process. + * + * Normal THP split/merge follows mmu_notifier callbacks and do + * get handled accordingly. + */ + if (!pmd_huge(old_pmd)) { + ++vcpu->stat.huge_merge_exits; + kvm_flush_gpa_pt(vcpu->kvm, + (addr & PMD_MASK) >> PAGE_SHIFT, + ((addr & PMD_MASK) + PMD_SIZE - 1) >> PAGE_SHIFT, NULL); + goto retry; + } + /* + * Mapping in huge pages should only happen through a + * fault. If a page is merged into a transparent huge + * page, the individual subpages of that huge page + * should be unmapped through MMU notifiers before we + * get here. + * + * Merging of CompoundPages is not supported; they + * should become splitting first, unmapped, merged, + * and mapped back in on-demand. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); +#endif + pmd_clear(pmd); + } + + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + set_pmd(pmd, *new_pmd); + return 0; +} + +/* + * Adjust pfn start boundary if support for transparent hugepage + */ +static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, unsigned long *gpap) +{ + kvm_pfn_t pfn = *pfnp; + gfn_t gfn = *gpap >> PAGE_SHIFT; + struct page *page = pfn_to_page(pfn); + + /* + * PageTransCompoundMap() returns true for THP and + * hugetlbfs. Make sure the adjustment is done only for THP + * pages. + */ + if ((!PageHuge(page)) && PageTransCompound(page) && + (atomic_read(&page->_mapcount) < 0)) { + unsigned long mask; + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. 
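+	 *
+	 * (The kvm_release_pfn_clean()/kvm_get_pfn() pair below is exactly
+	 * that tail-to-head refcount transfer.)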
+ */ + mask = PTRS_PER_PMD - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { + *gpap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); + *pfnp = pfn; + } + + return true; + } + + return false; +} + +static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, + unsigned long hva, bool write) +{ + gpa_t gpa_start; + hva_t uaddr_start, uaddr_end; + unsigned long map_size; + size_t size; + + map_size = PMD_SIZE; + /* Disable dirty logging on HugePages */ + if ((memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) && write) + return false; + + size = memslot->npages * PAGE_SIZE; + gpa_start = memslot->base_gfn << PAGE_SHIFT; + uaddr_start = memslot->userspace_addr; + uaddr_end = uaddr_start + size; + + /* + * Pages belonging to memslots that don't have the same alignment + * within a PMD/PUD for userspace and GPA cannot be mapped with stage-2 + * PMD/PUD entries, because we'll end up mapping the wrong pages. + * + * Consider a layout like the following: + * + * memslot->userspace_addr: + * +-----+--------------------+--------------------+---+ + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| + * +-----+--------------------+--------------------+---+ + * + * memslot->base_gfn << PAGE_SIZE: + * +---+--------------------+--------------------+-----+ + * |abc|def Stage-2 block | Stage-2 block |tvxyz| + * +---+--------------------+--------------------+-----+ + * + * If we create those stage-2 blocks, we'll end up with this incorrect + * mapping: + * d -> f + * e -> g + * f -> h + */ + if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) + return false; + + /* + * Next, let's make sure we're not trying to map anything not covered + * by the memslot. This means we have to prohibit block size mappings + * for the beginning and end of a non-block aligned and non-block sized + * memory slot (illustrated by the head and tail parts of the + * userspace view above containing pages 'abcde' and 'xyz', + * respectively). + * + * Note that it doesn't matter if we do the check using the + * userspace_addr or the base_gfn, as both are equally aligned (per + * the check above) and equally sized. + */ + return (hva & ~(map_size - 1)) >= uaddr_start && + (hva & ~(map_size - 1)) + map_size <= uaddr_end; +} + +/** + * kvm_map_page_fast() - Fast path GPA fault handler. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write: Whether the fault was due to a write. + * + * Perform fast path GPA fault handling, doing all that can be done without + * calling into KVM. This handles marking old pages young (for idle page + * tracking), and dirtying of clean pages (for dirty page logging). + * + * Returns: 0 on success, in which case we can update derived mappings and + * resume guest execution. + * -EFAULT on failure due to absent GPA mapping or write to + * read-only page, in which case KVM must be consulted. 
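+ *
+ * No new mapping is created here; that is the slow path's job, see
+ * kvm_map_page() below.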
+ */ +static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write) +{ + struct kvm *kvm = vcpu->kvm; + gfn_t gfn = gpa >> PAGE_SHIFT; + pte_t *ptep; + kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ + bool pfn_valid = false; + int ret = 0; + struct kvm_memory_slot *slot; + + spin_lock(&kvm->mmu_lock); + + /* Fast path - just check GPA page table for an existing entry */ + ptep = kvm_pte_for_gpa(kvm, NULL, gpa); + if (!ptep || !pte_present(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track access to pages marked old */ + if (!pte_young(*ptep)) { + set_pte(ptep, pte_mkyoung(*ptep)); + pfn = pte_pfn(*ptep); + pfn_valid = true; + /* call kvm_set_pfn_accessed() after unlock */ + } + if (write && !pte_dirty(*ptep)) { + if (!pte_write(*ptep)) { + ret = -EFAULT; + goto out; + } + + if (kvm_pte_huge(*ptep)) { + /* + * Do not set write permission when dirty logging is + * enabled for HugePages + */ + slot = gfn_to_memslot(kvm, gfn); + if (slot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EFAULT; + goto out; + } + } + + /* Track dirtying of writeable pages */ + set_pte(ptep, pte_mkdirty(*ptep)); + pfn = pte_pfn(*ptep); + if (pmd_huge(*((pmd_t *)ptep))) { + int i; + gfn_t base_gfn = (gpa & PMD_MASK) >> PAGE_SHIFT; + + for (i = 0; i < PTRS_PER_PTE; i++) + mark_page_dirty(kvm, base_gfn + i); + } else + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + +out: + spin_unlock(&kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); + return ret; +} + +/* + * Split huge page + */ +static pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, pte_t *ptep, gfn_t gfn, + struct vm_area_struct *vma, unsigned long hva) +{ + int i; + pte_t val, *child; + struct kvm_mmu_memory_cache *memcache; + + memcache = &vcpu->arch.mmu_page_cache; + child = kvm_mmu_memory_cache_alloc(memcache); + val = kvm_pte_mksmall(*ptep); + for (i = 0; i < PTRS_PER_PTE; i++) { + kvm_set_pte(child + i, val); + pte_val(val) += PAGE_SIZE; + } + + /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */ + pte_val(val) = (unsigned long)child; + kvm_set_pte(ptep, val); + return child + (gfn & (PTRS_PER_PTE - 1)); +} + +/** + * kvm_map_page() - Map a guest physical page. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write: Whether the fault was due to a write. + * + * Handle GPA faults by creating a new GPA mapping (or updating an existing + * one). + * + * This takes care of marking pages young or dirty (idle/dirty page tracking), + * asking KVM for the corresponding PFN, and creating a mapping in the GPA page + * tables. Derived mappings (GVA page tables and TLBs) must be handled by the + * caller. + * + * Returns: 0 on success, in which case the caller may use the @out_entry + * and @out_buddy PTEs to update derived mappings and resume guest + * execution. + * -EFAULT if there is no memory region at @gpa or a write was + * attempted to a read-only memory region. This is usually handled + * as an MMIO access. 
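+ *
+ * (The MMIO handling itself is the caller's responsibility; -EFAULT here
+ * only reports that no normal mapping could be made.)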
+ */
+static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
+			bool write)
+{
+	bool writeable;
+	bool force_pte = false;
+	int i, srcu_idx, err = 0, retry_no = 0;
+	unsigned long hva;
+	unsigned long mmu_seq;
+	unsigned long prot_bits;
+	unsigned long vma_pagesize;
+	pte_t *ptep;
+	kvm_pfn_t pfn;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+	struct vm_area_struct *vma;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *memslot;
+	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+
+	/* Try the fast path to handle old / clean pages */
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	err = kvm_map_page_fast(vcpu, gpa, write);
+	if (!err)
+		goto out;
+
+	memslot = gfn_to_memslot(kvm, gfn);
+	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
+	if (kvm_is_error_hva(hva) || (write && !writeable))
+		goto out;
+
+	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	mmap_read_lock(current->mm);
+	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (unlikely(!vma)) {
+		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+		mmap_read_unlock(current->mm);
+		err = -EFAULT;
+		goto out;
+	}
+
+	vma_pagesize = vma_kernel_pagesize(vma);
+	if ((vma_pagesize == PMD_SIZE) &&
+	    !fault_supports_huge_mapping(memslot, hva, write)) {
+		force_pte = true;
+		vma_pagesize = PAGE_SIZE;
+		++vcpu->stat.huge_dec_exits;
+	}
+
+	/* PMD is not folded, adjust gfn to new boundary */
+	if (vma_pagesize == PMD_SIZE)
+		gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT;
+
+	mmap_read_unlock(current->mm);
+
+	/* We need a minimum of cached pages ready for page table creation */
+	err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
+	if (err)
+		goto out;
+
+retry:
+	/*
+	 * Used to check for invalidations in progress, of the pfn that is
+	 * returned by gfn_to_pfn_prot below.
+	 */
+	mmu_seq = kvm->mmu_notifier_seq;
+	/*
+	 * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
+	 * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
+	 * risk the page we get a reference to getting unmapped before we have a
+	 * chance to grab the mmu_lock without mmu_notifier_retry() noticing.
+	 *
+	 * This smp_rmb() pairs with the effective smp_wmb() of the combination
+	 * of the pte_unmap_unlock() after the PTE is zapped, and the
+	 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
+	 * mmu_notifier_seq is incremented.
+	 */
+	smp_rmb();
+
+	/* Slow path - ask KVM core whether we can access this GPA */
+	pfn = gfn_to_pfn_prot(kvm, gfn, write, &writeable);
+	if (is_error_noslot_pfn(pfn)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	spin_lock(&kvm->mmu_lock);
+	/* Check if an invalidation has taken place since we got pfn */
+	if (mmu_notifier_retry(kvm, mmu_seq)) {
+		/*
+		 * This can happen when mappings are changed asynchronously, but
+		 * also synchronously if a COW is triggered by
+		 * gfn_to_pfn_prot().
+		 */
+		spin_unlock(&kvm->mmu_lock);
+		kvm_set_pfn_accessed(pfn);
+		kvm_release_pfn_clean(pfn);
+		if (retry_no > 100) {
+			retry_no = 0;
+			schedule();
+		}
+		retry_no++;
+		goto retry;
+	}
+
+	if (vma_pagesize == PAGE_SIZE && !force_pte) {
+		/*
+		 * Only PMD_SIZE transparent hugepages (THP) are
+		 * currently supported. This code will need to be
+		 * updated to support other THP sizes.
+		 *
+		 * Make sure the host VA and the guest IPA are sufficiently
+		 * aligned and that the block is contained within the memslot.
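+		 * fault_supports_huge_mapping() is rechecked here because the
+		 * pfn is about to be moved to a THP head page by
+		 * transparent_hugepage_adjust().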
+ */ + ++vcpu->stat.huge_thp_exits; + if (fault_supports_huge_mapping(memslot, hva, write) && + transparent_hugepage_adjust(&pfn, &gpa)) { + ++vcpu->stat.huge_adjust_exits; + vma_pagesize = PMD_SIZE; + } + } + + /* Set up the prot bits */ + prot_bits = _PAGE_PRESENT | __READABLE; + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + prot_bits |= _CACHE_SUC; + else + prot_bits |= _CACHE_CC; + + if (writeable) { + prot_bits |= _PAGE_WRITE; + if (write) { + prot_bits |= __WRITEABLE; + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + } + + if (vma_pagesize == PMD_SIZE) { + pmd_t new_pmd = pfn_pmd(pfn, __pgprot(prot_bits)); + new_pmd = pmd_mkhuge(new_pmd); + if (writeable && write) { + gfn_t base_gfn = (gpa & PMD_MASK) >> PAGE_SHIFT; + for (i = 0; i < PTRS_PER_PTE; i++) + mark_page_dirty(kvm, base_gfn + i); + } + + ++vcpu->stat.huge_set_exits; + kvm_set_pmd_huge(vcpu, memcache, gpa, &new_pmd); + } else { + pte_t new_pte = pfn_pte(pfn, __pgprot(prot_bits)); + if (writeable && write) + mark_page_dirty(kvm, gfn); + + /* Ensure page tables are allocated */ + ptep = kvm_pte_for_gpa(kvm, memcache, gpa); + if (ptep && kvm_pte_huge(*ptep) && write) + ptep = kvm_split_huge(vcpu, ptep, gfn, vma, hva); + + set_pte(ptep, new_pte); + err = 0; + } + + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + kvm_set_pfn_accessed(pfn); +out: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return err; +} + +int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long badv, + bool write) +{ + int ret; + + ret = kvm_map_page(vcpu, badv, write); + if (ret) + return ret; + + /* Invalidate this entry in the TLB */ + return kvm_tlb_flush_gpa(vcpu, badv); +} + +/** + * kvm_flush_tlb_all() - Flush all root TLB entries for + * guests. + * + * Invalidate all entries including GVA-->GPA and GPA-->HPA mappings. + */ +void kvm_flush_tlb_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + invtlb_all(INVTLB_ALLGID, 0, 0); + local_irq_restore(flags); +} diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c new file mode 100644 index 000000000000..6fa063434ae2 --- /dev/null +++ b/arch/loongarch/kvm/timer.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kvmcpu.h" +#include "trace.h" +#include "kvm_compat.h" + +/* + * ktime_to_tick() - Scale ktime_t to a 64-bit stable timer. + * + * Caches the dynamic nanosecond bias in vcpu->arch.timer_dyn_bias. + */ +static u64 ktime_to_tick(struct kvm_vcpu *vcpu, ktime_t now) +{ + s64 now_ns, periods; + u64 delta; + + now_ns = ktime_to_ns(now); + delta = now_ns + vcpu->arch.timer_dyn_bias; + + if (delta >= vcpu->arch.timer_period) { + /* If delta is out of safe range the bias needs adjusting */ + periods = div64_s64(now_ns, vcpu->arch.timer_period); + vcpu->arch.timer_dyn_bias = -periods * vcpu->arch.timer_period; + /* Recalculate delta with new bias */ + delta = now_ns + vcpu->arch.timer_dyn_bias; + } + + /* + * We've ensured that: + * delta < timer_period + */ + return div_u64(delta * vcpu->arch.timer_mhz, MNSEC_PER_SEC); +} + +/** + * kvm_resume_hrtimer() - Resume hrtimer, updating expiry. + * @vcpu: Virtual CPU. + * @now: ktime at point of resume. + * @stable_timer: stable timer at point of resume. + * + * Resumes the timer and updates the timer expiry based on @now and @count. 
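+ * (@count above refers to the @stable_timer argument: the remaining tick
+ * count at the moment of resume.)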
+ */
+static void kvm_resume_hrtimer(struct kvm_vcpu *vcpu, ktime_t now, u64 stable_timer)
+{
+ u64 delta;
+ ktime_t expire;
+
+ /*
+ * Convert the remaining stable timer ticks to nanoseconds and
+ * program the hrtimer to expire at that point.
+ */
+ delta = div_u64(stable_timer * MNSEC_PER_SEC, vcpu->arch.timer_mhz);
+ expire = ktime_add_ns(now, delta);
+
+ /* Update hrtimer to use new timeout */
+ hrtimer_cancel(&vcpu->arch.swtimer);
+ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
+}
+
+/**
+ * kvm_init_timer() - Initialise stable timer.
+ * @vcpu: Virtual CPU.
+ * @timer_hz: Frequency of timer.
+ *
+ * Initialise the timer to the specified frequency and reset its counter to
+ * zero.
+ */
+void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
+{
+ ktime_t now;
+ unsigned long ticks;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ vcpu->arch.timer_mhz = timer_hz >> 20;
+ vcpu->arch.timer_period = div_u64((u64)MNSEC_PER_SEC * IOCSR_TIMER_MASK, vcpu->arch.timer_mhz);
+ vcpu->arch.timer_dyn_bias = 0;
+
+ /* Starting at 0 */
+ ticks = 0;
+ now = ktime_get();
+ vcpu->arch.timer_bias = ticks - ktime_to_tick(vcpu, now);
+ vcpu->arch.timer_bias &= IOCSR_TIMER_MASK;
+
+ kvm_write_sw_gcsr(csr, KVM_CSR_TVAL, ticks);
+}
+
+/**
+ * kvm_count_timeout() - Push timer forward on timeout.
+ * @vcpu: Virtual CPU.
+ *
+ * Handle an hrtimer event by pushing the hrtimer forward one period.
+ *
+ * Returns: The hrtimer_restart value to return to the hrtimer subsystem.
+ */
+enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu)
+{
+ unsigned long timer_cfg;
+
+ /* Add the timer period to the current expiry time */
+ timer_cfg = kvm_read_sw_gcsr(vcpu->arch.csr, KVM_CSR_TCFG);
+ if (timer_cfg & KVM_TCFG_PERIOD) {
+ hrtimer_add_expires_ns(&vcpu->arch.swtimer, timer_cfg & KVM_TCFG_VAL);
+ return HRTIMER_RESTART;
+ } else {
+ return HRTIMER_NORESTART;
+ }
+}
+
+/*
+ * kvm_restore_timer() - Restore timer state.
+ * @vcpu: Virtual CPU.
+ *
+ * Restore soft timer state from saved context.
+ */
+void kvm_restore_timer(struct kvm_vcpu *vcpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+ ktime_t saved_ktime, now;
+ u64 stable_timer, new_timertick = 0;
+ u64 delta = 0;
+ int expired = 0;
+ unsigned long timer_cfg;
+
+ /*
+ * Set guest stable timer cfg csr
+ */
+ timer_cfg = kvm_read_sw_gcsr(csr, KVM_CSR_TCFG);
+ kvm_restore_hw_gcsr(csr, KVM_CSR_ESTAT);
+ if (!(timer_cfg & KVM_TCFG_EN)) {
+ kvm_restore_hw_gcsr(csr, KVM_CSR_TCFG);
+ kvm_restore_hw_gcsr(csr, KVM_CSR_TVAL);
+ return;
+ }
+
+ now = ktime_get();
+ saved_ktime = vcpu->arch.stable_ktime_saved;
+ stable_timer = kvm_read_sw_gcsr(csr, KVM_CSR_TVAL);
+
+ /* Check whether the stable timer expired while it was saved */
+ delta = ktime_to_tick(vcpu, ktime_sub(now, saved_ktime));
+ if (delta >= stable_timer)
+ expired = 1;
+
+ if (expired) {
+ if (timer_cfg & KVM_TCFG_PERIOD) {
+ new_timertick = (delta - stable_timer) % (timer_cfg & KVM_TCFG_VAL);
+ } else {
+ new_timertick = 1;
+ }
+ } else {
+ new_timertick = stable_timer - delta;
+ }
+
+ new_timertick &= KVM_TCFG_VAL;
+ kvm_write_gcsr_timercfg(timer_cfg);
+ kvm_write_gcsr_timertick(new_timertick);
+ if (expired)
+ _kvm_queue_irq(vcpu, LARCH_INT_TIMER);
+}
+
+/*
+ * kvm_acquire_timer() - Switch to hard timer state.
+ * @vcpu: Virtual CPU.
+ *
+ * Restore hard timer state on top of existing soft timer state if possible.
+ *
+ * Since the hard timer won't remain active over preemption, preemption should
+ * be disabled by the caller.
+ */ +void kvm_acquire_timer(struct kvm_vcpu *vcpu) +{ + unsigned long flags, guestcfg; + + guestcfg = kvm_read_csr_gcfg(); + if (!(guestcfg & KVM_GCFG_TIT)) + return; + + /* enable guest access to hard timer */ + kvm_write_csr_gcfg(guestcfg & ~KVM_GCFG_TIT); + + /* + * Freeze the soft-timer and sync the guest stable timer with it. We do + * this with interrupts disabled to avoid latency. + */ + local_irq_save(flags); + hrtimer_cancel(&vcpu->arch.swtimer); + local_irq_restore(flags); +} + + +/* + * _kvm_save_timer() - Switch to software emulation of guest timer. + * @vcpu: Virtual CPU. + * + * Save guest timer state and switch to software emulation of guest + * timer. The hard timer must already be in use, so preemption should be + * disabled. + */ +static ktime_t _kvm_save_timer(struct kvm_vcpu *vcpu, u64 *stable_timer) +{ + u64 end_stable_timer; + ktime_t before_time; + + before_time = ktime_get(); + + /* + * Record a final stable timer which we will transfer to the soft-timer. + */ + end_stable_timer = kvm_read_gcsr_timertick(); + *stable_timer = end_stable_timer; + + kvm_resume_hrtimer(vcpu, before_time, end_stable_timer); + return before_time; +} + +/* + * kvm_save_timer() - Save guest timer state. + * @vcpu: Virtual CPU. + * + * Save guest timer state and switch to soft guest timer if hard timer was in + * use. + */ +void kvm_save_timer(struct kvm_vcpu *vcpu) +{ + struct loongarch_csrs *csr = vcpu->arch.csr; + unsigned long guestcfg; + u64 stable_timer = 0; + ktime_t save_ktime; + + preempt_disable(); + guestcfg = kvm_read_csr_gcfg(); + if (!(guestcfg & KVM_GCFG_TIT)) { + /* disable guest use of hard timer */ + kvm_write_csr_gcfg(guestcfg | KVM_GCFG_TIT); + + /* save hard timer state */ + kvm_save_hw_gcsr(csr, KVM_CSR_TCFG); + if (kvm_read_sw_gcsr(csr, KVM_CSR_TCFG) & KVM_TCFG_EN) { + save_ktime = _kvm_save_timer(vcpu, &stable_timer); + kvm_write_sw_gcsr(csr, KVM_CSR_TVAL, stable_timer); + vcpu->arch.stable_ktime_saved = save_ktime; + if (stable_timer == IOCSR_TIMER_MASK) + _kvm_queue_irq(vcpu, LARCH_INT_TIMER); + } else { + kvm_save_hw_gcsr(csr, KVM_CSR_TVAL); + } + } + + /* save timer-related state to VCPU context */ + kvm_save_hw_gcsr(csr, KVM_CSR_ESTAT); + preempt_enable(); +} + +void kvm_reset_timer(struct kvm_vcpu *vcpu) +{ + kvm_write_gcsr_timercfg(0); + kvm_write_sw_gcsr(vcpu->arch.csr, KVM_CSR_TCFG, 0); + hrtimer_cancel(&vcpu->arch.swtimer); +} diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h new file mode 100644 index 000000000000..b38a5ebe0f8b --- /dev/null +++ b/arch/loongarch/kvm/trace.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include +#include "kvm_compat.h" +#include "kvmcsr.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH . 
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * arch/loongarch/kvm/loongarch.c
+ */
+extern bool kvm_trace_guest_mode_change;
+int kvm_guest_mode_change_trace_reg(void);
+void kvm_guest_mode_change_trace_unreg(void);
+
+/*
+ * Tracepoints for VM enters
+ */
+DECLARE_EVENT_CLASS(kvm_transition,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.pc;
+ ),
+
+ TP_printk("PC: 0x%08lx",
+ __entry->pc)
+);
+
+DEFINE_EVENT(kvm_transition, kvm_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_reenter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_out,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+/* The first 32 exit reasons correspond to ESTAT.Ecode */
+#define KVM_TRACE_EXIT_INT 0
+#define KVM_TRACE_EXIT_TLBLD (EXCCODE_TLBL)
+#define KVM_TRACE_EXIT_TLBST (EXCCODE_TLBS)
+#define KVM_TRACE_EXIT_TLBI (EXCCODE_TLBI)
+#define KVM_TRACE_EXIT_TLBMOD (EXCCODE_TLBM)
+#define KVM_TRACE_EXIT_TLBNR (EXCCODE_TLBNR)
+#define KVM_TRACE_EXIT_TLBNX (EXCCODE_TLBNX)
+#define KVM_TRACE_EXIT_TLBPE (EXCCODE_TLBPE)
+#define KVM_TRACE_EXIT_ADDE (EXCCODE_ADE)
+#define KVM_TRACE_EXIT_UNALIGN (EXCCODE_ALE)
+#define KVM_TRACE_EXIT_ODB (EXCCODE_BCE)
+#define KVM_TRACE_EXIT_SYSCALL (EXCCODE_SYS)
+#define KVM_TRACE_EXIT_BP (EXCCODE_BP)
+#define KVM_TRACE_EXIT_INE (EXCCODE_INE)
+#define KVM_TRACE_EXIT_IPE (EXCCODE_IPE)
+#define KVM_TRACE_EXIT_FPDIS (EXCCODE_FPDIS)
+#define KVM_TRACE_EXIT_LSXDIS (EXCCODE_LSXDIS)
+#define KVM_TRACE_EXIT_LASXDIS (EXCCODE_LASXDIS)
+#define KVM_TRACE_EXIT_FPE (EXCCODE_FPE)
+#define KVM_TRACE_EXIT_WATCH (EXCCODE_WATCH)
+#define KVM_TRACE_EXIT_GSPR (EXCCODE_GSPR)
+#define KVM_TRACE_EXIT_HC (EXCCODE_HVC)
+#define KVM_TRACE_EXIT_GCM (EXCCODE_GCM)
+
+/* Further exit reasons */
+#define KVM_TRACE_EXIT_IDLE 64
+#define KVM_TRACE_EXIT_CACHE 65
+#define KVM_TRACE_EXIT_SIGNAL 66
+
+/* Tracepoints for VM exits */
+#define kvm_trace_symbol_exit_types \
+ { KVM_TRACE_EXIT_INT, "Interrupt" }, \
+ { KVM_TRACE_EXIT_TLBLD, "TLB (LD)" }, \
+ { KVM_TRACE_EXIT_TLBST, "TLB (ST)" }, \
+ { KVM_TRACE_EXIT_TLBI, "TLB Ifetch" }, \
+ { KVM_TRACE_EXIT_TLBMOD, "TLB Mod" }, \
+ { KVM_TRACE_EXIT_TLBNR, "TLB RI" }, \
+ { KVM_TRACE_EXIT_TLBNX, "TLB XI" }, \
+ { KVM_TRACE_EXIT_TLBPE, "TLB Privilege Error" },\
+ { KVM_TRACE_EXIT_ADDE, "Address Error" }, \
+ { KVM_TRACE_EXIT_UNALIGN, "Address Unaligned" }, \
+ { KVM_TRACE_EXIT_ODB, "Out of Bounds" }, \
+ { KVM_TRACE_EXIT_SYSCALL, "System Call" }, \
+ { KVM_TRACE_EXIT_BP, "Breakpoint" }, \
+ { KVM_TRACE_EXIT_INE, "Reserved Inst" }, \
+ { KVM_TRACE_EXIT_IPE, "Inst Privilege Error" }, \
+ { KVM_TRACE_EXIT_FPDIS, "FPU disable" }, \
+ { KVM_TRACE_EXIT_LSXDIS, "LSX disable" }, \
+ { KVM_TRACE_EXIT_LASXDIS, "LASX disable" }, \
+ { KVM_TRACE_EXIT_FPE, "FPE" }, \
+ { KVM_TRACE_EXIT_WATCH, "DEBUG" }, \
+ { KVM_TRACE_EXIT_GSPR, "GSPR" }, \
+ { KVM_TRACE_EXIT_HC, "Hypercall" }, \
+ { KVM_TRACE_EXIT_GCM, "CSR Mod" }, \
+ { KVM_TRACE_EXIT_IDLE, "IDLE" }, \
+ { KVM_TRACE_EXIT_CACHE, "CACHE" }, \
+ { KVM_TRACE_EXIT_SIGNAL, "Signal" }
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason),
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ __field(unsigned int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.pc;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("[%s]PC: 0x%08lx",
+ __print_symbolic(__entry->reason,
+ kvm_trace_symbol_exit_types),
__entry->pc) +); + +#define KVM_TRACE_AUX_RESTORE 0 +#define KVM_TRACE_AUX_SAVE 1 +#define KVM_TRACE_AUX_ENABLE 2 +#define KVM_TRACE_AUX_DISABLE 3 +#define KVM_TRACE_AUX_DISCARD 4 + +#define KVM_TRACE_AUX_FPU 1 +#define KVM_TRACE_AUX_LSX 2 +#define KVM_TRACE_AUX_FPU_LSX 3 +#define KVM_TRACE_AUX_LASX 4 +#define KVM_TRACE_AUX_FPU_LSX_LASX 7 + +#define kvm_trace_symbol_aux_op \ + { KVM_TRACE_AUX_RESTORE, "restore" }, \ + { KVM_TRACE_AUX_SAVE, "save" }, \ + { KVM_TRACE_AUX_ENABLE, "enable" }, \ + { KVM_TRACE_AUX_DISABLE, "disable" }, \ + { KVM_TRACE_AUX_DISCARD, "discard" } + +#define kvm_trace_symbol_aux_state \ + { KVM_TRACE_AUX_FPU, "FPU" }, \ + { KVM_TRACE_AUX_LSX, "LSX" }, \ + { KVM_TRACE_AUX_LASX, "LASX" }, \ + { KVM_TRACE_AUX_FPU_LSX, "FPU & LSX" }, \ + { KVM_TRACE_AUX_FPU_LSX_LASX, "FPU & LSX & LASX" } + +TRACE_EVENT(kvm_aux, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, + unsigned int state), + TP_ARGS(vcpu, op, state), + TP_STRUCT__entry( + __field(unsigned long, pc) + __field(u8, op) + __field(u8, state) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.pc; + __entry->op = op; + __entry->state = state; + ), + + TP_printk("%s %s PC: 0x%08lx", + __print_symbolic(__entry->op, + kvm_trace_symbol_aux_op), + __print_symbolic(__entry->state, + kvm_trace_symbol_aux_state), + __entry->pc) +); + +TRACE_EVENT(kvm_vpid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned long vpid), + TP_ARGS(vcpu, vpid), + TP_STRUCT__entry( + __field(unsigned long, vpid) + ), + + TP_fast_assign( + __entry->vpid = vpid; + ), + + TP_printk("vpid: 0x%08lx", + __entry->vpid) +); + +#endif /* _TRACE_KVM_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index f214b0bfc1b4..8840f00c3ff8 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -54,11 +54,13 @@ int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; } +EXPORT_SYMBOL(pmd_huge); int pud_huge(pud_t pud) { return (pud_val(pud) & _PAGE_HUGE) != 0; } +EXPORT_SYMBOL(pud_huge); uint64_t pmd_to_entrylo(unsigned long pmd_val) { diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index f044c029b190..40279455beb4 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -7,7 +7,7 @@ config DRM_AMD_DC default y depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || LOONGARCH || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and Raven ASICs.
diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c
index 23a4fcf8e30f..38a07bcabe54 100644
--- a/drivers/irqchip/irq-loongson-eiointc.c
+++ b/drivers/irqchip/irq-loongson-eiointc.c
@@ -95,6 +95,13 @@ static void eiointc_set_irq_route(int pos, unsigned int cpu, unsigned int mnode,
 }
 }
+#ifdef CONFIG_LOONGARCH
+static void virt_eiointc_set_irq_route(int pos, unsigned int cpu)
+{
+ iocsr_write8(cpu_logical_map(cpu), EIOINTC_REG_ROUTE + pos);
+}
+#endif
+
 static DEFINE_RAW_SPINLOCK(affinity_lock);
 static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity, bool force)
@@ -144,16 +151,20 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af
 regaddr = EIOINTC_REG_ENABLE + ((vector >> 5) << 2);
- /* Mask target vector */
- csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)),
- 0x0, priv->node * CORES_PER_EIO_NODE);
-
- /* Set route for target vector */
- eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map);
-
- /* Unmask target vector */
- csr_any_send(regaddr, EIOINTC_ALL_ENABLE,
- 0x0, priv->node * CORES_PER_EIO_NODE);
+ if (!cpu_has_hypervisor) {
+ /* Mask target vector */
+ csr_any_send(regaddr, EIOINTC_ALL_ENABLE & (~BIT(vector & 0x1F)),
+ 0x0, priv->node * CORES_PER_EIO_NODE);
+ /* Set route for target vector */
+ eiointc_set_irq_route(vector, cpu, priv->node, &priv->node_map);
+ /* Unmask target vector */
+ csr_any_send(regaddr, EIOINTC_ALL_ENABLE,
+ 0x0, priv->node * CORES_PER_EIO_NODE);
+ } else {
+ iocsr_write32(EIOINTC_ALL_ENABLE & ~BIT(vector & 0x1F), regaddr);
+ virt_eiointc_set_irq_route(vector, cpu);
+ iocsr_write32(EIOINTC_ALL_ENABLE, regaddr);
+ }
 irq_data_update_effective_affinity(d, cpumask_of(cpu));
@@ -206,6 +217,8 @@ static int eiointc_router_init(unsigned int cpu)
 bit = BIT(cpu_logical_map(0));
 else
 bit = (eiointc_priv[index]->node << 4) | 1;
+ if (cpu_has_hypervisor)
+ bit = cpu_logical_map(0);
 data = bit | (bit << 8) | (bit << 16) | (bit << 24);
 iocsr_write32(data, EIOINTC_REG_ROUTE + i * 4);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ca41220b40b8..bfe1e6a5cdb0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV 28
 #define KVM_EXIT_X86_RDMSR 29
 #define KVM_EXIT_X86_WRMSR 30
+#define KVM_EXIT_LOONGARCH_IOCSR 36
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed.
*/ @@ -319,6 +320,13 @@ struct kvm_run { __u32 len; __u8 is_write; } mmio; + /* KVM_EXIT_LOONGARCH_IOCSR */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } iocsr_io; /* KVM_EXIT_HYPERCALL */ struct { __u64 nr; @@ -695,6 +703,16 @@ struct kvm_s390_irq_state { __u32 reserved[4]; /* will stay unused for compatibility reasons */ }; +struct kvm_loongarch_vcpu_state { + __u8 online_vcpus; + __u8 is_migrate; + __u32 cpu_freq; + __u32 count_ctl; + __u64 irq_pending; + __u64 irq_clear; + __u64 core_ext_ioisr[4]; +}; + /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -1053,6 +1071,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_USER_SPACE_MSR 188 #define KVM_CAP_X86_MSR_FILTER 189 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_LOONGARCH_FPU 191 +#define KVM_CAP_LOONGARCH_LSX 192 +#define KVM_CAP_LOONGARCH_VZ 193 #ifdef KVM_CAP_IRQ_ROUTING @@ -1200,6 +1221,7 @@ struct kvm_dirty_tlb { #define KVM_REG_ARM64 0x6000000000000000ULL #define KVM_REG_MIPS 0x7000000000000000ULL #define KVM_REG_RISCV 0x8000000000000000ULL +#define KVM_REG_LOONGARCH 0x9000000000000000ULL #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL @@ -1557,6 +1579,13 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_X86_MSR_FILTER */ #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) +#define KVM_LOONGARCH_GET_VCPU_STATE _IOR(KVMIO, 0xd0, struct kvm_loongarch_vcpu_state) +#define KVM_LOONGARCH_SET_VCPU_STATE _IOW(KVMIO, 0xd1, struct kvm_loongarch_vcpu_state) +#define KVM_LOONGARCH_GET_CPUCFG _IOR(KVMIO, 0xd2, struct kvm_cpucfg) +#define KVM_LOONGARCH_GET_IOCSR _IOR(KVMIO, 0xd3, struct kvm_iocsr_entry) +#define KVM_LOONGARCH_SET_IOCSR _IOW(KVMIO, 0xd4, struct kvm_iocsr_entry) +#define KVM_LOONGARCH_SET_CPUCFG _IOR(KVMIO, 0xd5, struct kvm_cpucfg) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 58e4f88b2b9f..77e79f70645e 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -19,6 +19,10 @@ #include #include "irq.h" +#if defined(CONFIG_CPU_LOONGSON64) +#include "ls_irq.h" +#endif + int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) { -- Gitee From 726f3b53030be6f7f6c4860fd81fa98e6e928e13 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 23 Oct 2023 17:13:34 +0800 Subject: [PATCH 239/241] LoongArch: Update Loongson-3 default config file for Android Change-Id: Icf1d69d6f62a63afdbebf351306c470660abb6c8 Signed-off-by: Huacai Chen --- arch/loongarch/configs/loongson3_defconfig | 27 +++++++++++++--------- init/Kconfig | 11 +++++++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 9b33575b7d93..9e894edabd00 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -1,3 +1,4 @@ +# CONFIG_WERROR is not set # CONFIG_LOCALVERSION_AUTO is not set CONFIG_KERNEL_LZMA=y CONFIG_SYSVIPC=y @@ -35,6 +36,7 @@ CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y +CONFIG_RT_SOFTINT_OPTIMIZATION=y CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y @@ -95,6 +97,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CMDLINE_PARTITION=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y +CONFIG_GKI_HACKS_TO_FIX=y CONFIG_BINFMT_MISC=m CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y 
@@ -661,12 +664,12 @@ CONFIG_IR_SHARP_DECODER=m CONFIG_IR_MCE_KBD_DECODER=m CONFIG_IR_XMP_DECODER=m CONFIG_IR_IMON_DECODER=m -CONFIG_MEDIA_SUPPORT=m CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m CONFIG_MEDIA_PCI_SUPPORT=y CONFIG_VIDEO_BT848=m CONFIG_DVB_BT8XX=m +# CONFIG_DVB_LGS8GL5 is not set CONFIG_DRM=y CONFIG_DRM_RADEON=m CONFIG_DRM_RADEON_USERPTR=y @@ -703,7 +706,7 @@ CONFIG_SND_HDA_CODEC_HDMI=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_USB_AUDIO=m CONFIG_HIDRAW=y -CONFIG_UHID=m +CONFIG_UHID=y CONFIG_HID_A4TECH=m CONFIG_HID_CHERRY=m CONFIG_HID_LOGITECH=m @@ -750,8 +753,10 @@ CONFIG_RTC_DRV_EFI=y CONFIG_DMADEVICES=y CONFIG_UDMABUF=y CONFIG_DMABUF_HEAPS=y -CONFIG_DMABUF_HEAPS_SYSTEM=y -CONFIG_DMABUF_HEAPS_CMA=y +CONFIG_DMABUF_HEAPS_DEFERRED_FREE=y +CONFIG_DMABUF_HEAPS_PAGE_POOL=y +CONFIG_DMABUF_HEAPS_SYSTEM=m +CONFIG_DMABUF_HEAPS_CMA=m CONFIG_UIO=m CONFIG_UIO_PDRV_GENIRQ=m CONFIG_UIO_DMEM_GENIRQ=m @@ -782,6 +787,10 @@ CONFIG_COMEDI_NI_PCIDIO=m CONFIG_COMEDI_NI_PCIMIO=m CONFIG_R8188EU=m # CONFIG_88EU_AP_MODE is not set +CONFIG_ASHMEM=y +CONFIG_ION=y +CONFIG_ION_SYSTEM_HEAP=m +CONFIG_ION_CMA_HEAP=m CONFIG_LOONGSON_IOMMU=y CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y @@ -796,6 +805,9 @@ CONFIG_NTB_PERF=m CONFIG_NTB_TRANSPORT=m CONFIG_PWM=y CONFIG_USB4=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_BINDERFS=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -859,13 +871,6 @@ CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y CONFIG_MINIX_FS=m CONFIG_ROMFS_FS=m -CONFIG_PSTORE=m -CONFIG_PSTORE_LZO_COMPRESS=m -CONFIG_PSTORE_LZ4_COMPRESS=m -CONFIG_PSTORE_LZ4HC_COMPRESS=m -CONFIG_PSTORE_842_COMPRESS=y -CONFIG_PSTORE_ZSTD_COMPRESS=y -CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_EROFS_FS=m diff --git a/init/Kconfig b/init/Kconfig index 70bd7f69076a..8cfe761ffdf6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1296,6 +1296,17 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. +config RT_SOFTINT_OPTIMIZATION + bool "Improve RT scheduling during long softint execution" + depends on (ARM64 || LOONGARCH) + depends on SMP + default n + help + Enable an optimization which tries to avoid placing RT tasks on CPUs + occupied by nonpreemptible tasks, such as a long softint, or CPUs + which may soon block preemptions, such as a CPU running a ksoftirq + thread which handles slow softints. 
+ config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS -- Gitee From cddbaf7eed9307d94e0ff7c6a8e6febce552ec1f Mon Sep 17 00:00:00 2001 From: Xiaotian Wu Date: Sat, 15 Oct 2022 13:33:31 +0800 Subject: [PATCH 240/241] remove __ARCH_WANT_NEW_STAT to keep same as upstream --- arch/loongarch/include/uapi/asm/unistd.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/loongarch/include/uapi/asm/unistd.h b/arch/loongarch/include/uapi/asm/unistd.h index b344b1f91715..fcb668984f03 100644 --- a/arch/loongarch/include/uapi/asm/unistd.h +++ b/arch/loongarch/include/uapi/asm/unistd.h @@ -1,5 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE #define __ARCH_WANT_SYS_CLONE3 -- Gitee From b2863fb30fc3393bc10d634dca6425aac1216f1c Mon Sep 17 00:00:00 2001 From: Fuxin Zhang Date: Fri, 16 Feb 2024 08:21:25 +0800 Subject: [PATCH 241/241] LoongArch: Fix build error --- arch/loongarch/mm/hugetlbpage.c | 4 ++-- arch/loongarch/pci/acpi.c | 4 ++-- drivers/irqchip/irq-loongson-eiointc.c | 3 ++- include/linux/decompress/mm.h | 2 +- scripts/sorttable.c | 1 - 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 8840f00c3ff8..cf4f81abc5a7 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -13,8 +13,8 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long sz) +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, + unsigned long sz) { pgd_t *pgd; p4d_t *p4d; diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c index 931a9a842c1f..9f418e40ecd0 100644 --- a/arch/loongarch/pci/acpi.c +++ b/arch/loongarch/pci/acpi.c @@ -90,9 +90,9 @@ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci) * - ioremap the config space */ static struct pci_config_window *arch_pci_ecam_create(struct device *dev, - struct resource *cfgres, struct resource *busr, struct pci_ecam_ops *ops) + struct resource *cfgres, struct resource *busr, const struct pci_ecam_ops *ops) { - int i, err; + int err; unsigned int bus_range, bsz; struct resource *conflict; struct pci_config_window *cfg; diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 38a07bcabe54..11c402e2b894 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -137,7 +137,8 @@ static int eiointc_set_irq_affinity(struct irq_data *d, const struct cpumask *af cpumask_and(&cpuspan_online_map, &priv->cpuspan_map, cpu_online_mask); for_each_cpu(c, &complement_map) { - if (c == cpu) break; idx++; + if (c == cpu) break; + idx++; } idx = idx % cpumask_weight(&cpuspan_online_map); diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h index 868e9eacd69e..024aefd05df2 100644 --- a/include/linux/decompress/mm.h +++ b/include/linux/decompress/mm.h @@ -31,7 +31,7 @@ STATIC_RW_DATA unsigned long malloc_ptr; STATIC_RW_DATA int malloc_count; -static void *malloc(int size) +static void *malloc(size_t size) { void *p; diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 030bf70530eb..84044a045188 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -382,7 +382,6 @@ static int do_file(char const *const fname, void *addr) break; case EM_ARCOMPACT: case EM_ARCV2: - case EM_LOONGARCH: case EM_MICROBLAZE: case EM_MIPS: case 
EM_XTENSA: -- Gitee
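
Note (illustrative, not part of the series): the KVM_EXIT_LOONGARCH_IOCSR exit and the
iocsr_io member added to struct kvm_run in include/uapi/linux/kvm.h above are consumed
from userspace. The sketch below shows how a VMM's vcpu loop might dispatch that exit.
The device-model helpers iocsr_dev_read()/iocsr_dev_write() are hypothetical names; only
the iocsr_io layout (phys_addr, data[8], len, is_write) comes from the patches, and the
read-completion behaviour is assumed by analogy with KVM_EXIT_MMIO, where userspace fills
data[] and KVM finishes the access on the next KVM_RUN.

#include <stdint.h>
#include <string.h>
#include <linux/kvm.h>	/* uapi headers with this series applied */

/* Hypothetical device-model hooks, named here for illustration only */
uint64_t iocsr_dev_read(uint64_t addr, uint32_t len);
void iocsr_dev_write(uint64_t addr, uint32_t len, uint64_t val);

static void handle_iocsr_exit(struct kvm_run *run)
{
	uint64_t val = 0;

	if (run->iocsr_io.is_write) {
		/* len is at most 8, so data[8] always holds the whole access */
		memcpy(&val, run->iocsr_io.data, run->iocsr_io.len);
		iocsr_dev_write(run->iocsr_io.phys_addr, run->iocsr_io.len, val);
	} else {
		val = iocsr_dev_read(run->iocsr_io.phys_addr, run->iocsr_io.len);
		/* KVM presumably loads data[] into the guest register on re-entry */
		memcpy(run->iocsr_io.data, &val, run->iocsr_io.len);
	}
}

A VMM would call this from its run loop when ioctl(vcpu_fd, KVM_RUN, 0) returns with
run->exit_reason == KVM_EXIT_LOONGARCH_IOCSR, mirroring the usual KVM_EXIT_MMIO handling.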