diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 810b9f5cbf9b7ac85818a7073edaddc35704b452..a6c32dd847d4d76fd50fd27896e0fb0121f32992 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2172,6 +2172,13 @@ [KVM,ARM] Allow use of GICv4 for direct injection of LPIs. + kvm-arm.ipiv_enabled= + [KVM,ARM] Allow use of HiSilicon ipiv on GICv4.1 + + kvm-arm.dvmbm_enabled= + [KVM,ARM] Allow use of HiSilicon DVMBM capability. + Default: 0 + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/arch/arm64/configs/tencent.config b/arch/arm64/configs/tencent.config index 8347032c1be74a93ac00d6c5f19ca860c2e84a04..f73a2f984ec6af054817a1f36c6c0f5ce95bbc01 100644 --- a/arch/arm64/configs/tencent.config +++ b/arch/arm64/configs/tencent.config @@ -1502,3 +1502,4 @@ CONFIG_HISI_SOC_CACHE=m CONFIG_HISI_SOC_HHA=m CONFIG_ARM64_HAFT=y CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y +CONFIG_KVM_HISI_VIRT=y diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 01886b83d1206afac90f61bb89b96cc719ecc1f4..8bcf3f4893b676df24eab8df39d1f437229d8894 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -45,6 +45,7 @@ #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_REQ_RELOAD_TLBI_DVMBM KVM_ARCH_REQ(8) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -84,6 +85,12 @@ struct kvm_arch { /* Mandated version of PSCI */ u32 psci_version; + +#ifdef CONFIG_KVM_HISI_VIRT + spinlock_t sched_lock; + cpumask_var_t sched_cpus; /* Union of all vcpu's cpus_ptr */ + u64 tlbi_dvmbm; +#endif }; #define KVM_NR_MEM_OBJS 40 @@ -341,6 +348,14 @@ struct kvm_vcpu_arch { /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ bool sysregs_loaded_on_cpu; + +#ifdef CONFIG_KVM_HISI_VIRT + /* pCPUs this vCPU can be scheduled on. Pure copy of + * current->cpus_ptr + */ + cpumask_var_t sched_cpus; + cpumask_var_t pre_sched_cpus; +#endif }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -692,4 +707,8 @@ static inline enum mitigation_state kvm_arm_get_spectre_bhb_state(void) return arm64_get_spectre_bhb_state(); } +extern bool kvm_ncsnp_support; +extern bool kvm_dvmbm_support; +extern bool kvm_ipiv_support; + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index ffe0aad96b17be9e6d952b3520264c934759667c..ad78b0047b2cc4b164f087f767d406ac4fe769c4 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -317,7 +317,7 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) * faulting in pages. Furthermore, FWB implies IDC, so cleaning to * PoU is not required either in this case. */ - if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + if (kvm_ncsnp_support || cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) return; kvm_flush_dcache_to_poc(va, size); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1d0a3791c01768da4bbd6bc494e5e5d78ef8da18..b065318df9256ff4c3530697181580b4da7d2bf9 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -5,6 +5,7 @@ source "virt/kvm/Kconfig" source "virt/lib/Kconfig" +source "arch/arm64/kvm/hisilicon/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 410c084984d0be238fd2b02f95da4e92458f8bfd..58c568dc52d0fab7653de529a500099fe2cf6f6c 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,6 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o +obj-$(CONFIG_KVM_ARM_HOST) += hisilicon/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o diff --git a/arch/arm64/kvm/hisilicon/Kconfig b/arch/arm64/kvm/hisilicon/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..6536f897a32eb374b7131d7b5550bed498a15796 --- /dev/null +++ b/arch/arm64/kvm/hisilicon/Kconfig @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +config KVM_HISI_VIRT + bool "HiSilicon SoC specific virtualization features" + depends on ARCH_HISI + help + Support for HiSilicon SoC specific virtualization features. + On non-HiSilicon platforms, say N here. diff --git a/arch/arm64/kvm/hisilicon/Makefile b/arch/arm64/kvm/hisilicon/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..849f99d1526d8540d68d19e15465a780f2c5187a --- /dev/null +++ b/arch/arm64/kvm/hisilicon/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_KVM_HISI_VIRT) += hisi_virt.o diff --git a/arch/arm64/kvm/hisilicon/hisi_virt.c b/arch/arm64/kvm/hisilicon/hisi_virt.c new file mode 100644 index 0000000000000000000000000000000000000000..9f91774918449ce367b64425103946b318393140 --- /dev/null +++ b/arch/arm64/kvm/hisilicon/hisi_virt.c @@ -0,0 +1,620 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright(c) 2022 Huawei Technologies Co., Ltd + */ + +#include +#include +#include +#include +#include "hisi_virt.h" + +static enum hisi_cpu_type cpu_type = UNKNOWN_HI_TYPE; + +static bool dvmbm_enabled; +static bool ipiv_enabled; + +static const char * const hisi_cpu_type_str[] = { + "Hisi1612", + "Hisi1616", + "Hisi1620", + "HIP09", + "HIP10", + "HIP10C", + "HIP12", + "Unknown" +}; + +/* ACPI Hisi oem table id str */ +static const char * const oem_str[] = { + "HIP06 ", /* Hisi 1612 */ + "HIP07 ", /* Hisi 1616 */ + "HIP08 ", /* Hisi 1620 */ + "HIP09 ", /* HIP09 */ + "HIP10 ", /* HIP10 */ + "HIP10C ", /* HIP10C */ + "HIP12 " /* HIP12 */ +}; + +/* + * Probe Hisi CPU type form ACPI. + */ +static enum hisi_cpu_type acpi_get_hisi_cpu_type(void) +{ + struct acpi_table_header *table; + acpi_status status; + int i, str_size = ARRAY_SIZE(oem_str); + + /* Get oem table id from ACPI table header */ + status = acpi_get_table(ACPI_SIG_DSDT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn("Failed to get ACPI table: %s\n", + acpi_format_exception(status)); + return UNKNOWN_HI_TYPE; + } + + for (i = 0; i < str_size; ++i) { + if (!strncmp(oem_str[i], table->oem_table_id, 8)) + return i; + } + + return UNKNOWN_HI_TYPE; +} + +/* of Hisi cpu model str */ +static const char * const of_model_str[] = { + "Hi1612", + "Hi1616" +}; + +/* + * Probe Hisi CPU type from DT. + */ +static enum hisi_cpu_type of_get_hisi_cpu_type(void) +{ + const char *model; + int ret, i, str_size = ARRAY_SIZE(of_model_str); + + /* + * Note: There may not be a "model" node in FDT, which + * is provided by the vendor. In this case, we are not + * able to get CPU type information through this way. + */ + ret = of_property_read_string(of_root, "model", &model); + if (ret < 0) { + pr_warn("Failed to get Hisi cpu model by OF.\n"); + return UNKNOWN_HI_TYPE; + } + + for (i = 0; i < str_size; ++i) { + if (strstr(model, of_model_str[i])) + return i; + } + + return UNKNOWN_HI_TYPE; +} + +void probe_hisi_cpu_type(void) +{ + if (!acpi_disabled) + cpu_type = acpi_get_hisi_cpu_type(); + else + cpu_type = of_get_hisi_cpu_type(); + + kvm_info("detected: Hisi CPU type '%s'\n", hisi_cpu_type_str[cpu_type]); +} + +/* + * We have the fantastic HHA ncsnp capability on Kunpeng 920, + * with which hypervisor doesn't need to perform a lot of cache + * maintenance like before (in case the guest has non-cacheable + * Stage-1 mappings). + */ +#define NCSNP_MMIO_BASE 0x20107E238 +bool hisi_ncsnp_supported(void) +{ + void __iomem *base; + unsigned int high; + bool supported = false; + + if (cpu_type != HI_1620) + return supported; + + base = ioremap(NCSNP_MMIO_BASE, 4); + if (!base) { + pr_warn("Unable to map MMIO region when probing ncsnp!\n"); + return supported; + } + + high = readl_relaxed(base) >> 28; + iounmap(base); + if (high != 0x1) + supported = true; + + return supported; +} + +static int __init early_dvmbm_enable(char *buf) +{ + return strtobool(buf, &dvmbm_enabled); +} +early_param("kvm-arm.dvmbm_enabled", early_dvmbm_enable); + +static void hardware_enable_dvmbm(void *data) +{ + u64 val; + + val = read_sysreg_s(SYS_LSUDVM_CTRL_EL2); + val |= LSUDVM_CTLR_EL2_MASK; + write_sysreg_s(val, SYS_LSUDVM_CTRL_EL2); +} + +static void hardware_disable_dvmbm(void *data) +{ + u64 val; + + val = read_sysreg_s(SYS_LSUDVM_CTRL_EL2); + val &= ~LSUDVM_CTLR_EL2_MASK; + write_sysreg_s(val, SYS_LSUDVM_CTRL_EL2); +} + +static int __init early_ipiv_enable(char *buf) +{ + return strtobool(buf, &ipiv_enabled); +} +early_param("kvm-arm.ipiv_enabled", early_ipiv_enable); + +bool hisi_ipiv_supported(void) +{ + if (cpu_type != HI_IP12) + return false; + + /* Determine whether IPIV is supported by the hardware */ + if (!(read_sysreg(aidr_el1) & AIDR_EL1_IPIV_MASK)) { + kvm_info("Hisi ipiv not supported by the hardware\n"); + return false; + } + + /* User provided kernel command-line parameter */ + if (!ipiv_enabled || !is_kernel_in_hyp_mode()) + return false; + + /* Enable IPIV feature if necessary */ + if (!is_gicv4p1()) { + kvm_info("Hisi ipiv needs to enable GICv4p1!\n"); + return false; + } + + kvm_info("Enable Hisi ipiv, do not support vSGI broadcast\n"); + return true; +} + +void ipiv_gicd_init(void) +{ + gic_dist_enable_ipiv(); +} + +bool hisi_dvmbm_supported(void) +{ + if (cpu_type != HI_IP10 && cpu_type != HI_IP10C && + cpu_type != HI_IP12) + return false; + + if (!is_kernel_in_hyp_mode()) { + kvm_info("Hisi dvmbm not supported by KVM nVHE mode\n"); + return false; + } + + /* Determine whether DVMBM is supported by the hardware */ + if (!(read_sysreg(aidr_el1) & AIDR_EL1_DVMBM_MASK)) + return false; + + /* User provided kernel command-line parameter */ + if (!dvmbm_enabled) { + on_each_cpu(hardware_disable_dvmbm, NULL, 1); + return false; + } + + /* + * Enable TLBI Broadcast optimization by setting + * LSUDVM_CTRL_EL2's bit[0]. + */ + on_each_cpu(hardware_enable_dvmbm, NULL, 1); + return true; +} + +int kvm_sched_affinity_vcpu_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_dvmbm_support) + return 0; + + if (!zalloc_cpumask_var(&vcpu->arch.sched_cpus, GFP_ATOMIC) || + !zalloc_cpumask_var(&vcpu->arch.pre_sched_cpus, GFP_ATOMIC)) + return -ENOMEM; + + return 0; +} + +void kvm_sched_affinity_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + if (!kvm_dvmbm_support) + return; + + free_cpumask_var(vcpu->arch.sched_cpus); + free_cpumask_var(vcpu->arch.pre_sched_cpus); +} + +static void __kvm_write_lsudvmbm(struct kvm *kvm) +{ + write_sysreg_s(kvm->arch.tlbi_dvmbm, SYS_LSUDVMBM_EL2); +} + +static void kvm_write_lsudvmbm(struct kvm *kvm) +{ + spin_lock(&kvm->arch.sched_lock); + __kvm_write_lsudvmbm(kvm); + spin_unlock(&kvm->arch.sched_lock); +} + +static int kvm_dvmbm_get_dies_info(struct kvm *kvm, u64 *vm_aff3s, int size) +{ + int num = 0, cpu; + + for_each_cpu(cpu, kvm->arch.sched_cpus) { + bool found = false; + u64 aff3; + int i; + + if (num >= size) + break; + + aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3); + for (i = 0; i < num; i++) { + if (vm_aff3s[i] == aff3) { + found = true; + break; + } + } + + if (!found) + vm_aff3s[num++] = aff3; + } + + return num; +} + +static u32 socket_num, die_num; + +static u32 kvm_get_socket_num(void) +{ + int socket_id[MAX_PG_CFG_SOCKETS], cpu; + u32 num = 0; + + for_each_cpu(cpu, cpu_possible_mask) { + bool found = false; + u64 aff3, socket; + int i; + + aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3); + /* aff3[7:3]: socket ID */ + socket = (aff3 & SOCKET_ID_MASK) >> SOCKET_ID_SHIFT; + for (i = 0; i < num; i++) { + if (socket_id[i] == socket) { + found = true; + break; + } + } + if (!found) + socket_id[num++] = socket; + } + return num; +} + +static u32 kvm_get_die_num(void) +{ + int die_id[MAX_DIES_PER_SOCKET], cpu; + u32 num = 0; + + for_each_cpu(cpu, cpu_possible_mask) { + bool found = false; + u64 aff3, die; + int i; + + aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3); + /* aff3[2:0]: die ID */ + die = aff3 & DIE_ID_MASK; + for (i = 0; i < num; i++) { + if (die_id[i] == die) { + found = true; + break; + } + } + if (!found) + die_id[num++] = die; + } + return num; +} + +static u32 g_die_pg[MAX_PG_CFG_SOCKETS * MAX_DIES_PER_SOCKET] + [MAX_CLUSTERS_PER_DIE]; + +static void kvm_get_die_pg(unsigned long pg_cfg, int socket_id, int die_id) +{ + u32 pg_num = 0, i, j; + u32 pg_flag[MAX_CLUSTERS_PER_DIE]; + u32 die_tmp = socket_id * die_num + die_id; + + for (i = 0; i < MAX_CLUSTERS_PER_DIE; i++) { + if (test_bit(i, &pg_cfg)) + pg_num++; + g_die_pg[die_tmp][i] = i; + pg_flag[i] = 0; + } + + for (i = 0; i < MAX_CLUSTERS_PER_DIE - pg_num; i++) { + if (test_bit(i, &pg_cfg)) { + for (j = 0; j < pg_num; j++) { + u32 cluster_bak = MAX_CLUSTERS_PER_DIE + - pg_num + j; + + if (!test_bit(cluster_bak, &pg_cfg) && + !pg_flag[cluster_bak]) { + pg_flag[cluster_bak] = 1; + g_die_pg[die_tmp][i] = cluster_bak; + g_die_pg[die_tmp][cluster_bak] = i; + break; + } + } + } + } +} + +static void kvm_update_vm_lsudvmbm(struct kvm *kvm) +{ + u64 mpidr, aff3, aff2, aff1, phy_aff2; + u64 vm_aff3s[DVMBM_MAX_DIES]; + u64 val; + int cpu, nr_dies; + u32 socket_id, die_id; + + nr_dies = kvm_dvmbm_get_dies_info(kvm, vm_aff3s, DVMBM_MAX_DIES); + if (nr_dies > 2) { + val = DVMBM_RANGE_ALL_DIES << DVMBM_RANGE_SHIFT; + goto out_update; + } + + if (nr_dies == 1) { + val = DVMBM_RANGE_ONE_DIE << DVMBM_RANGE_SHIFT | + vm_aff3s[0] << DVMBM_DIE1_SHIFT; + + /* fulfill bits [52:0] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + aff1 = MPIDR_AFFINITY_LEVEL(mpidr, 1); + socket_id = (aff3 & SOCKET_ID_MASK) >> SOCKET_ID_SHIFT; + die_id = (aff3 & DIE_ID_MASK) >> DIE_ID_SHIFT; + if (die_id == TOTEM_B_ID) + die_id = 0; + else + die_id = 1; + + phy_aff2 = g_die_pg[socket_id * die_num + die_id][aff2]; + val |= 1ULL << (phy_aff2 * 4 + aff1); + } + + goto out_update; + } + + /* nr_dies == 2 */ + val = DVMBM_RANGE_TWO_DIES << DVMBM_RANGE_SHIFT | + DVMBM_GRAN_CLUSTER << DVMBM_GRAN_SHIFT | + vm_aff3s[0] << DVMBM_DIE1_SHIFT | + vm_aff3s[1] << DVMBM_DIE2_SHIFT; + + /* and fulfill bits [43:0] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + socket_id = (aff3 & SOCKET_ID_MASK) >> SOCKET_ID_SHIFT; + die_id = (aff3 & DIE_ID_MASK) >> DIE_ID_SHIFT; + if (die_id == TOTEM_B_ID) + die_id = 0; + else + die_id = 1; + + if (aff3 == vm_aff3s[0]) { + phy_aff2 = g_die_pg[socket_id * die_num + die_id][aff2]; + val |= 1ULL << (phy_aff2 + DVMBM_DIE1_CLUSTER_SHIFT); + } else { + phy_aff2 = g_die_pg[socket_id * die_num + die_id][aff2]; + val |= 1ULL << (phy_aff2 + DVMBM_DIE2_CLUSTER_SHIFT); + } + } + +out_update: + kvm->arch.tlbi_dvmbm = val; +} + +static void kvm_update_vm_lsudvmbm_hip12(struct kvm *kvm) +{ + u64 mpidr, aff3, aff2; + u64 vm_aff3s[DVMBM_MAX_DIES_HIP12]; + u64 val; + int cpu, nr_dies; + + nr_dies = kvm_dvmbm_get_dies_info(kvm, vm_aff3s, DVMBM_MAX_DIES_HIP12); + if (nr_dies > 2) { + val = DVMBM_RANGE_ALL_DIES << DVMBM_RANGE_SHIFT; + goto out_update; + } + + if (nr_dies == 1) { + val = DVMBM_RANGE_ONE_DIE << DVMBM_RANGE_SHIFT | + vm_aff3s[0] << DVMBM_DIE1_VDIE_SHIFT_HIP12; + + /* fulfill bits [11:6] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + + val |= 1ULL << (aff2 + DVMBM_DIE1_CLUSTER_SHIFT_HIP12); + } + + goto out_update; + } + + /* nr_dies == 2 */ + val = DVMBM_RANGE_TWO_DIES << DVMBM_RANGE_SHIFT | + DVMBM_GRAN_CLUSTER << DVMBM_GRAN_SHIFT | + vm_aff3s[0] << DVMBM_DIE1_VDIE_SHIFT_HIP12 | + vm_aff3s[1] << DVMBM_DIE2_VDIE_SHIFT_HIP12; + + /* and fulfill bits [11:0] */ + for_each_cpu(cpu, kvm->arch.sched_cpus) { + mpidr = cpu_logical_map(cpu); + aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3); + aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + + if (aff3 == vm_aff3s[0]) + val |= 1ULL << (aff2 + DVMBM_DIE1_CLUSTER_SHIFT_HIP12); + else + val |= 1ULL << (aff2 + DVMBM_DIE2_CLUSTER_SHIFT_HIP12); + } + +out_update: + kvm->arch.tlbi_dvmbm = val; +} + +void kvm_tlbi_dvmbm_vcpu_load(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + cpumask_t mask; + unsigned long i; + + /* Don't bother on old hardware */ + if (!kvm_dvmbm_support) + return; + + cpumask_copy(vcpu->arch.sched_cpus, current->cpus_ptr); + + if (likely(cpumask_equal(vcpu->arch.sched_cpus, + vcpu->arch.pre_sched_cpus))) { + kvm_write_lsudvmbm(kvm); + return; + } + + /* Re-calculate sched_cpus for this VM */ + spin_lock(&kvm->arch.sched_lock); + + cpumask_clear(&mask); + kvm_for_each_vcpu(i, tmp, kvm) { + /* + * We may get the stale sched_cpus if another thread + * is concurrently changing its affinity. It'll + * eventually go through vcpu_load() and we rely on + * the last sched_lock holder to make things correct. + */ + cpumask_or(&mask, &mask, tmp->arch.sched_cpus); + } + + if (cpumask_equal(kvm->arch.sched_cpus, &mask)) + goto out_unlock; + + cpumask_copy(kvm->arch.sched_cpus, &mask); + + kvm_flush_remote_tlbs(kvm); + + /* + * Re-calculate LSUDVMBM_EL2 for this VM and kick all vcpus + * out to reload the LSUDVMBM configuration. + */ + if (cpu_type == HI_IP12) + kvm_update_vm_lsudvmbm_hip12(kvm); + else + kvm_update_vm_lsudvmbm(kvm); + kvm_make_all_cpus_request(kvm, KVM_REQ_RELOAD_TLBI_DVMBM); + +out_unlock: + __kvm_write_lsudvmbm(kvm); + spin_unlock(&kvm->arch.sched_lock); +} + +void kvm_tlbi_dvmbm_vcpu_put(struct kvm_vcpu *vcpu) +{ + if (!kvm_dvmbm_support) + return; + + cpumask_copy(vcpu->arch.pre_sched_cpus, vcpu->arch.sched_cpus); +} + +void kvm_get_pg_cfg(void) +{ + void __iomem *mn_base; + u32 i, j; + u32 pg_cfgs[MAX_PG_CFG_SOCKETS * MAX_DIES_PER_SOCKET]; + u64 mn_phy_base; + u32 val; + + if (cpu_type == HI_IP12) + return; + + socket_num = kvm_get_socket_num(); + die_num = kvm_get_die_num(); + + for (i = 0; i < socket_num; i++) { + for (j = 0; j < die_num; j++) { + + /* + * totem B means the first CPU DIE within a SOCKET, + * totem A means the second one. + */ + mn_phy_base = (j == 0) ? TB_MN_BASE : TA_MN_BASE; + mn_phy_base += CHIP_ADDR_OFFSET(i); + mn_phy_base += MN_ECO0_OFFSET; + + mn_base = ioremap(mn_phy_base, 4); + if (!mn_base) { + kvm_info("MN base addr ioremap failed\n"); + return; + } + val = readl_relaxed(mn_base); + pg_cfgs[j + i * die_num] = val & 0xff; + kvm_get_die_pg(pg_cfgs[j + i * die_num], i, j); + iounmap(mn_base); + } + } +} + +int kvm_sched_affinity_vm_init(struct kvm *kvm) +{ + if (!kvm_dvmbm_support) + return 0; + + spin_lock_init(&kvm->arch.sched_lock); + if (!zalloc_cpumask_var(&kvm->arch.sched_cpus, GFP_ATOMIC)) + return -ENOMEM; + + return 0; +} + +void kvm_sched_affinity_vm_destroy(struct kvm *kvm) +{ + if (!kvm_dvmbm_support) + return; + + free_cpumask_var(kvm->arch.sched_cpus); +} + +void kvm_hisi_reload_lsudvmbm(struct kvm *kvm) +{ + if (WARN_ON_ONCE(!kvm_dvmbm_support)) + return; + + preempt_disable(); + kvm_write_lsudvmbm(kvm); + preempt_enable(); +} diff --git a/arch/arm64/kvm/hisilicon/hisi_virt.h b/arch/arm64/kvm/hisilicon/hisi_virt.h new file mode 100644 index 0000000000000000000000000000000000000000..e956f6f9db3632fc62d419b3e059e95f6e743c60 --- /dev/null +++ b/arch/arm64/kvm/hisilicon/hisi_virt.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright(c) 2022 Huawei Technologies Co., Ltd + */ + +#ifndef __HISI_VIRT_H__ +#define __HISI_VIRT_H__ + +#ifdef CONFIG_KVM_HISI_VIRT +enum hisi_cpu_type { + HI_1612, + HI_1616, + HI_1620, + HI_IP09, + HI_IP10, + HI_IP10C, + HI_IP12, + UNKNOWN_HI_TYPE +}; + +/* HIP12 */ +#define AIDR_EL1_IPIV_MASK GENMASK_ULL(17, 16) +/* HIP10 */ +#define AIDR_EL1_DVMBM_MASK GENMASK_ULL(13, 12) +#define SYS_LSUDVM_CTRL_EL2 sys_reg(3, 4, 15, 7, 4) +#define LSUDVM_CTLR_EL2_MASK BIT_ULL(0) + +#define MAX_CLUSTERS_PER_DIE 8 +#define TB_MN_BASE 0x00C6067f0000 +#define TA_MN_BASE 0x0046067F0000 +#define CHIP_ADDR_OFFSET(_chip) (((((_chip) >> 3) & 0x1) * 0x80000000000) + \ + ((((_chip) >> 2) & 0x1) * (0x100000000000)) + \ + (((_chip) & 0x3) * 0x200000000000)) +#define MAX_PG_CFG_SOCKETS 4 +#define MAX_DIES_PER_SOCKET 2 +#define MN_ECO0_OFFSET 0xc00 +#define SOCKET_ID_MASK 0xf8 +#define SOCKET_ID_SHIFT 3 +#define DIE_ID_MASK 0x7 +#define DIE_ID_SHIFT 0 +#define TOTEM_B_ID 3 + +/* + * MPIDR_EL1 layout on HIP10 + * + * Aff3[7:3] - socket ID [0-15] + * Aff3[2:0] - die ID [1,3] + * Aff2 - cluster ID [0-9] + * Aff1 - core ID [0-3] + * Aff0 - thread ID [0,1] + */ + +#define SYS_LSUDVMBM_EL2 sys_reg(3, 4, 15, 7, 5) +#define DVMBM_RANGE_SHIFT 62 +#define DVMBM_RANGE_ONE_DIE 0ULL +#define DVMBM_RANGE_TWO_DIES 1ULL +#define DVMBM_RANGE_ALL_DIES 3ULL + +#define DVMBM_GRAN_SHIFT 61 +#define DVMBM_GRAN_CLUSTER 0ULL +#define DVMBM_GRAN_DIE 1ULL + +#define DVMBM_DIE1_SHIFT 53 +#define DVMBM_DIE2_SHIFT 45 +#define DVMBM_DIE1_CLUSTER_SHIFT 22 +#define DVMBM_DIE2_CLUSTER_SHIFT 0 + +#define DVMBM_MAX_DIES 32 + +/* HIP12 */ +#define DVMBM_DIE1_VDIE_SHIFT_HIP12 57 +#define DVMBM_DIE2_VDIE_SHIFT_HIP12 53 +#define DVMBM_DIE1_CLUSTER_SHIFT_HIP12 6 +#define DVMBM_DIE2_CLUSTER_SHIFT_HIP12 0 +#define DVMBM_MAX_DIES_HIP12 8 + +void probe_hisi_cpu_type(void); +bool hisi_ncsnp_supported(void); +bool hisi_dvmbm_supported(void); +bool hisi_ipiv_supported(void); +void kvm_get_pg_cfg(void); +void ipiv_gicd_init(void); + +int kvm_sched_affinity_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_sched_affinity_vcpu_destroy(struct kvm_vcpu *vcpu); +int kvm_sched_affinity_vm_init(struct kvm *kvm); +void kvm_sched_affinity_vm_destroy(struct kvm *kvm); +void kvm_tlbi_dvmbm_vcpu_load(struct kvm_vcpu *vcpu); +void kvm_tlbi_dvmbm_vcpu_put(struct kvm_vcpu *vcpu); +void kvm_hisi_reload_lsudvmbm(struct kvm *kvm); +#else +static inline void probe_hisi_cpu_type(void) {} +static inline bool hisi_ncsnp_supported(void) +{ + return false; +} +static inline bool hisi_dvmbm_supported(void) +{ + return false; +} +static inline bool hisi_ipiv_supported(void) +{ + return false; +} +static inline void kvm_get_pg_cfg(void) {} +static inline void ipiv_gicd_init(void) {} + +static inline int kvm_sched_affinity_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} +static inline void kvm_sched_affinity_vcpu_destroy(struct kvm_vcpu *vcpu) {} +static inline int kvm_sched_affinity_vm_init(struct kvm *kvm) +{ + return 0; +} +static inline void kvm_sched_affinity_vm_destroy(struct kvm *kvm) {} +static inline void kvm_tlbi_dvmbm_vcpu_load(struct kvm_vcpu *vcpu) {} +static inline void kvm_tlbi_dvmbm_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_hisi_reload_lsudvmbm(struct kvm *kvm) {} +#endif /* CONFIG_KVM_HISI_VIRT */ + +extern bool gic_dist_enable_ipiv(void); +extern bool is_gicv4p1(void); +#endif /* __HISI_VIRT_H__ */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 605222ed29438847c159e5006d69a3afbcd5c7b5..506ed30295f84455e15c891f19306cd7e9911927 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -361,14 +361,14 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, /* * We want to avoid world-switching all the DBG registers all the * time: - * + * * - If we've touched any debug register, it is likely that we're * going to touch more of them. It then makes sense to disable the * traps and start doing the save/restore dance * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is * then mandatory to save/restore the registers, as the guest * depends on them. - * + * * For this, we use a DIRTY bit, indicating the guest has modified the * debug registers, used as follow: * @@ -610,6 +610,8 @@ static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1); } +extern struct static_key_false ipiv_enable; + static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 mpidr; @@ -624,6 +626,16 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); + + if (static_branch_unlikely(&ipiv_enable)) { + /* + * To avoid sending multi-SGIs in guest OS, + * make aff1/aff2 unique + */ + mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(1); + mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(2); + } + vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9e421c7f1c074060827e09d4c3a9e1253bc13591..eb160b3af096adb6c58e606244060545f03a5797 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -447,6 +447,7 @@ void build_devid_pools(void) pr_info("ITS: reserved device id pools enabled\n"); } #endif +extern struct static_key_false ipiv_enable; /* * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we @@ -4394,11 +4395,71 @@ static void its_vpe_4_1_unmask_irq(struct irq_data *d) its_vpe_4_1_send_inv(d); } +/* IPIV private register */ +#define CPU_SYS_TRAP_EL2 sys_reg(3, 4, 15, 7, 2) +#define CPU_SYS_TRAP_EL2_IPIV_ENABLE_SHIFT 0 +#define CPU_SYS_TRAP_EL2_IPIV_ENABLE \ + (1ULL << CPU_SYS_TRAP_EL2_IPIV_ENABLE_SHIFT) + +/* + * ipiv_disable_vsgi_trap and ipiv_enable_vsgi_trap run only + * in VHE mode and in EL2. + */ +static void ipiv_disable_vsgi_trap(void) +{ +#ifdef CONFIG_ARM64 + u64 val; + + /* disable guest access ICC_SGI1R_EL1 trap, enable ipiv */ + val = read_sysreg_s(CPU_SYS_TRAP_EL2); + val |= CPU_SYS_TRAP_EL2_IPIV_ENABLE; + write_sysreg_s(val, CPU_SYS_TRAP_EL2); +#endif +} + +static void ipiv_enable_vsgi_trap(void) +{ +#ifdef CONFIG_ARM64 + u64 val; + + /* enable guest access ICC_SGI1R_EL1 trap, disable ipiv */ + val = read_sysreg_s(CPU_SYS_TRAP_EL2); + val &= ~CPU_SYS_TRAP_EL2_IPIV_ENABLE; + write_sysreg_s(val, CPU_SYS_TRAP_EL2); +#endif +} + static void its_vpe_4_1_schedule(struct its_vpe *vpe, struct its_cmd_info *info) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + struct its_vm *vm = vpe->its_vm; + unsigned long vpeid_page_addr; + u64 ipiv_val = 0; u64 val = 0; + u32 nr_vpes; + + if (static_branch_unlikely(&ipiv_enable) && + vm->nassgireq) { + /* wait gicr_ipiv_busy */ + WARN_ON_ONCE(readl_relaxed_poll_timeout_atomic(vlpi_base + + GICR_IPIV_ST, ipiv_val, !(ipiv_val + & GICR_IPIV_ST_IPIV_BUSY), 1, 500)); + vpeid_page_addr = virt_to_phys(page_address(vm->vpeid_page)); + writel_relaxed(lower_32_bits(vpeid_page_addr), + vlpi_base + GICR_VM_TABLE_BAR_L); + writel_relaxed(upper_32_bits(vpeid_page_addr), + vlpi_base + GICR_VM_TABLE_BAR_H); + + /* setup gicr_vcpu_entry_num_max and gicr_ipiv_its_ta_sel */ + nr_vpes = vpe->its_vm->nr_vpes; + ipiv_val = ((nr_vpes - 1) << + GICR_IPIV_CTRL_VCPU_ENTRY_NUM_MAX_SHIFT) | + (0 << GICR_IPIV_CTRL_IPIV_ITS_TA_SEL_SHIFT); + writel_relaxed(ipiv_val, vlpi_base + GICR_IPIV_CTRL); + + ipiv_disable_vsgi_trap(); + } /* Schedule the VPE */ val |= GICR_VPENDBASER_Valid; @@ -4413,6 +4474,7 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, struct its_cmd_info *info) { void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + struct its_vm *vm = vpe->its_vm; u64 val; if (info->req_db) { @@ -4444,6 +4506,18 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, GICR_VPENDBASER_PendingLast); vpe->pending_last = true; } + + if (static_branch_unlikely(&ipiv_enable) && + vm->nassgireq) { + /* wait gicr_ipiv_busy */ + WARN_ON_ONCE(readl_relaxed_poll_timeout_atomic(vlpi_base + + GICR_IPIV_ST, val, !(val & GICR_IPIV_ST_IPIV_BUSY), + 1, 500)); + writel_relaxed(0, vlpi_base + GICR_VM_TABLE_BAR_L); + writel_relaxed(0, vlpi_base + GICR_VM_TABLE_BAR_H); + + ipiv_enable_vsgi_trap(); + } } static void its_vpe_4_1_invall(struct its_vpe *vpe) @@ -4844,6 +4918,10 @@ static void its_vpe_irq_domain_free(struct irq_domain *domain, if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { its_lpi_free(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); its_free_prop_table(vm->vprop_page); + if (static_branch_unlikely(&ipiv_enable)) { + free_pages((unsigned long)page_address(vm->vpeid_page), + get_order(nr_irqs * 2)); + } } } @@ -4853,8 +4931,10 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq struct irq_chip *irqchip = &its_vpe_irq_chip; struct its_vm *vm = args; unsigned long *bitmap; - struct page *vprop_page; + struct page *vprop_page, *vpeid_page; int base, nr_ids, i, err = 0; + void *vpeid_table_va; + u16 *vpeid_entry; BUG_ON(!vm); @@ -4878,14 +4958,35 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq vm->nr_db_lpis = nr_ids; vm->vprop_page = vprop_page; - if (gic_rdists->has_rvpeid) + if (gic_rdists->has_rvpeid) { irqchip = &its_vpe_4_1_irq_chip; + if (static_branch_unlikely(&ipiv_enable)) { + /* + * The vpeid's size is 2 bytes, so we need to + * allocate 2 * (num of vcpus). nr_irqs is + * equal to the number of vCPUs. + */ + vpeid_page = alloc_pages(GFP_KERNEL, + get_order(nr_irqs * 2)); + if (!vpeid_page) { + its_lpi_free(bitmap, base, nr_ids); + its_free_prop_table(vprop_page); + return -ENOMEM; + } + vm->vpeid_page = vpeid_page; + vpeid_table_va = page_address(vpeid_page); + } + } for (i = 0; i < nr_irqs; i++) { vm->vpes[i]->vpe_db_lpi = base + i; err = its_vpe_init(vm->vpes[i]); if (err) break; + if (static_branch_unlikely(&ipiv_enable)) { + vpeid_entry = (u16 *)vpeid_table_va + i; + *vpeid_entry = vm->vpes[i]->vpe_id; + } err = its_irq_gic_domain_alloc(domain, virq + i, vm->vpes[i]->vpe_db_lpi); if (err) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index f875e168c873e977d8f376cdf407ae342c22b971..ff7585b98792e4497e490601e03a23a33362675f 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -87,6 +87,9 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +DEFINE_STATIC_KEY_FALSE(ipiv_enable); +EXPORT_SYMBOL(ipiv_enable); + /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; @@ -1027,6 +1030,36 @@ static int gic_dist_supports_lpis(void) !gicv3_nolpi); } +bool is_gicv4p1(void) +{ + if (!gic_data.rdists.has_rvpeid) + return false; + + return true; +} +EXPORT_SYMBOL(is_gicv4p1); + +void gic_dist_enable_ipiv(void) +{ + u32 val; + + val = readl_relaxed(gic_data.dist_base + GICD_MISC_CTRL); + val |= GICD_MISC_CTRL_CFG_IPIV_EN; + writel_relaxed(val, gic_data.dist_base + GICD_MISC_CTRL); + static_branch_enable(&ipiv_enable); + + val = (0 << GICD_IPIV_CTRL_AFF_DIRECT_VPEID_SHIFT) | + (0 << GICD_IPIV_CTRL_AFF1_LEFT_SHIFT_SHIFT) | + (4 << GICD_IPIV_CTRL_AFF2_LEFT_SHIFT_SHIFT) | + (7 << GICD_IPIV_CTRL_VM_TABLE_INNERCACHE_SHIFT) | + (2 << GICD_IPIV_CTRL_VM_TABLE_SHAREABILITY_SHIFT); + writel_relaxed(val, gic_data.dist_base + GICD_IPIV_CTRL); + + /* Set target ITS address of IPIV feature */ + writel_relaxed(0x4880, gic_data.dist_base + GICD_IPIV_ITS_TA_BASE); +} +EXPORT_SYMBOL(gic_dist_enable_ipiv); + static void gic_cpu_init(void) { void __iomem *rbase; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c56c484b196cd4184e8f96b63bb063a5c8bf6e40..13ca13a8487e55d1d264f18ba064fece7c3d3076 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -109,6 +109,18 @@ #define GIC_PAGE_SIZE_64K 2ULL #define GIC_PAGE_SIZE_MASK 3ULL +#define GICD_MISC_CTRL 0x2084 +#define GICD_MISC_CTRL_CFG_IPIV_EN (1U << 19) + +/* IPIV private register */ +#define GICD_IPIV_CTRL 0xc05c +#define GICD_IPIV_CTRL_AFF_DIRECT_VPEID_SHIFT 4 +#define GICD_IPIV_CTRL_AFF1_LEFT_SHIFT_SHIFT 8 +#define GICD_IPIV_CTRL_AFF2_LEFT_SHIFT_SHIFT 12 +#define GICD_IPIV_CTRL_VM_TABLE_INNERCACHE_SHIFT 16 +#define GICD_IPIV_CTRL_VM_TABLE_SHAREABILITY_SHIFT 19 +#define GICD_IPIV_ITS_TA_BASE 0xc010 + /* * Re-Distributor registers, offsets from RD_base */ @@ -358,6 +370,21 @@ #define GICR_VSGIPENDR_BUSY (1U << 31) #define GICR_VSGIPENDR_PENDING GENMASK(15, 0) +/* IPIV VM table address */ +#define GICR_VM_TABLE_BAR_L 0x140 +#define GICR_VM_TABLE_BAR_H 0x144 + +#define GICR_IPIV_CTRL 0x148 +#define GICR_IPIV_CTRL_VCPU_ENTRY_NUM_MAX_SHIFT 8 +/* + * Select ITS to determine the ITS through which the IPI is sent. + */ +#define GICR_IPIV_CTRL_IPIV_ITS_TA_SEL_SHIFT 4 + +#define GICR_IPIV_ST 0x14c +#define GICR_IPIV_ST_IPIV_BUSY_SHIFT 0 +#define GICR_IPIV_ST_IPIV_BUSY (1 << GICR_IPIV_ST_IPIV_BUSY_SHIFT) + /* * ITS registers, offsets from ITS_base */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 07e8db2aa449f46086375daeb685fea11e249296..aae19c59ca6fb2c676b140230fc07723a0bf6399 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -26,6 +26,8 @@ struct its_vm { unsigned long *db_bitmap; int nr_db_lpis; u32 vlpi_count[GICv4_ITS_LIST_MAX]; + struct page *vpeid_page; +bool nassgireq; }; /* Embedded in kvm_vcpu.arch */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ebb362be8c953286f500d4ca6fef7abcd189687a..248e534379e87f5a8c995f823b5f77c6f163663a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1008,6 +1008,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 +#define KVM_CAP_ARM_IPIV_MODE 503 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 28cdd2f4d1eff766e66d9a56d198133c182b863c..93f9906e2d3ea374c08bbda41cb2edf6cbe8218a 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -44,7 +44,17 @@ __asm__(".arch_extension virt"); #endif +#include "hisilicon/hisi_virt.h" + DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); + +/* Capability of DVMBM */ +bool kvm_dvmbm_support; + +/* Capability of IPIV */ +bool kvm_ipiv_support; + + static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); /* Per-CPU variable containing the currently running vcpu. */ @@ -57,6 +67,9 @@ static DEFINE_SPINLOCK(kvm_vmid_lock); static bool vgic_present; +/* Capability of non-cacheable snooping */ +bool kvm_ncsnp_support; + static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -107,6 +120,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; + ret = kvm_sched_affinity_vm_init(kvm); + if (ret) + return ret; + ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; @@ -163,6 +180,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; + kvm_sched_affinity_vm_destroy(kvm); kvm_vgic_destroy(kvm); free_percpu(kvm->arch.last_vcpu_ran); @@ -177,6 +195,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) atomic_set(&kvm->online_vcpus, 0); } +extern struct static_key_false ipiv_enable; + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -229,6 +249,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = sdev_enable; break; #endif + case KVM_CAP_ARM_IPIV_MODE: + if (static_branch_unlikely(&ipiv_enable)) + r = 1; + else + r = 0; + break; default: r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; @@ -283,6 +309,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + err = kvm_sched_affinity_vcpu_init(vcpu); + if (err) + goto free_vcpu; + err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); if (err) goto vcpu_uninit; @@ -315,6 +345,8 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { kvm_arch_vcpu_free(vcpu); + + kvm_sched_affinity_vcpu_destroy(vcpu); } int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) @@ -422,6 +454,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu_ptrauth_disable(vcpu); } + + kvm_tlbi_dvmbm_vcpu_load(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -435,6 +469,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->cpu = -1; kvm_arm_set_running_vcpu(NULL); + + kvm_tlbi_dvmbm_vcpu_put(vcpu); } static void vcpu_power_off(struct kvm_vcpu *vcpu) @@ -696,6 +732,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) vgic_v4_load(vcpu); preempt_enable(); } + + if (kvm_check_request(KVM_REQ_RELOAD_TLBI_DVMBM, vcpu)) + kvm_hisi_reload_lsudvmbm(vcpu->kvm); } } @@ -1769,6 +1808,20 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + probe_hisi_cpu_type(); + kvm_ncsnp_support = hisi_ncsnp_supported(); + kvm_dvmbm_support = hisi_dvmbm_supported(); + kvm_ipiv_support = hisi_ipiv_supported(); + kvm_info("KVM ncsnp %s\n", kvm_ncsnp_support ? "enabled" : "disabled"); + kvm_info("KVM dvmbm %s\n", kvm_dvmbm_support ? "enabled" : "disabled"); + kvm_info("KVM ipiv %s\n", kvm_ipiv_support ? "enabled" : "disabled"); + + if (kvm_dvmbm_support) + kvm_get_pg_cfg(); + + if (kvm_ipiv_support) + ipiv_gicd_init(); + in_hyp_mode = is_kernel_in_hyp_mode(); if (!in_hyp_mode && kvm_arch_requires_vhe()) { diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 3d04b3c539840c7e7097f9ff25b5a3ba1d7e710d..03019ae2b05cab3f5b3d24337ec7a8e32b88976d 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2015, 2016 ARM Ltd. */ - +#include #include #include #include @@ -459,11 +459,16 @@ int kvm_vgic_map_resources(struct kvm *kvm) return ret; } +extern struct static_key_false ipiv_enable; +static int ipiv_irq; + /* GENERIC PROBE */ static int vgic_init_cpu_starting(unsigned int cpu) { enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); + if (static_branch_unlikely(&ipiv_enable)) + enable_percpu_irq(ipiv_irq, 0); return 0; } @@ -471,9 +476,17 @@ static int vgic_init_cpu_starting(unsigned int cpu) static int vgic_init_cpu_dying(unsigned int cpu) { disable_percpu_irq(kvm_vgic_global_state.maint_irq); + if (static_branch_unlikely(&ipiv_enable)) + disable_percpu_irq(ipiv_irq); return 0; } +static irqreturn_t vgic_ipiv_irq_handler(int irq, void *data) +{ + kvm_info("IPIV irq handler!\n"); + return IRQ_HANDLED; +} + static irqreturn_t vgic_maintenance_handler(int irq, void *data) { /* @@ -561,6 +574,28 @@ int kvm_vgic_hyp_init(void) } kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); + + if (static_branch_unlikely(&ipiv_enable)) { + ipiv_irq = acpi_register_gsi(NULL, 18, ACPI_EDGE_SENSITIVE, + ACPI_ACTIVE_HIGH); + if (ipiv_irq < 0) { + kvm_err("No ipiv exception irq\n"); + free_percpu_irq(kvm_vgic_global_state.maint_irq, + kvm_get_running_vcpus()); + return -ENXIO; + } + + ret = request_percpu_irq(ipiv_irq, vgic_ipiv_irq_handler, + "ipiv exception", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", ipiv_irq); + free_percpu_irq(kvm_vgic_global_state.maint_irq, + kvm_get_running_vcpus()); + acpi_unregister_gsi(18); + return ret; + } + } + return 0; out_free_irq: diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 9f58d383dbc3ade3a92fac43c25f4e66ad594a7c..29991d2a831dabf1c0d5b72aed1388b4593ae83f 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -129,6 +129,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, /* Switching HW SGIs? */ dist->nassgireq = val & GICD_CTLR_nASSGIreq; + dist->its_vm.nassgireq = dist->nassgireq; if (is_hwsgi != dist->nassgireq) vgic_v4_configure_vsgis(vcpu->kvm);