From e3de20aac07d94e68465b80fe1c4c90a1dc087f0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 27 Dec 2020 12:11:16 +0200 Subject: [PATCH 01/48] intel_idle: add SnowRidge C-state table mainline inclusion from mainline-v5.11-rc2 commit 9cf93f056f783f986c19f40d5304d1bcffa0fc0d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 9cf93f056f78 intel_idle: add SnowRidge C-state table. Backport intel_idle driver -------------------------------- Add C-state table for the SnowRidge SoC which is found on Intel Jacobsville platforms. The following has been changed. 1. C1E latency changed from 10us to 15us. It was measured using the open source "wult" tool (the "nic" method, 15us is the 99.99th percentile). 2. C1E power break even changed from 20us to 25us, which may result in less C1E residency in some workloads. 3. C6 latency changed from 50us to 130us. Measured the same way as C1E. The C6 C-state is supported only by some SnowRidge revisions, so add a C-state table commentary about this. On SnowRidge, C6 support is enumerated via the usual mechanism: "mwait" leaf of the "cpuid" instruction. The 'intel_idle' driver does check this leaf, so even though C6 is present in the table, the driver will only use it if the CPU does support it. Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 41 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d79335506ecd..28f93b9aa51b 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -963,6 +963,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .enter = NULL } }; +/* + * Note, depending on HW and FW revision, SnowRidge SoC may or may not support + * C6, and this is indicated in the CPUID mwait leaf. 
+ */ +static struct cpuidle_state snr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 15, + .target_residency = 25, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 130, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1084,6 +1117,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_snr __initconst = { + .state_table = snr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1122,7 +1161,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), {} }; -- Gitee From 0f8062fbabc16c3a98c001e0f29dd971f559c1b8 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 15 Jan 2021 15:56:46 -0800 Subject: [PATCH 02/48] intel_idle: remove definition of DEBUG mainline inclusion from mainline-v5.12-rc2 commit 651bc5816c39e57833fea4478c8ecfb72ad47e44 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 651bc5816c39 intel_idle: remove definition of DEBUG. Backport intel_idle driver -------------------------------- Defining DEBUG should only be done in development. So remove DEBUG. Signed-off-by: Tom Rix Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 28f93b9aa51b..3273360f30f7 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -37,7 +37,7 @@ */ /* un-comment DEBUG to enable pr_debug() statements */ -#define DEBUG +/* #define DEBUG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -- Gitee From cf27c6dd77b5b2b94d077ed874c313ea42b5019f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 8 Mar 2021 16:31:46 +0200 Subject: [PATCH 03/48] intel_idle: update ICX C6 data mainline inclusion from mainline-v5.13-rc1 commit d484b8bfc6fa71a088e4ac85d9ce11aa0385867e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit d484b8bfc6fa intel_idle: update ICX C6 data. Backport intel_idle driver -------------------------------- Change IceLake Xeon C6 latency from 128 us to 170 us. The latency was measured with the "wult" tool and corresponds to the 99.99th percentile when measuring with the "nic" method. 
Note, the 128 us figure correspond to the median latency, but in intel_idle we use the "worst case" latency figure instead. C6 target residency was increased from 384 us to 600 us, which may result in less C6 residency in some workloads. This value was tested and compared to values 384, and 1000. Value 600 is a reasonable tradeoff between power and performance. Signed-off-by: Artem Bityutskiy Acked-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3273360f30f7..6cac0b748efa 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -744,8 +744,8 @@ static struct cpuidle_state icx_cstates[] __initdata = { .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 128, - .target_residency = 384, + .exit_latency = 170, + .target_residency = 600, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { -- Gitee From 6dd726125f90f12f331ca13c9a7ebd6ec00cd27f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 7 Apr 2021 09:10:28 +0300 Subject: [PATCH 04/48] intel_idle: add Iclelake-D support mainline inclusion from mainline-v5.13-rc1 commit 22141d5f411895bb1b0df2a6b05f702e11e63918 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 22141d5f4118 intel_idle: add Iclelake-D support. Backport intel_idle driver -------------------------------- This patch adds Icelake Xeon D support to the intel_idle driver. Since Icelake D and Icelake SP C-state characteristics the same, we use Icelake SP C-states table for Icelake D as well. Signed-off-by: Artem Bityutskiy Acked-by: Chen Yu Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 6cac0b748efa..ec1b9d306ba6 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1156,6 +1156,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), -- Gitee From 3a5ec17e30b976073001ae9b76968db5ea8e3a51 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 28 May 2021 11:20:54 +0800 Subject: [PATCH 05/48] intel_idle: Adjust the SKX C6 parameters if PC6 is disabled mainline inclusion from mainline-v5.14-rc1 commit 64233338499126c5c31e07165735ab5441c7e45a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 642333384991 intel_idle: Adjust the SKX C6 parameters if PC6 is disabled. Backport intel_idle driver -------------------------------- Because cpuidle assumes worst-case C-state parameters, PC6 parameters are used for describing C6, which is worst-case for requesting CC6. When PC6 is enabled, this is appropriate. But if PC6 is disabled in the BIOS, the exit latency and target residency should be adjusted accordingly. Exit latency: Previously the C6 exit latency was measured as the PC6 exit latency. 
With PC6 disabled, the C6 exit latency should be the one of CC6. Target residency: With PC6 disabled, the idle duration within [CC6, PC6) would make the idle governor choose C1E over C6. This would cause low energy-efficiency. We should lower the bar to request C6 when PC6 is disabled. To fill this gap, check if PC6 is disabled in the BIOS in the MSR_PKG_CST_CONFIG_CONTROL(0xe2) register. If so, use the CC6 exit latency for C6 and set target_residency to 3 times of the new exit latency. [This is consistent with how intel_idle driver uses _CST to calculate the target_residency.] As a result, the OS would be more likely to choose C6 over C1E when PC6 is disabled, which is reasonable, because if C6 is enabled, it implies that the user cares about energy, so choosing C6 more frequently makes sense. The new CC6 exit latency of 92us was measured with wult[1] on SKX via NIC wakeup as the 99.99th percentile. Also CLX and CPX both have the same CPU model number as SkX, but their CC6 exit latencies are similar to the SKX one, 96us and 89us respectively, so reuse the SKX value for them. There is a concern that it might be better to use a more generic approach instead of optimizing every platform. However, if the required code complexity and different PC6 bit interpretation on different platforms are taken into account, tuning the code per platform seems to be an acceptable tradeoff. Link: https://intel.github.io/wult/ # [1] Suggested-by: Len Brown Signed-off-by: Chen Yu Reviewed-by: Artem Bityutskiy [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index ec1b9d306ba6..e6c543b5ee1d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1484,6 +1484,36 @@ static void __init sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } +/** + * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake + * idle states table. + */ +static void __init skx_idle_state_table_update(void) +{ + unsigned long long msr; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* + * 000b: C0/C1 (no package C-state support) + * 001b: C2 + * 010b: C6 (non-retention) + * 011b: C6 (retention) + * 111b: No Package C state limits. + */ + if ((msr & 0x7) < 2) { + /* + * Uses the CC6 + PC0 latency and 3 times of + * latency for target_residency if the PC6 + * is disabled in BIOS. This is consistent + * with how intel_idle driver uses _CST + * to set the target_residency. 
+ */ + skx_cstates[2].exit_latency = 92; + skx_cstates[2].target_residency = 276; + } +} + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; @@ -1515,6 +1545,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SKYLAKE: sklh_idle_state_table_update(); break; + case INTEL_FAM6_SKYLAKE_X: + skx_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { -- Gitee From 839940d9189b2a54cd761b666f2c0a9fbde2c3a5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 17 Sep 2021 10:20:22 +0300 Subject: [PATCH 06/48] intel_idle: enable interrupts before C1 on Xeons mainline inclusion from mainline-v5.16-rc1 commit c227233ad64c77e57db738ab0e46439db71822a3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit c227233ad64c intel_idle: enable interrupts before C1 on Xeons. Backport intel_idle driver -------------------------------- Enable local interrupts before requesting C1 on the last two generations of Intel Xeon platforms: Sky Lake, Cascade Lake, Cooper Lake, Ice Lake. This decreases average C1 interrupt latency by about 5-10%, as measured with the 'wult' tool. The '->enter()' function of the driver enters C-states with local interrupts disabled by executing the 'monitor' and 'mwait' pair of instructions. If an interrupt happens, the CPU exits the C-state and continues executing instructions after 'mwait'. It does not jump to the interrupt handler, because local interrupts are disabled. The cpuidle subsystem enables interrupts a bit later, after doing some housekeeping. With this patch, we enable local interrupts before requesting C1. In this case, if the CPU wakes up because of an interrupt, it will jump to the interrupt handler right away. The cpuidle housekeeping will be done after the pending interrupt(s) are handled. Enabling interrupts before entering a C-state has measurable impact for faster C-states, like C1. Deeper, but slower C-states like C6 do not really benefit from this sort of change, because their latency is a lot higher comparing to the delay added by cpuidle housekeeping. This change was also tested with cyclictest and dbench. In case of Ice Lake, the average cyclictest latency decreased by 5.1%, and the average 'dbench' throughput increased by about 0.8%. Both tests were run for 4 hours with only C1 enabled (all other idle states, including 'POLL', were disabled). CPU frequency was pinned to HFM, and uncore frequency was pinned to the maximum value. The other platforms had similar single-digit percentage improvements. It is worth noting that this patch affects 'cpuidle' statistics a tiny bit. Before this patch, C1 residency did not include the interrupt handling time, but with this patch, it will include it. This is similar to what happens in case of the 'POLL' state, which also runs with interrupts enabled. Suggested-by: Len Brown Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. 
Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e6c543b5ee1d..0b66e25c0e2d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -88,6 +88,12 @@ static struct cpuidle_state *cpuidle_state_table __initdata; static unsigned int mwait_substates __initdata; +/* + * Enable interrupts before entering the C-state. On some platforms and for + * some C-states, this may measurably decrease interrupt latency. + */ +#define CPUIDLE_FLAG_IRQ_ENABLE BIT(14) + /* * Enable this state by default even if the ACPI _CST does not list it. */ @@ -127,6 +133,9 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, unsigned long eax = flg2MWAIT(state->flags); unsigned long ecx = 1; /* break on interrupt flag */ + if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) + local_irq_enable(); + mwait_idle_with_hints(eax, ecx); return index; @@ -698,7 +707,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", - .flags = MWAIT2flg(0x00), + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 2, .target_residency = 2, .enter = &intel_idle, @@ -727,7 +736,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { { .name = "C1", .desc = "MWAIT 0x00", - .flags = MWAIT2flg(0x00), + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE, .exit_latency = 1, .target_residency = 1, .enter = &intel_idle, -- Gitee From 23be0951971714eda30d4a62f53e72e70fef6bed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 2 Mar 2022 10:15:58 +0200 Subject: [PATCH 07/48] intel_idle: add SPR support mainline inclusion from mainline-v5.18-rc1 commit 9edf3c0ffef0ec1bed8300315852b5c6a0997130 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 9edf3c0ffef0 intel_idle: add SPR support. Backport intel_idle driver -------------------------------- Add Sapphire Rapids Xeon support. Up until very recently, the C1 and C1E C-states were independent, but this has changed in some new chips, including Sapphire Rapids Xeon (SPR). In these chips the C1 and C1E states cannot be enabled at the same time. The "C1E promotion" bit in 'MSR_IA32_POWER_CTL' also has its semantics changed a bit. Here are the C1, C1E, and "C1E promotion" bit rules on Xeons before SPR. 1. If C1E promotion bit is disabled. a. C1 requests end up with C1 C-state. b. C1E requests end up with C1E C-state. 2. If C1E promotion bit is enabled. a. C1 requests end up with C1E C-state. b. C1E requests end up with C1E C-state. Here are the C1, C1E, and "C1E promotion" bit rules on Sapphire Rapids Xeon. 1. If C1E promotion bit is disabled. a. C1 requests end up with C1 C-state. b. C1E requests end up with C1 C-state. 2. If C1E promotion bit is enabled. a. C1 requests end up with C1E C-state. b. C1E requests end up with C1E C-state. Before SPR Xeon, the 'intel_idle' driver was disabling C1E promotion and was exposing C1 and C1E as independent C-states. But on SPR, C1 and C1E cannot be enabled at the same time. This patch adds both C1 and C1E states. However, C1E is marked as with the "CPUIDLE_FLAG_UNUSABLE" flag, which means that in won't be registered by default. The C1E promotion bit will be cleared, which means that by default only C1 and C6 will be registered on SPR. The next patch will add an option for enabling C1E and disabling C1 on SPR. 
Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 47 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 0b66e25c0e2d..1c7c25909e54 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -761,6 +761,46 @@ static struct cpuidle_state icx_cstates[] __initdata = { .enter = NULL } }; +/* + * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice + * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in + * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 + * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then + * both C1 and C1E requests end up with C1, so there is effectively no C1E. + * + * By default we enable C1 and disable C1E by marking it with + * 'CPUIDLE_FLAG_UNUSABLE'. + */ +static struct cpuidle_state spr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ + CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 290, + .target_residency = 800, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state atom_cstates[] __initdata = { { .name = "C1E", @@ -1104,6 +1144,12 @@ static const struct idle_cpu idle_cpu_icx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_spr __initconst = { + .state_table = spr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_avn __initconst = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, @@ -1166,6 +1212,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), -- Gitee From 8319c2b0f1f31915a17acd5b678bd1d782c9c5e7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 2 Mar 2022 10:15:59 +0200 Subject: [PATCH 08/48] intel_idle: add 'preferred_cstates' module argument mainline inclusion from mainline-v5.18-rc1 commit da0e58c038e60e7e65d30813ebdfe91687aa8a24 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit da0e58c038e6 intel_idle: add 'preferred_cstates' module argument. Backport intel_idle driver -------------------------------- On Sapphire Rapids Xeon (SPR) the C1 and C1E states are basically mutually exclusive - only one of them can be enabled. By default, 'intel_idle' driver enables C1 and disables C1E. However, some users prefer to use C1E instead of C1, because it saves more energy. This patch adds a new module parameter ('preferred_cstates') for enabling C1E and disabling C1. 
Here is the idea behind it. 1. This option has effect only for "mutually exclusive" C-states like C1 and C1E on SPR. 2. It does not have any effect on independent C-states, which do not require other C-states to be disabled (most states on most platforms as of today). 3. For mutually exclusive C-states, the 'intel_idle' driver always has a reasonable default, such as enabling C1 on SPR by default. On other platforms, the default may be different. 4. Users can override the default using the 'preferred_cstates' parameter. 5. The parameter accepts the preferred C-states bit-mask, similarly to the existing 'states_off' parameter. 6. This parameter is not limited to C1/C1E, and leaves room for supporting other mutually exclusive C-states, if they come in the future. Today 'intel_idle' can only be compiled-in, which means that on SPR, in order to disable C1 and enable C1E, users should boot with the following kernel argument: intel_idle.preferred_cstates=4 Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 46 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 1c7c25909e54..b2688c326522 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -64,6 +64,7 @@ static struct cpuidle_driver intel_idle_driver = { /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; static unsigned int disabled_states_mask; +static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; @@ -1400,6 +1401,8 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +static void c1e_promotion_enable(void); + /** * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. * @@ -1570,6 +1573,26 @@ static void __init skx_idle_state_table_update(void) } } +/** + * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. + */ +static void __init spr_idle_state_table_update(void) +{ + /* Check if user prefers C1E over C1. */ + if (preferred_states_mask & BIT(2)) { + if (preferred_states_mask & BIT(1)) + /* Both can't be enabled, stick to the defaults. */ + return; + + spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; + spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + + /* Enable C1E using the "C1E promotion" bit. 
*/ + c1e_promotion_enable(); + disable_promotion_to_c1e = false; + } +} + static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; @@ -1604,6 +1627,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SKYLAKE_X: skx_idle_state_table_update(); break; + case INTEL_FAM6_SAPPHIRERAPIDS_X: + spr_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { @@ -1676,6 +1702,15 @@ static void auto_demotion_disable(void) wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); } +static void c1e_promotion_enable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits |= 0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + static void c1e_promotion_disable(void) { unsigned long long msr_bits; @@ -1845,3 +1880,14 @@ module_param(max_cstate, int, 0444); */ module_param_named(states_off, disabled_states_mask, uint, 0444); MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); +/* + * Some platforms come with mutually exclusive C-states, so that if one is + * enabled, the other C-states must not be used. Example: C1 and C1E on + * Sapphire Rapids platform. This parameter allows for selecting the + * preferred C-states among the groups of mutually exclusive C-states - the + * selected C-states will be registered, the other C-states from the mutually + * exclusive group won't be registered. If the platform has no mutually + * exclusive C-states, this parameter has no effect. + */ +module_param_named(preferred_cstates, preferred_states_mask, uint, 0444); +MODULE_PARM_DESC(preferred_cstates, "Mask of preferred idle states"); -- Gitee From dc85adcb572c8efdd2b85ae2ade6346dcadef52a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 2 Mar 2022 10:16:00 +0200 Subject: [PATCH 09/48] intel_idle: add core C6 optimization for SPR mainline inclusion from mainline-v5.18-rc1 commit 3a9cf77b60dc9839b6674943bb7c9dcd524b6294 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 3a9cf77b60dc intel_idle: add core C6 optimization for SPR. Backport intel_idle driver -------------------------------- Add a Sapphire Rapids Xeon C6 optimization, similar to what we have for Sky Lake Xeon: if package C6 is disabled, adjust C6 exit latency and target residency to match core C6 values, instead of using the default package C6 values. Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b2688c326522..e385ddf15b32 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1578,6 +1578,8 @@ static void __init skx_idle_state_table_update(void) */ static void __init spr_idle_state_table_update(void) { + unsigned long long msr; + /* Check if user prefers C1E over C1. */ if (preferred_states_mask & BIT(2)) { if (preferred_states_mask & BIT(1)) @@ -1591,6 +1593,19 @@ static void __init spr_idle_state_table_update(void) c1e_promotion_enable(); disable_promotion_to_c1e = false; } + + /* + * By default, the C6 state assumes the worst-case scenario of package + * C6. However, if PC6 is disabled, we update the numbers to match + * core C6. + */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr); + + /* Limit value 2 and above allow for PC6. 
*/ + if ((msr & 0x7) < 2) { + spr_cstates[2].exit_latency = 190; + spr_cstates[2].target_residency = 600; + } } static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) -- Gitee From 54ba645bed8c8aa9722fc9c03a35c51789eed326 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Mar 2022 20:35:47 +0100 Subject: [PATCH 10/48] cpuidle: intel_idle: Update intel_idle() kerneldoc comment mainline inclusion from mainline-v5.18-rc1 commit a335b1e6bb29300d3bc6749763a4298627e594ba category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit a335b1e6bb29 cpuidle: intel_idle: Update intel_idle() kerneldoc comment. Backport intel_idle driver -------------------------------- Commit bf9282dc26e7 ("cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic") moved the leave_mm() call away from intel_idle(), but it didn't update its kerneldoc comment accordingly, so do that now. Fixes: bf9282dc26e7 ("cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic") Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index e385ddf15b32..4ba4ab974dbe 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -122,9 +122,6 @@ static unsigned int mwait_substates __initdata; * If the local APIC timer is not known to be reliable in the target idle state, * enable one-shot tick broadcasting for the target CPU before executing MWAIT. * - * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to - * flushing user TLBs. - * * Must be called under local_irq_disable(). */ static __cpuidle int intel_idle(struct cpuidle_device *dev, -- Gitee From 5c3af46f79a48b2eb7e5faff1f8dcdd2a2e44a78 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Mar 2022 20:36:42 +0100 Subject: [PATCH 11/48] cpuidle: intel_idle: Drop redundant backslash at line end mainline inclusion from mainline-v5.18-rc1 commit 03eb65224e5711e7a2f34b500d44866b322a249a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 03eb65224e57 cpuidle: intel_idle: Drop redundant backslash at line end. Backport intel_idle driver -------------------------------- Drop a redundant backslash character at the end of a line in the spr_cstates[] definition. Signed-off-by: Rafael J. 
Wysocki Acked-by: Artem Bityutskiy Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 4ba4ab974dbe..b7640cfe0020 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -781,7 +781,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | \ + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | CPUIDLE_FLAG_UNUSABLE, .exit_latency = 2, .target_residency = 4, -- Gitee From eed9fe8489aafbc28e77b4fd36b095801ed17c76 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 27 Apr 2022 09:08:52 +0300 Subject: [PATCH 12/48] intel_idle: Fix the 'preferred_cstates' module parameter mainline inclusion from mainline-v5.18-rc5 commit 39c184a6a9a7a99950b321d55fe713175cf1d404 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 39c184a6a9a7 intel_idle: Fix the 'preferred_cstates' module parameter. Backport for intel_idle driver -------------------------------- Problem description. When user boots kernel up with the 'intel_idle.preferred_cstates=4' option, we enable C1E and disable C1 states on Sapphire Rapids Xeon (SPR). In order for C1E to work on SPR, we have to enable the C1E promotion bit on all CPUs. However, we enable it only on one CPU. Fix description. The 'intel_idle' driver already has the infrastructure for disabling C1E promotion on every CPU. This patch uses the same infrastructure for enabling C1E promotion on every CPU. It changes the boolean 'disable_promotion_to_c1e' variable to a tri-state 'c1e_promotion' variable. Tested on a 2-socket SPR system. I verified the following combinations: * C1E promotion enabled and disabled in BIOS. * Booted with and without the 'intel_idle.preferred_cstates=4' kernel argument. In all 4 cases C1E promotion was correctly set on all CPUs. Also tested on an old Broadwell system, just to make sure it does not cause a regression. C1E promotion was correctly disabled on that system, both C1 and C1E were exposed (as expected). Fixes: da0e58c038e6 ("intel_idle: add 'preferred_cstates' module argument") Reported-by: Jan Beulich Signed-off-by: Artem Bityutskiy [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b7640cfe0020..cf5ed4c1d02c 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -69,7 +69,12 @@ static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; -static bool disable_promotion_to_c1e; + +static enum { + C1E_PROMOTION_PRESERVE, + C1E_PROMOTION_ENABLE, + C1E_PROMOTION_DISABLE +} c1e_promotion = C1E_PROMOTION_PRESERVE; struct idle_cpu { struct cpuidle_state *state_table; @@ -1398,8 +1403,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -static void c1e_promotion_enable(void); - /** * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 
* @@ -1587,8 +1590,7 @@ static void __init spr_idle_state_table_update(void) spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; /* Enable C1E using the "C1E promotion" bit. */ - c1e_promotion_enable(); - disable_promotion_to_c1e = false; + c1e_promotion = C1E_PROMOTION_ENABLE; } /* @@ -1754,7 +1756,9 @@ static int intel_idle_cpu_init(unsigned int cpu) if (auto_demotion_disable_flags) auto_demotion_disable(); - if (disable_promotion_to_c1e) + if (c1e_promotion == C1E_PROMOTION_ENABLE) + c1e_promotion_enable(); + else if (c1e_promotion == C1E_PROMOTION_DISABLE) c1e_promotion_disable(); return 0; @@ -1833,7 +1837,8 @@ static int __init intel_idle_init(void) if (icpu) { cpuidle_state_table = icpu->state_table; auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; - disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->disable_promotion_to_c1e) + c1e_promotion = C1E_PROMOTION_DISABLE; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { -- Gitee From 2b0581cc4a97a912816e3d52990ee43f263716eb Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 27 Apr 2022 09:08:53 +0300 Subject: [PATCH 13/48] intel_idle: Fix SPR C6 optimization mainline inclusion from mainline-v5.18-rc5 commit 7eac3bd38d18cd3317756649921b8264ddfee692 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I5BECY CVE: NA Intel-SIG: commit 7eac3bd38d18 intel_idle: Fix SPR C6 optimization. Backport for intel_idle driver -------------------------------- The Sapphire Rapids (SPR) C6 optimization was added to the end of the 'spr_idle_state_table_update()' function. However, the function has a 'return' which may happen before the optimization has a chance to run. And this may prevent the optimization from happening. This is an unlikely scenario, but possible if user boots with, say, the 'intel_idle.preferred_cstates=6' kernel boot option. This patch fixes the issue by eliminating the problematic 'return' statement. Fixes: 3a9cf77b60dc ("intel_idle: add core C6 optimization for SPR") Suggested-by: Jan Beulich Reported-by: Jan Beulich Signed-off-by: Artem Bityutskiy [ rjw: Minor changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/idle/intel_idle.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index cf5ed4c1d02c..47551ab73ca8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1581,11 +1581,9 @@ static void __init spr_idle_state_table_update(void) unsigned long long msr; /* Check if user prefers C1E over C1. */ - if (preferred_states_mask & BIT(2)) { - if (preferred_states_mask & BIT(1)) - /* Both can't be enabled, stick to the defaults. */ - return; - + if ((preferred_states_mask & BIT(2)) && + !(preferred_states_mask & BIT(1))) { + /* Disable C1 and enable C1E. */ spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; -- Gitee From 879d4bbc4747e1f79dae81dd031562482e4f3d1c Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 28 Oct 2020 18:44:45 -0700 Subject: [PATCH 14/48] PCI: Add defines for Designated Vendor-Specific Extended Capability mainline inclusion from mainline-v5.11-rc1 commit 1dc2da5cd51f648de6d1df87e2bc6ea13f72f19c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 1dc2da5cd51f PCI: Add defines for Designated Vendor-Specific Extended Capability. 
Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Add PCIe Designated Vendor-Specific Extended Capability (DVSEC) and defines for the header offsets. Defined in PCIe r5.0, sec 7.9.6. Signed-off-by: David E. Box Acked-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- include/uapi/linux/pci_regs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 7e0d526dd96f..9e257fe9efa5 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -729,6 +729,7 @@ #define PCI_EXT_CAP_ID_DPC 0x1D /* Downstream Port Containment */ #define PCI_EXT_CAP_ID_L1SS 0x1E /* L1 PM Substates */ #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ +#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ #define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT @@ -1079,6 +1080,10 @@ #define PCI_L1SS_CTL1_LTR_L12_TH_SCALE 0xe0000000 /* LTR_L1.2_THRESHOLD_Scale */ #define PCI_L1SS_CTL2 0x0c /* Control 2 Register */ +/* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ +#define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +#define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ + /* Data Link Feature */ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ -- Gitee From ddce0a7181d1b6da69ec1d0452598c82d8a4c183 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 28 Oct 2020 18:55:33 -0700 Subject: [PATCH 15/48] mfd: Intel Platform Monitoring Technology support mainline inclusion from mainline-v5.11-rc1 commit 4f8217d5b0ca8ace78a27dc371b87697eedc421d category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 4f8217d5b0ca mfd: Intel Platform Monitoring Technology support. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Intel Platform Monitoring Technology (PMT) is an architecture for enumerating and accessing hardware monitoring facilities. PMT supports multiple types of monitoring capabilities. This driver creates platform devices for each type so that they may be managed by capability specific drivers (to be introduced). Capabilities are discovered using PCIe DVSEC ids. Support is included for the 3 current capability types, Telemetry, Watcher, and Crashlog. The features are available on new Intel platforms starting from Tiger Lake for which support is added. This patch adds support for Tiger Lake (TGL), Alder Lake (ADL), and Out-of-Band Management Services Module (OOBMSM). Also add a quirk mechanism for several early hardware differences and bugs. For Tiger Lake and Alder Lake, do not support Watcher and Crashlog capabilities since they will not be compatible with future product. Also, fix use a quirk to fix the discovery table offset. Co-developed-by: Alexander Duyck Signed-off-by: Alexander Duyck Signed-off-by: David E. 
Box Reviewed-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- MAINTAINERS | 5 + drivers/mfd/Kconfig | 10 ++ drivers/mfd/Makefile | 1 + drivers/mfd/intel_pmt.c | 223 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 239 insertions(+) create mode 100644 drivers/mfd/intel_pmt.c diff --git a/MAINTAINERS b/MAINTAINERS index 23a23bd94c00..cdffd5192e09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9038,6 +9038,11 @@ F: drivers/mfd/intel_soc_pmic* F: include/linux/mfd/intel_msic.h F: include/linux/mfd/intel_soc_pmic* +INTEL PMT DRIVER +M: "David E. Box" +S: Maintained +F: drivers/mfd/intel_pmt.c + INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT M: Stanislav Yakovlev L: linux-wireless@vger.kernel.org diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 15680c3c9279..3f9f84f9f288 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -699,6 +699,16 @@ config MFD_INTEL_PMC_BXT Register and P-unit access. In addition this creates devices for iTCO watchdog and telemetry that are part of the PMC. +config MFD_INTEL_PMT + tristate "Intel Platform Monitoring Technology (PMT) support" + depends on PCI + select MFD_CORE + help + The Intel Platform Monitoring Technology (PMT) is an interface that + provides access to hardware monitor registers. This driver supports + Telemetry, Watcher, and Crashlog PMT capabilities/devices for + platforms starting from Tiger Lake. + config MFD_IPAQ_MICRO bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support" depends on SA1100_H3100 || SA1100_H3600 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index fb1df45a301e..ce8f1c0583d5 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -216,6 +216,7 @@ obj-$(CONFIG_MFD_INTEL_LPSS_PCI) += intel-lpss-pci.o obj-$(CONFIG_MFD_INTEL_LPSS_ACPI) += intel-lpss-acpi.o obj-$(CONFIG_MFD_INTEL_MSIC) += intel_msic.o obj-$(CONFIG_MFD_INTEL_PMC_BXT) += intel_pmc_bxt.o +obj-$(CONFIG_MFD_INTEL_PMT) += intel_pmt.o obj-$(CONFIG_MFD_PALMAS) += palmas.o obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c new file mode 100644 index 000000000000..744b230cdcca --- /dev/null +++ b/drivers/mfd/intel_pmt.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology PMT driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: David E. 
Box + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Intel DVSEC capability vendor space offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define INTEL_DVSEC_ENTRY_SIZE 4 + +/* PMT capabilities */ +#define DVSEC_INTEL_ID_TELEMETRY 2 +#define DVSEC_INTEL_ID_WATCHER 3 +#define DVSEC_INTEL_ID_CRASHLOG 4 + +struct intel_dvsec_header { + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +enum pmt_quirks { + /* Watcher capability not supported */ + PMT_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog capability not supported */ + PMT_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + PMT_QUIRK_TABLE_SHIFT = BIT(2), +}; + +struct pmt_platform_info { + unsigned long quirks; +}; + +static const struct pmt_platform_info tgl_info = { + .quirks = PMT_QUIRK_NO_WATCHER | PMT_QUIRK_NO_CRASHLOG | + PMT_QUIRK_TABLE_SHIFT, +}; + +static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header, + unsigned long quirks) +{ + struct device *dev = &pdev->dev; + struct resource *res, *tmp; + struct mfd_cell *cell; + const char *name; + int count = header->num_entries; + int size = header->entry_size; + int id = header->id; + int i; + + switch (id) { + case DVSEC_INTEL_ID_TELEMETRY: + name = "pmt_telemetry"; + break; + case DVSEC_INTEL_ID_WATCHER: + if (quirks & PMT_QUIRK_NO_WATCHER) { + dev_info(dev, "Watcher not supported\n"); + return 0; + } + name = "pmt_watcher"; + break; + case DVSEC_INTEL_ID_CRASHLOG: + if (quirks & PMT_QUIRK_NO_CRASHLOG) { + dev_info(dev, "Crashlog not supported\n"); + return 0; + } + name = "pmt_crashlog"; + break; + default: + dev_err(dev, "Unrecognized PMT capability: %d\n", id); + return -EINVAL; + } + + if (!header->num_entries || !header->entry_size) { + dev_err(dev, "Invalid count or size for %s header\n", name); + return -EINVAL; + } + + cell = devm_kzalloc(dev, sizeof(*cell), GFP_KERNEL); + if (!cell) + return -ENOMEM; + + res = devm_kcalloc(dev, count, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + if (quirks & PMT_QUIRK_TABLE_SHIFT) + header->offset >>= 3; + + /* + * The PMT DVSEC contains the starting offset and count for a block of + * discovery tables, each providing access to monitoring facilities for + * a section of the device. Create a resource list of these tables to + * provide to the driver. 
+ */ + for (i = 0, tmp = res; i < count; i++, tmp++) { + tmp->start = pdev->resource[header->tbir].start + + header->offset + i * (size << 2); + tmp->end = tmp->start + (size << 2) - 1; + tmp->flags = IORESOURCE_MEM; + } + + cell->resources = res; + cell->num_resources = count; + cell->name = name; + + return devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, cell, 1, NULL, 0, + NULL); +} + +static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct pmt_platform_info *info; + unsigned long quirks = 0; + bool found_devices = false; + int ret, pos = 0; + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + info = (struct pmt_platform_info *)id->driver_data; + + if (info) + quirks = info->quirks; + + do { + struct intel_dvsec_header header; + u32 table; + u16 vid; + + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); + if (!pos) + break; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid); + if (vid != PCI_VENDOR_ID_INTEL) + continue; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, + &header.id); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, + &header.num_entries); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, + &header.entry_size); + pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, + &table); + + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + + ret = pmt_add_dev(pdev, &header, quirks); + if (ret) { + dev_warn(&pdev->dev, + "Failed to add device for DVSEC id %d\n", + header.id); + continue; + } + + found_devices = true; + } while (true); + + if (!found_devices) + return -ENODEV; + + pm_runtime_put(&pdev->dev); + pm_runtime_allow(&pdev->dev); + + return 0; +} + +static void pmt_pci_remove(struct pci_dev *pdev) +{ + pm_runtime_forbid(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); +} + +#define PCI_DEVICE_ID_INTEL_PMT_ADL 0x467d +#define PCI_DEVICE_ID_INTEL_PMT_OOBMSM 0x09a7 +#define PCI_DEVICE_ID_INTEL_PMT_TGL 0x9a0d +static const struct pci_device_id pmt_pci_ids[] = { + { PCI_DEVICE_DATA(INTEL, PMT_ADL, &tgl_info) }, + { PCI_DEVICE_DATA(INTEL, PMT_OOBMSM, NULL) }, + { PCI_DEVICE_DATA(INTEL, PMT_TGL, &tgl_info) }, + { } +}; +MODULE_DEVICE_TABLE(pci, pmt_pci_ids); + +static struct pci_driver pmt_pci_driver = { + .name = "intel-pmt", + .id_table = pmt_pci_ids, + .probe = pmt_pci_probe, + .remove = pmt_pci_remove, +}; +module_pci_driver(pmt_pci_driver); + +MODULE_AUTHOR("David E. Box "); +MODULE_DESCRIPTION("Intel Platform Monitoring Technology PMT driver"); +MODULE_LICENSE("GPL v2"); -- Gitee From 81b9ddf8fa3e6b6b208a2385ce48863122d2eacc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 28 Oct 2020 18:55:34 -0700 Subject: [PATCH 16/48] platform/x86: Intel PMT class driver mainline inclusion from mainline-v5.11-rc1 commit e2729113ce66d8d21f729b41bc3ed3feaf1acf69 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit e2729113ce66 platform/x86: Intel PMT class driver. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Intel Platform Monitoring Technology is meant to provide a common way to access telemetry and system metrics. Register mappings are not provided by the driver. Instead, a GUID is read from a header for each endpoint. The GUID identifies the device and is to be used with an XML, provided by the vendor, to discover the available set of metrics and their register mapping. 
This allows firmware updates to modify the register space without needing to update the driver every time with new mappings. Firmware writes a new GUID in this case to specify the new mapping. Software tools with access to the associated XML file can then interpret the changes. The module manages access to all Intel PMT endpoints on a system, independent of the device exporting them. It creates an intel_pmt class to manage the devices. For each telemetry endpoint, sysfs files provide GUID and size information as well as a pointer to the parent device the telemetry came from. Software may discover the association between endpoints and devices by iterating through the list in sysfs, or by looking for the existence of the class folder under the device of interest. A binary sysfs attribute of the same name allows software to then read or map the telemetry space for direct access. Signed-off-by: Alexander Duyck Signed-off-by: David E. Box Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- .../ABI/testing/sysfs-class-intel_pmt | 54 ++++ MAINTAINERS | 1 + drivers/platform/x86/Kconfig | 12 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/intel_pmt_class.c | 297 ++++++++++++++++++ drivers/platform/x86/intel_pmt_class.h | 52 +++ 6 files changed, 417 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-intel_pmt create mode 100644 drivers/platform/x86/intel_pmt_class.c create mode 100644 drivers/platform/x86/intel_pmt_class.h diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt b/Documentation/ABI/testing/sysfs-class-intel_pmt new file mode 100644 index 000000000000..926b5cf95fd1 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-intel_pmt @@ -0,0 +1,54 @@ +What: /sys/class/intel_pmt/ +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + The intel_pmt/ class directory contains information for + devices that expose hardware telemetry using Intel Platform + Monitoring Technology (PMT) + +What: /sys/class/intel_pmt/telem +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + The telem directory contains files describing an instance of + a PMT telemetry device that exposes hardware telemetry. Each + telem directory has an associated telem file. This file + may be opened and mapped or read to access the telemetry space + of the device. The register layout of the telemetry space is + determined from an XML file that matches the PCI device id and + GUID for the device. + +What: /sys/class/intel_pmt/telem/telem +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The telemetry data for this telemetry device. This file + may be mapped or read to obtain the data. + +What: /sys/class/intel_pmt/telem/guid +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The GUID for this telemetry device. The GUID identifies + the version of the XML file for the parent device that is to + be used to get the register layout. + +What: /sys/class/intel_pmt/telem/size +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The size of telemetry region in bytes that corresponds to + the mapping size for the telem file. + +What: /sys/class/intel_pmt/telem/offset +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The offset of telemetry region in bytes that corresponds to + the mapping for the telem file. 
diff --git a/MAINTAINERS b/MAINTAINERS index cdffd5192e09..466b1c599848 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9042,6 +9042,7 @@ INTEL PMT DRIVER M: "David E. Box" S: Maintained F: drivers/mfd/intel_pmt.c +F: drivers/platform/x86/intel_pmt_* INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT M: Stanislav Yakovlev diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index a1858689d6e1..9118f76ed3e9 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1362,6 +1362,18 @@ config INTEL_PMC_CORE - LTR Ignore - MPHY/PLL gating status (Sunrisepoint PCH only) +config INTEL_PMT_CLASS + tristate "Intel Platform Monitoring Technology (PMT) Class driver" + help + The Intel Platform Monitoring Technology (PMT) class driver provides + the basic sysfs interface and file hierarchy uses by PMT devices. + + For more information, see: + + + To compile this driver as a module, choose M here: the module + will be called intel_pmt_class. + config INTEL_PUNIT_IPC tristate "Intel P-Unit IPC Driver" help diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 5f823f7eff45..f4b1f87f2401 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -140,6 +140,7 @@ obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_MRFLD_PWRBTN) += intel_mrfld_pwrbtn.o obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o intel_pmc_core_pltdrv.o +obj-$(CONFIG_INTEL_PMT_CLASS) += intel_pmt_class.o obj-$(CONFIG_INTEL_PUNIT_IPC) += intel_punit_ipc.o obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c new file mode 100644 index 000000000000..aa88dc23bbde --- /dev/null +++ b/drivers/platform/x86/intel_pmt_class.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitory Technology Telemetry driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. 
+ * + * Author: "Alexander Duyck" + */ + +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define PMT_XA_START 0 +#define PMT_XA_MAX INT_MAX +#define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX) + +/* + * sysfs + */ +static ssize_t +intel_pmt_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + struct intel_pmt_entry *entry = container_of(attr, + struct intel_pmt_entry, + pmt_bin_attr); + + if (off < 0) + return -EINVAL; + + if (off >= entry->size) + return 0; + + if (count > entry->size - off) + count = entry->size - off; + + memcpy_fromio(buf, entry->base + off, count); + + return count; +} + +static int +intel_pmt_mmap(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, struct vm_area_struct *vma) +{ + struct intel_pmt_entry *entry = container_of(attr, + struct intel_pmt_entry, + pmt_bin_attr); + unsigned long vsize = vma->vm_end - vma->vm_start; + struct device *dev = kobj_to_dev(kobj); + unsigned long phys = entry->base_addr; + unsigned long pfn = PFN_DOWN(phys); + unsigned long psize; + + if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) + return -EROFS; + + psize = (PFN_UP(entry->base_addr + entry->size) - pfn) * PAGE_SIZE; + if (vsize > psize) { + dev_err(dev, "Requested mmap size is too large\n"); + return -EINVAL; + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + vsize, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static ssize_t +guid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "0x%x\n", entry->guid); +} +static DEVICE_ATTR_RO(guid); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "%zu\n", entry->size); +} +static DEVICE_ATTR_RO(size); + +static ssize_t +offset_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + + return sprintf(buf, "%lu\n", offset_in_page(entry->base_addr)); +} +static DEVICE_ATTR_RO(offset); + +static struct attribute *intel_pmt_attrs[] = { + &dev_attr_guid.attr, + &dev_attr_size.attr, + &dev_attr_offset.attr, + NULL +}; +ATTRIBUTE_GROUPS(intel_pmt); + +static struct class intel_pmt_class = { + .name = "intel_pmt", + .owner = THIS_MODULE, + .dev_groups = intel_pmt_groups, +}; + +static int intel_pmt_populate_entry(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev, + struct resource *disc_res) +{ + struct pci_dev *pci_dev = to_pci_dev(dev->parent); + u8 bir; + + /* + * The base offset should always be 8 byte aligned. + * + * For non-local access types the lower 3 bits of base offset + * contains the index of the base address register where the + * telemetry can be found. 
+ */ + bir = GET_BIR(header->base_offset); + + /* Local access and BARID only for now */ + switch (header->access_type) { + case ACCESS_LOCAL: + if (bir) { + dev_err(dev, + "Unsupported BAR index %d for access type %d\n", + bir, header->access_type); + return -EINVAL; + } + /* + * For access_type LOCAL, the base address is as follows: + * base address = end of discovery region + base offset + */ + entry->base_addr = disc_res->end + 1 + header->base_offset; + break; + case ACCESS_BARID: + /* + * If another BAR was specified then the base offset + * represents the offset within that BAR. SO retrieve the + * address from the parent PCI device and add offset. + */ + entry->base_addr = pci_resource_start(pci_dev, bir) + + GET_ADDRESS(header->base_offset); + break; + default: + dev_err(dev, "Unsupported access type %d\n", + header->access_type); + return -EINVAL; + } + + entry->guid = header->guid; + entry->size = header->size; + + return 0; +} + +static int intel_pmt_dev_register(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct device *parent) +{ + struct resource res; + struct device *dev; + int ret; + + ret = xa_alloc(ns->xa, &entry->devid, entry, PMT_XA_LIMIT, GFP_KERNEL); + if (ret) + return ret; + + dev = device_create(&intel_pmt_class, parent, MKDEV(0, 0), entry, + "%s%d", ns->name, entry->devid); + + if (IS_ERR(dev)) { + dev_err(parent, "Could not create %s%d device node\n", + ns->name, entry->devid); + ret = PTR_ERR(dev); + goto fail_dev_create; + } + + entry->kobj = &dev->kobj; + + if (ns->attr_grp) { + ret = sysfs_create_group(entry->kobj, ns->attr_grp); + if (ret) + goto fail_sysfs; + } + + /* if size is 0 assume no data buffer, so no file needed */ + if (!entry->size) + return 0; + + res.start = entry->base_addr; + res.end = res.start + entry->size - 1; + res.flags = IORESOURCE_MEM; + + entry->base = devm_ioremap_resource(dev, &res); + if (IS_ERR(entry->base)) { + ret = PTR_ERR(entry->base); + goto fail_ioremap; + } + + sysfs_bin_attr_init(&entry->pmt_bin_attr); + entry->pmt_bin_attr.attr.name = ns->name; + entry->pmt_bin_attr.attr.mode = 0440; + entry->pmt_bin_attr.mmap = intel_pmt_mmap; + entry->pmt_bin_attr.read = intel_pmt_read; + entry->pmt_bin_attr.size = entry->size; + + ret = sysfs_create_bin_file(&dev->kobj, &entry->pmt_bin_attr); + if (!ret) + return 0; + +fail_ioremap: + sysfs_remove_group(entry->kobj, ns->attr_grp); +fail_sysfs: + device_unregister(dev); +fail_dev_create: + xa_erase(ns->xa, entry->devid); + + return ret; +} + +int intel_pmt_dev_create(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct platform_device *pdev, int idx) +{ + struct intel_pmt_header header; + struct resource *disc_res; + int ret = -ENODEV; + + disc_res = platform_get_resource(pdev, IORESOURCE_MEM, idx); + if (!disc_res) + return ret; + + entry->disc_table = devm_platform_ioremap_resource(pdev, idx); + if (IS_ERR(entry->disc_table)) + return PTR_ERR(entry->disc_table); + + ret = ns->pmt_header_decode(entry, &header, &pdev->dev); + if (ret) + return ret; + + ret = intel_pmt_populate_entry(entry, &header, &pdev->dev, disc_res); + if (ret) + return ret; + + return intel_pmt_dev_register(entry, ns, &pdev->dev); + +} +EXPORT_SYMBOL_GPL(intel_pmt_dev_create); + +void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns) +{ + struct device *dev = kobj_to_dev(entry->kobj); + + if (entry->size) + sysfs_remove_bin_file(entry->kobj, &entry->pmt_bin_attr); + + if (ns->attr_grp) + sysfs_remove_group(entry->kobj, ns->attr_grp); 
+ + device_unregister(dev); + xa_erase(ns->xa, entry->devid); +} +EXPORT_SYMBOL_GPL(intel_pmt_dev_destroy); + +static int __init pmt_class_init(void) +{ + return class_register(&intel_pmt_class); +} + +static void __exit pmt_class_exit(void) +{ + class_unregister(&intel_pmt_class); +} + +module_init(pmt_class_init); +module_exit(pmt_class_exit); + +MODULE_AUTHOR("Alexander Duyck "); +MODULE_DESCRIPTION("Intel PMT Class driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/platform/x86/intel_pmt_class.h b/drivers/platform/x86/intel_pmt_class.h new file mode 100644 index 000000000000..de8f8139ba31 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_class.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_PMT_CLASS_H +#define _INTEL_PMT_CLASS_H + +#include +#include +#include +#include +#include +#include + +/* PMT access types */ +#define ACCESS_BARID 2 +#define ACCESS_LOCAL 3 + +/* PMT discovery base address/offset register layout */ +#define GET_BIR(v) ((v) & GENMASK(2, 0)) +#define GET_ADDRESS(v) ((v) & GENMASK(31, 3)) + +struct intel_pmt_entry { + struct bin_attribute pmt_bin_attr; + struct kobject *kobj; + void __iomem *disc_table; + void __iomem *base; + unsigned long base_addr; + size_t size; + u32 guid; + int devid; +}; + +struct intel_pmt_header { + u32 base_offset; + u32 size; + u32 guid; + u8 access_type; +}; + +struct intel_pmt_namespace { + const char *name; + struct xarray *xa; + const struct attribute_group *attr_grp; + int (*pmt_header_decode)(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev); +}; + +int intel_pmt_dev_create(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns, + struct platform_device *pdev, int idx); +void intel_pmt_dev_destroy(struct intel_pmt_entry *entry, + struct intel_pmt_namespace *ns); +#endif -- Gitee From 2b9ca8a6fdbdefeff6d7832f666c32ba6006ac4f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 28 Oct 2020 18:55:35 -0700 Subject: [PATCH 17/48] platform/x86: Intel PMT Telemetry capability driver mainline inclusion from mainline-v5.11-rc1 commit 68fe8e6e2c4b04e2733d77834f55a4a0e172b770 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 68fe8e6e2c4b platform/x86: Intel PMT Telemetry capability driver. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- PMT Telemetry is a capability of the Intel Platform Monitoring Technology. The Telemetry capability provides access to device telemetry metrics that provide hardware performance data to users from read-only register spaces. With this driver present the intel_pmt directory can be populated with telem devices. These devices will contain the standard intel_pmt sysfs data and a "telem" binary sysfs attribute which can be used to access the telemetry data. Also create a PCI device id list for early telemetry hardware that require workarounds for known issues. Signed-off-by: Alexander Duyck Co-developed-by: David E. Box Signed-off-by: David E. 
Box Reviewed-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/Kconfig | 11 ++ drivers/platform/x86/Makefile | 1 + drivers/platform/x86/intel_pmt_telemetry.c | 160 +++++++++++++++++++++ 3 files changed, 172 insertions(+) create mode 100644 drivers/platform/x86/intel_pmt_telemetry.c diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 9118f76ed3e9..ac93c807eb9a 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1374,6 +1374,17 @@ config INTEL_PMT_CLASS To compile this driver as a module, choose M here: the module will be called intel_pmt_class. +config INTEL_PMT_TELEMETRY + tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver" + select INTEL_PMT_CLASS + help + The Intel Platform Monitoring Technology (PMT) Telemetry driver provides + access to hardware telemetry metrics on devices that support the + feature. + + To compile this driver as a module, choose M here: the module + will be called intel_pmt_telemetry. + config INTEL_PUNIT_IPC tristate "Intel P-Unit IPC Driver" help diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index f4b1f87f2401..6a7b61f59ea8 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_MRFLD_PWRBTN) += intel_mrfld_pwrbtn.o obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o intel_pmc_core_pltdrv.o obj-$(CONFIG_INTEL_PMT_CLASS) += intel_pmt_class.o +obj-$(CONFIG_INTEL_PMT_TELEMETRY) += intel_pmt_telemetry.o obj-$(CONFIG_INTEL_PUNIT_IPC) += intel_punit_ipc.o obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel_pmt_telemetry.c new file mode 100644 index 000000000000..f8a87614efa4 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_telemetry.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology Telemetry driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: "David E. Box" + */ + +#include +#include +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define TELEM_DEV_NAME "pmt_telemetry" + +#define TELEM_SIZE_OFFSET 0x0 +#define TELEM_GUID_OFFSET 0x4 +#define TELEM_BASE_OFFSET 0x8 +#define TELEM_ACCESS(v) ((v) & GENMASK(3, 0)) +/* size is in bytes */ +#define TELEM_SIZE(v) (((v) & GENMASK(27, 12)) >> 10) + +/* Used by client hardware to identify a fixed telemetry entry */ +#define TELEM_CLIENT_FIXED_BLOCK_GUID 0x10000000 + +struct pmt_telem_priv { + int num_entries; + struct intel_pmt_entry entry[]; +}; + +/* + * Early implementations of PMT on client platforms have some + * differences from the server platforms (which use the Out Of Band + * Management Services Module OOBMSM). This list tracks those + * platforms as needed to handle those differences. Newer client + * platforms are expected to be fully compatible with server.
+ */ +static const struct pci_device_id pmt_telem_early_client_pci_ids[] = { + { PCI_VDEVICE(INTEL, 0x9a0d) }, /* TGL */ + { PCI_VDEVICE(INTEL, 0x467d) }, /* ADL */ + { } +}; + +static bool intel_pmt_is_early_client_hw(struct device *dev) +{ + struct pci_dev *parent = to_pci_dev(dev->parent); + + return !!pci_match_id(pmt_telem_early_client_pci_ids, parent); +} + +static bool pmt_telem_region_overlaps(struct intel_pmt_entry *entry, + struct device *dev) +{ + u32 guid = readl(entry->disc_table + TELEM_GUID_OFFSET); + + if (guid != TELEM_CLIENT_FIXED_BLOCK_GUID) + return false; + + return intel_pmt_is_early_client_hw(dev); +} + +static int pmt_telem_header_decode(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev) +{ + void __iomem *disc_table = entry->disc_table; + + if (pmt_telem_region_overlaps(entry, dev)) + return 1; + + header->access_type = TELEM_ACCESS(readl(disc_table)); + header->guid = readl(disc_table + TELEM_GUID_OFFSET); + header->base_offset = readl(disc_table + TELEM_BASE_OFFSET); + + /* Size is measured in DWORDS, but accessor returns bytes */ + header->size = TELEM_SIZE(readl(disc_table)); + + return 0; +} + +static DEFINE_XARRAY_ALLOC(telem_array); +static struct intel_pmt_namespace pmt_telem_ns = { + .name = "telem", + .xa = &telem_array, + .pmt_header_decode = pmt_telem_header_decode, +}; + +static int pmt_telem_remove(struct platform_device *pdev) +{ + struct pmt_telem_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < priv->num_entries; i++) + intel_pmt_dev_destroy(&priv->entry[i], &pmt_telem_ns); + + return 0; +} + +static int pmt_telem_probe(struct platform_device *pdev) +{ + struct pmt_telem_priv *priv; + size_t size; + int i, ret; + + size = struct_size(priv, entry, pdev->num_resources); + priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + for (i = 0; i < pdev->num_resources; i++) { + struct intel_pmt_entry *entry = &priv->entry[i]; + + ret = intel_pmt_dev_create(entry, &pmt_telem_ns, pdev, i); + if (ret < 0) + goto abort_probe; + if (ret) + continue; + + priv->num_entries++; + } + + return 0; +abort_probe: + pmt_telem_remove(pdev); + return ret; +} + +static struct platform_driver pmt_telem_driver = { + .driver = { + .name = TELEM_DEV_NAME, + }, + .remove = pmt_telem_remove, + .probe = pmt_telem_probe, +}; + +static int __init pmt_telem_init(void) +{ + return platform_driver_register(&pmt_telem_driver); +} +module_init(pmt_telem_init); + +static void __exit pmt_telem_exit(void) +{ + platform_driver_unregister(&pmt_telem_driver); + xa_destroy(&telem_array); +} +module_exit(pmt_telem_exit); + +MODULE_AUTHOR("David E. Box "); +MODULE_DESCRIPTION("Intel PMT Telemetry driver"); +MODULE_ALIAS("platform:" TELEM_DEV_NAME); +MODULE_LICENSE("GPL v2"); -- Gitee From c25e00107196a59b8710e29ae8510f50b28ed489 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 28 Oct 2020 18:55:36 -0700 Subject: [PATCH 18/48] platform/x86: Intel PMT Crashlog capability driver mainline inclusion from mainline-v5.11-rc1 commit 5ef9998c96b0c99c49c202054586967e609286d2 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 5ef9998c96b0 platform/x86: Intel PMT Crashlog capability driver. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Add support for the Intel Platform Monitoring Technology crashlog interface. 
This interface provides a few sysfs values to allow for controlling the crashlog telemetry interface as well as a character driver to allow for mapping the crashlog memory region so that it can be accessed after a crashlog has been recorded. This driver is meant to only support the server version of the crashlog which is identified as crash_type 1 with a version of zero. Currently no other types are supported. Signed-off-by: Alexander Duyck Signed-off-by: David E. Box Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- .../ABI/testing/sysfs-class-intel_pmt | 65 ++++ drivers/platform/x86/Kconfig | 11 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/intel_pmt_crashlog.c | 328 ++++++++++++++++++ 4 files changed, 405 insertions(+) create mode 100644 drivers/platform/x86/intel_pmt_crashlog.c diff --git a/Documentation/ABI/testing/sysfs-class-intel_pmt b/Documentation/ABI/testing/sysfs-class-intel_pmt index 926b5cf95fd1..ed4c886a21b1 100644 --- a/Documentation/ABI/testing/sysfs-class-intel_pmt +++ b/Documentation/ABI/testing/sysfs-class-intel_pmt @@ -52,3 +52,68 @@ Contact: David Box Description: (RO) The offset of telemetry region in bytes that corresponds to the mapping for the telem file. + +What: /sys/class/intel_pmt/crashlog +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + The crashlog directory contains files for configuring an + instance of a PMT crashlog device that can perform crash data + recording. Each crashlog device has an associated crashlog + file. This file can be opened and mapped or read to access the + resulting crashlog buffer. The register layout for the buffer + can be determined from an XML file of specified GUID for the + parent device. + +What: /sys/class/intel_pmt/crashlog/crashlog +Date: October 2020 +KernelVersion: 5.10 +Contact: David Box +Description: + (RO) The crashlog buffer for this crashlog device. This file + may be mapped or read to obtain the data. + +What: /sys/class/intel_pmt/crashlog/guid +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RO) The GUID for this crashlog device. The GUID identifies the + version of the XML file for the parent device that should be + used to determine the register layout. + +What: /sys/class/intel_pmt/crashlog/size +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RO) The length of the result buffer in bytes that corresponds + to the size for the crashlog buffer. + +What: /sys/class/intel_pmt/crashlog/offset +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RO) The offset of the buffer in bytes that corresponds + to the mapping for the crashlog device. + +What: /sys/class/intel_pmt/crashlog/enable +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RW) Boolean value controlling if the crashlog functionality + is enabled for the crashlog device. + +What: /sys/class/intel_pmt/crashlog/trigger +Date: October 2020 +KernelVersion: 5.10 +Contact: Alexander Duyck +Description: + (RW) Boolean value controlling the triggering of the crashlog + device node. When read it provides data on if the crashlog has + been triggered. When written to it can be used to either clear + the current trigger by writing false, or to trigger a new + event if the trigger is not currently set. 
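To make the ABI described above concrete, a minimal userspace sketch follows (illustration only, not part of the patch). It assumes a first crashlog instance named "crashlog0", an assumption based on the "%s%d" device naming in the PMT class driver, and it uses read(2) on the binary file rather than mmap(2) for brevity, both of which the documentation above permits.

/*
 * Hypothetical userspace example: query the enable/trigger state of a PMT
 * crashlog device and dump its buffer. The sysfs path below is an assumption
 * for the first registered instance.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define CRASHLOG_DIR "/sys/class/intel_pmt/crashlog0"

/* Read a small text attribute such as "enable", "trigger" or "size". */
static int read_attr(const char *name, char *buf, size_t len)
{
	char path[256];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), CRASHLOG_DIR "/%s", name);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;
	n = read(fd, buf, len - 1);
	close(fd);
	if (n < 0)
		return -1;
	buf[n] = '\0';
	return 0;
}

int main(void)
{
	char val[32];
	long size;

	if (read_attr("enable", val, sizeof(val)) == 0)
		printf("enable:  %s", val);

	/* "trigger" reads back 1 once a crashlog has completed. */
	if (read_attr("trigger", val, sizeof(val)) == 0)
		printf("trigger: %s", val);

	if (read_attr("size", val, sizeof(val)) != 0)
		return 1;
	size = strtol(val, NULL, 0);

	/* Dump the completed log from the read-only "crashlog" binary file. */
	if (size > 0) {
		char *data = malloc(size);
		int fd = open(CRASHLOG_DIR "/crashlog", O_RDONLY);

		if (data && fd >= 0 && read(fd, data, size) == size)
			printf("read %ld bytes of crashlog data\n", size);
		if (fd >= 0)
			close(fd);
		free(data);
	}
	return 0;
}

Note that reading the binary file will normally require elevated privileges, since the class driver creates it with mode 0440.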
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index ac93c807eb9a..da36a682d0cf 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1385,6 +1385,17 @@ config INTEL_PMT_TELEMETRY To compile this driver as a module, choose M here: the module will be called intel_pmt_telemetry. +config INTEL_PMT_CRASHLOG + tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver" + select INTEL_PMT_CLASS + help + The Intel Platform Monitoring Technology (PMT) crashlog driver provides + access to hardware crashlog capabilities on devices that support the + feature. + + To compile this driver as a module, choose M here: the module + will be called intel_pmt_crashlog. + config INTEL_PUNIT_IPC tristate "Intel P-Unit IPC Driver" help diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 6a7b61f59ea8..ca82c1344977 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -142,6 +142,7 @@ obj-$(CONFIG_INTEL_MRFLD_PWRBTN) += intel_mrfld_pwrbtn.o obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o intel_pmc_core_pltdrv.o obj-$(CONFIG_INTEL_PMT_CLASS) += intel_pmt_class.o obj-$(CONFIG_INTEL_PMT_TELEMETRY) += intel_pmt_telemetry.o +obj-$(CONFIG_INTEL_PMT_CRASHLOG) += intel_pmt_crashlog.o obj-$(CONFIG_INTEL_PUNIT_IPC) += intel_punit_ipc.o obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o obj-$(CONFIG_INTEL_SCU_PCI) += intel_scu_pcidrv.o diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c new file mode 100644 index 000000000000..97dd749c8290 --- /dev/null +++ b/drivers/platform/x86/intel_pmt_crashlog.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Platform Monitoring Technology Crashlog driver + * + * Copyright (c) 2020, Intel Corporation. + * All Rights Reserved. + * + * Author: "Alexander Duyck" + */ + +#include +#include +#include +#include +#include +#include + +#include "intel_pmt_class.h" + +#define DRV_NAME "pmt_crashlog" + +/* Crashlog discovery header types */ +#define CRASH_TYPE_OOBMSM 1 + +/* Control Flags */ +#define CRASHLOG_FLAG_DISABLE BIT(27) + +/* + * Bits 28 and 29 control the state of bit 31. + * + * Bit 28 will clear bit 31, if set, allowing a new crashlog to be captured. + * Bit 29 will immediately trigger a crashlog to be generated, setting bit 31. + * Bit 30 is read-only and reserved as 0. + * Bit 31 is the read-only status with a 1 indicating log is complete. 
+ */ +#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(28) +#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(29) +#define CRASHLOG_FLAG_TRIGGER_COMPLETE BIT(31) +#define CRASHLOG_FLAG_TRIGGER_MASK GENMASK(31, 28) + +/* Crashlog Discovery Header */ +#define CONTROL_OFFSET 0x0 +#define GUID_OFFSET 0x4 +#define BASE_OFFSET 0x8 +#define SIZE_OFFSET 0xC +#define GET_ACCESS(v) ((v) & GENMASK(3, 0)) +#define GET_TYPE(v) (((v) & GENMASK(7, 4)) >> 4) +#define GET_VERSION(v) (((v) & GENMASK(19, 16)) >> 16) +/* size is in bytes */ +#define GET_SIZE(v) ((v) * sizeof(u32)) + +struct crashlog_entry { + /* entry must be first member of struct */ + struct intel_pmt_entry entry; + struct mutex control_mutex; +}; + +struct pmt_crashlog_priv { + int num_entries; + struct crashlog_entry entry[]; +}; + +/* + * I/O + */ +static bool pmt_crashlog_complete(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* return current value of the crashlog complete flag */ + return !!(control & CRASHLOG_FLAG_TRIGGER_COMPLETE); +} + +static bool pmt_crashlog_disabled(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* return current value of the crashlog disabled flag */ + return !!(control & CRASHLOG_FLAG_DISABLE); +} + +static bool pmt_crashlog_supported(struct intel_pmt_entry *entry) +{ + u32 discovery_header = readl(entry->disc_table + CONTROL_OFFSET); + u32 crash_type, version; + + crash_type = GET_TYPE(discovery_header); + version = GET_VERSION(discovery_header); + + /* + * Currently we only recognize OOBMSM version 0 devices. + * We can ignore all other crashlog devices in the system. + */ + return crash_type == CRASH_TYPE_OOBMSM && version == 0; +} + +static void pmt_crashlog_set_disable(struct intel_pmt_entry *entry, + bool disable) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + /* clear trigger bits so we are only modifying disable flag */ + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + + if (disable) + control |= CRASHLOG_FLAG_DISABLE; + else + control &= ~CRASHLOG_FLAG_DISABLE; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +static void pmt_crashlog_set_clear(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + control |= CRASHLOG_FLAG_TRIGGER_CLEAR; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +static void pmt_crashlog_set_execute(struct intel_pmt_entry *entry) +{ + u32 control = readl(entry->disc_table + CONTROL_OFFSET); + + control &= ~CRASHLOG_FLAG_TRIGGER_MASK; + control |= CRASHLOG_FLAG_TRIGGER_EXECUTE; + + writel(control, entry->disc_table + CONTROL_OFFSET); +} + +/* + * sysfs + */ +static ssize_t +enable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry *entry = dev_get_drvdata(dev); + int enabled = !pmt_crashlog_disabled(entry); + + return sprintf(buf, "%d\n", enabled); +} + +static ssize_t +enable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct crashlog_entry *entry; + bool enabled; + int result; + + entry = dev_get_drvdata(dev); + + result = kstrtobool(buf, &enabled); + if (result) + return result; + + mutex_lock(&entry->control_mutex); + pmt_crashlog_set_disable(&entry->entry, !enabled); + mutex_unlock(&entry->control_mutex); + + return count; +} +static DEVICE_ATTR_RW(enable); + +static ssize_t +trigger_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct intel_pmt_entry 
*entry; + int trigger; + + entry = dev_get_drvdata(dev); + trigger = pmt_crashlog_complete(entry); + + return sprintf(buf, "%d\n", trigger); +} + +static ssize_t +trigger_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct crashlog_entry *entry; + bool trigger; + int result; + + entry = dev_get_drvdata(dev); + + result = kstrtobool(buf, &trigger); + if (result) + return result; + + mutex_lock(&entry->control_mutex); + + if (!trigger) { + pmt_crashlog_set_clear(&entry->entry); + } else if (pmt_crashlog_complete(&entry->entry)) { + /* we cannot trigger a new crash if one is still pending */ + result = -EEXIST; + goto err; + } else if (pmt_crashlog_disabled(&entry->entry)) { + /* if device is currently disabled, return busy */ + result = -EBUSY; + goto err; + } else { + pmt_crashlog_set_execute(&entry->entry); + } + + result = count; +err: + mutex_unlock(&entry->control_mutex); + return result; +} +static DEVICE_ATTR_RW(trigger); + +static struct attribute *pmt_crashlog_attrs[] = { + &dev_attr_enable.attr, + &dev_attr_trigger.attr, + NULL +}; + +static struct attribute_group pmt_crashlog_group = { + .attrs = pmt_crashlog_attrs, +}; + +static int pmt_crashlog_header_decode(struct intel_pmt_entry *entry, + struct intel_pmt_header *header, + struct device *dev) +{ + void __iomem *disc_table = entry->disc_table; + struct crashlog_entry *crashlog; + + if (!pmt_crashlog_supported(entry)) + return 1; + + /* initialize control mutex */ + crashlog = container_of(entry, struct crashlog_entry, entry); + mutex_init(&crashlog->control_mutex); + + header->access_type = GET_ACCESS(readl(disc_table)); + header->guid = readl(disc_table + GUID_OFFSET); + header->base_offset = readl(disc_table + BASE_OFFSET); + + /* Size is measured in DWORDS, but accessor returns bytes */ + header->size = GET_SIZE(readl(disc_table + SIZE_OFFSET)); + + return 0; +} + +static DEFINE_XARRAY_ALLOC(crashlog_array); +static struct intel_pmt_namespace pmt_crashlog_ns = { + .name = "crashlog", + .xa = &crashlog_array, + .attr_grp = &pmt_crashlog_group, + .pmt_header_decode = pmt_crashlog_header_decode, +}; + +/* + * initialization + */ +static int pmt_crashlog_remove(struct platform_device *pdev) +{ + struct pmt_crashlog_priv *priv = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < priv->num_entries; i++) + intel_pmt_dev_destroy(&priv->entry[i].entry, &pmt_crashlog_ns); + + return 0; +} + +static int pmt_crashlog_probe(struct platform_device *pdev) +{ + struct pmt_crashlog_priv *priv; + size_t size; + int i, ret; + + size = struct_size(priv, entry, pdev->num_resources); + priv = devm_kzalloc(&pdev->dev, size, GFP_KERNEL); + if (!priv) + return -ENOMEM; + + platform_set_drvdata(pdev, priv); + + for (i = 0; i < pdev->num_resources; i++) { + struct intel_pmt_entry *entry = &priv->entry[i].entry; + + ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, pdev, i); + if (ret < 0) + goto abort_probe; + if (ret) + continue; + + priv->num_entries++; + } + + return 0; +abort_probe: + pmt_crashlog_remove(pdev); + return ret; +} + +static struct platform_driver pmt_crashlog_driver = { + .driver = { + .name = DRV_NAME, + }, + .remove = pmt_crashlog_remove, + .probe = pmt_crashlog_probe, +}; + +static int __init pmt_crashlog_init(void) +{ + return platform_driver_register(&pmt_crashlog_driver); +} + +static void __exit pmt_crashlog_exit(void) +{ + platform_driver_unregister(&pmt_crashlog_driver); + xa_destroy(&crashlog_array); +} + +module_init(pmt_crashlog_init); 
+module_exit(pmt_crashlog_exit); + +MODULE_AUTHOR("Alexander Duyck "); +MODULE_DESCRIPTION("Intel PMT Crashlog driver"); +MODULE_ALIAS("platform:" DRV_NAME); +MODULE_LICENSE("GPL v2"); -- Gitee From c10e13c43a8b060acb6c82c7fb18c778ccaf81ff Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 Nov 2020 10:22:51 +0300 Subject: [PATCH 19/48] platform/x86: pmt: Fix a potential Oops on error in probe mainline inclusion from mainline-v5.11-rc1 commit d3d73d25e0d9bc43fd2a6f4b4e58ff182e55b217 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit d3d73d25e0d9 platform/x86: pmt: Fix a potential Oops on error in probe. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- The "ns->attr_grp" pointer can be NULL so this error handling code needs to check for that to avoid an Oops. Fixes: e2729113ce66 ("platform/x86: Intel PMT class driver") Signed-off-by: Dan Carpenter Reviewed-by: David E. Box Link: https://lore.kernel.org/r/20201117072251.GC1111239@mwanda Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/intel_pmt_class.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c index aa88dc23bbde..c8939fba4509 100644 --- a/drivers/platform/x86/intel_pmt_class.c +++ b/drivers/platform/x86/intel_pmt_class.c @@ -225,7 +225,8 @@ static int intel_pmt_dev_register(struct intel_pmt_entry *entry, return 0; fail_ioremap: - sysfs_remove_group(entry->kobj, ns->attr_grp); + if (ns->attr_grp) + sysfs_remove_group(entry->kobj, ns->attr_grp); fail_sysfs: device_unregister(dev); fail_dev_create: -- Gitee From aa1980cb3878e62c6d8f17516a1e06361c086e52 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 26 Jan 2021 12:55:06 -0800 Subject: [PATCH 20/48] platform/x86: intel_pmt: Make INTEL_PMT_CLASS non-user-selectable mainline inclusion from mainline-v5.12-rc2 commit 35d8a973fe4d38afee944db636c3d2b1df3741a7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 35d8a973fe4d platform/x86: intel_pmt: Make INTEL_PMT_CLASS non-user-selectable. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Fix error in Kconfig that exposed INTEL_PMT_CLASS as a user selectable option. It is already selected by INTEL_PMT_TELEMETRY and INTEL_PMT_CRASHLOG which are user selectable. Fixes: e2729113ce66 ("platform/x86: Intel PMT class driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20210126205508.30907-1-david.e.box@linux.intel.com Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index da36a682d0cf..15c8dd312af5 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1363,7 +1363,7 @@ config INTEL_PMC_CORE - MPHY/PLL gating status (Sunrisepoint PCH only) config INTEL_PMT_CLASS - tristate "Intel Platform Monitoring Technology (PMT) Class driver" + tristate help The Intel Platform Monitoring Technology (PMT) class driver provides the basic sysfs interface and file hierarchy uses by PMT devices. -- Gitee From 0c7b979ae17ac1369e71045eabf158480ff1e862 Mon Sep 17 00:00:00 2001 From: "David E. 
Box" Date: Tue, 26 Jan 2021 12:55:07 -0800 Subject: [PATCH 21/48] platform/x86: intel_pmt_telemetry: Add dependency on MFD_INTEL_PMT mainline inclusion from mainline-v5.12-rc2 commit f3f6da5014dea3cc005b36948abe3664b5d1f7d3 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit f3f6da5014de platform/x86: intel_pmt_telemetry: Add dependency on MFD_INTEL_PMT. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- All devices that expose Intel Platform Monitoring Technology (PMT) telemetry are currently owned by the intel_pmt MFD driver. Therefore make the telemetry driver depend on the MFD driver for build. Fixes: 68fe8e6e2c4b ("platform/x86: Intel PMT Telemetry capability driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20210126205508.30907-2-david.e.box@linux.intel.com Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 15c8dd312af5..06484c5c5b8c 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1376,6 +1376,7 @@ config INTEL_PMT_CLASS config INTEL_PMT_TELEMETRY tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver" + depends on MFD_INTEL_PMT select INTEL_PMT_CLASS help The Intel Platform Monitory Technology (PMT) Telemetry driver provides -- Gitee From 22c0f0c9e9bf18f63f8aed22f7e2defd077753df Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 26 Jan 2021 12:55:08 -0800 Subject: [PATCH 22/48] platform/x86: intel_pmt_crashlog: Add dependency on MFD_INTEL_PMT mainline inclusion from mainline-v5.12-rc2 commit fdd3feb37e36bec2ad75d76f8ac4d0273c5c0a91 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit fdd3feb37e36 platform/x86: intel_pmt_crashlog: Add dependency on MFD_INTEL_PMT. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- All devices that expose Intel Platform Monitoring Technology (PMT) crashlog are currently owned by the intel_pmt MFD driver. Therefore make the crashlog driver depend on the MFD driver for build. Fixes: 5ef9998c96b0 ("platform/x86: Intel PMT Crashlog capability driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20210126205508.30907-3-david.e.box@linux.intel.com Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 06484c5c5b8c..a24783aa52ea 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1388,6 +1388,7 @@ config INTEL_PMT_TELEMETRY config INTEL_PMT_CRASHLOG tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver" + depends on MFD_INTEL_PMT select INTEL_PMT_CLASS help The Intel Platform Monitoring Technology (PMT) crashlog driver provides -- Gitee From 9c2552c05b7f89f76c32ce573a33576e430f72f5 Mon Sep 17 00:00:00 2001 From: "David E. 
Box" Date: Wed, 24 Feb 2021 12:10:04 -0800 Subject: [PATCH 23/48] mfd: intel_pmt: Fix nuisance messages and handling of disabled capabilities mainline inclusion from mainline-v5.13-rc1 commit a1a5c1c3df282dc122508a17500317266ef19e46 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit a1a5c1c3df28 mfd: intel_pmt: Fix nuisance messages and handling of disabled capabilities. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Some products will be available that have PMT capabilities that are not supported. Remove the warnings in this instance to avoid nuisance messages and confusion. Also return an error code for capabilities that are disabled by quirk to prevent them from keeping the driver loaded if only disabled capabilities are found. Fixes: 4f8217d5b0ca ("mfd: Intel Platform Monitoring Technology support") Signed-off-by: David E. Box Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/mfd/intel_pmt.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c index 744b230cdcca..65da2b17a204 100644 --- a/drivers/mfd/intel_pmt.c +++ b/drivers/mfd/intel_pmt.c @@ -79,19 +79,18 @@ static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header, case DVSEC_INTEL_ID_WATCHER: if (quirks & PMT_QUIRK_NO_WATCHER) { dev_info(dev, "Watcher not supported\n"); - return 0; + return -EINVAL; } name = "pmt_watcher"; break; case DVSEC_INTEL_ID_CRASHLOG: if (quirks & PMT_QUIRK_NO_CRASHLOG) { dev_info(dev, "Crashlog not supported\n"); - return 0; + return -EINVAL; } name = "pmt_crashlog"; break; default: - dev_err(dev, "Unrecognized PMT capability: %d\n", id); return -EINVAL; } @@ -174,12 +173,8 @@ static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) header.offset = INTEL_DVSEC_TABLE_OFFSET(table); ret = pmt_add_dev(pdev, &header, quirks); - if (ret) { - dev_warn(&pdev->dev, - "Failed to add device for DVSEC id %d\n", - header.id); + if (ret) continue; - } found_devices = true; } while (true); -- Gitee From 1a6161db47ce8d98291caf649c095fe75352e2f5 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Wed, 24 Feb 2021 12:10:05 -0800 Subject: [PATCH 24/48] mfd: intel_pmt: Add support for DG1 mainline inclusion from mainline-v5.13-rc1 commit aa47ad3f853ae72c32b7e46dfc8bc2c8dc2dbad7 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit aa47ad3f853a mfd: intel_pmt: Add support for DG1. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Adds PMT Telemetry aggregator support for the DG1 graphics PCIe card. The device does not have the DVSEC region in its PCI config space so hard code the discovery table data in the driver. Also requires a fix for DG1 in the Telemetry driver for how the ACCESS_TYPE field is used. Signed-off-by: David E. 
Box Reviewed-by: Hans de Goede Signed-off-by: Lee Jones Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/mfd/intel_pmt.c | 101 +++++++++++++++------ drivers/platform/x86/intel_pmt_class.c | 46 ++++++++++ drivers/platform/x86/intel_pmt_class.h | 1 + drivers/platform/x86/intel_pmt_telemetry.c | 20 ---- 4 files changed, 119 insertions(+), 49 deletions(-) diff --git a/drivers/mfd/intel_pmt.c b/drivers/mfd/intel_pmt.c index 65da2b17a204..dd7eb614c28e 100644 --- a/drivers/mfd/intel_pmt.c +++ b/drivers/mfd/intel_pmt.c @@ -49,10 +49,14 @@ enum pmt_quirks { /* Use shift instead of mask to read discovery table offset */ PMT_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + PMT_QUIRK_NO_DVSEC = BIT(3), }; struct pmt_platform_info { unsigned long quirks; + struct intel_dvsec_header **capabilities; }; static const struct pmt_platform_info tgl_info = { @@ -60,6 +64,26 @@ static const struct pmt_platform_info tgl_info = { PMT_QUIRK_TABLE_SHIFT, }; +/* DG1 Platform with DVSEC quirk*/ +static struct intel_dvsec_header dg1_telemetry = { + .length = 0x10, + .id = 2, + .num_entries = 1, + .entry_size = 3, + .tbir = 0, + .offset = 0x466000, +}; + +static struct intel_dvsec_header *dg1_capabilities[] = { + &dg1_telemetry, + NULL +}; + +static const struct pmt_platform_info dg1_info = { + .quirks = PMT_QUIRK_NO_DVSEC, + .capabilities = dg1_capabilities, +}; + static int pmt_add_dev(struct pci_dev *pdev, struct intel_dvsec_header *header, unsigned long quirks) { @@ -147,37 +171,54 @@ static int pmt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (info) quirks = info->quirks; - do { - struct intel_dvsec_header header; - u32 table; - u16 vid; + if (info && (info->quirks & PMT_QUIRK_NO_DVSEC)) { + struct intel_dvsec_header **header; - pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); - if (!pos) - break; + header = info->capabilities; + while (*header) { + ret = pmt_add_dev(pdev, *header, quirks); + if (ret) + dev_warn(&pdev->dev, + "Failed to add device for DVSEC id %d\n", + (*header)->id); + else + found_devices = true; - pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid); - if (vid != PCI_VENDOR_ID_INTEL) - continue; - - pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, - &header.id); - pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, - &header.num_entries); - pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, - &header.entry_size); - pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, - &table); - - header.tbir = INTEL_DVSEC_TABLE_BAR(table); - header.offset = INTEL_DVSEC_TABLE_OFFSET(table); - - ret = pmt_add_dev(pdev, &header, quirks); - if (ret) - continue; - - found_devices = true; - } while (true); + ++header; + } + } else { + do { + struct intel_dvsec_header header; + u32 table; + u16 vid; + + pos = pci_find_next_ext_capability(pdev, pos, PCI_EXT_CAP_ID_DVSEC); + if (!pos) + break; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vid); + if (vid != PCI_VENDOR_ID_INTEL) + continue; + + pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, + &header.id); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_ENTRIES, + &header.num_entries); + pci_read_config_byte(pdev, pos + INTEL_DVSEC_SIZE, + &header.entry_size); + pci_read_config_dword(pdev, pos + INTEL_DVSEC_TABLE, + &table); + + header.tbir = INTEL_DVSEC_TABLE_BAR(table); + header.offset = INTEL_DVSEC_TABLE_OFFSET(table); + + ret = pmt_add_dev(pdev, &header, quirks); + if (ret) + continue; + + found_devices = true; + } while (true); + } if (!found_devices) 
return -ENODEV; @@ -195,10 +236,12 @@ static void pmt_pci_remove(struct pci_dev *pdev) } #define PCI_DEVICE_ID_INTEL_PMT_ADL 0x467d +#define PCI_DEVICE_ID_INTEL_PMT_DG1 0x490e #define PCI_DEVICE_ID_INTEL_PMT_OOBMSM 0x09a7 #define PCI_DEVICE_ID_INTEL_PMT_TGL 0x9a0d static const struct pci_device_id pmt_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, PMT_ADL, &tgl_info) }, + { PCI_DEVICE_DATA(INTEL, PMT_DG1, &dg1_info) }, { PCI_DEVICE_DATA(INTEL, PMT_OOBMSM, NULL) }, { PCI_DEVICE_DATA(INTEL, PMT_TGL, &tgl_info) }, { } diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c index c8939fba4509..228e21f1ce5c 100644 --- a/drivers/platform/x86/intel_pmt_class.c +++ b/drivers/platform/x86/intel_pmt_class.c @@ -19,6 +19,28 @@ #define PMT_XA_MAX INT_MAX #define PMT_XA_LIMIT XA_LIMIT(PMT_XA_START, PMT_XA_MAX) +/* + * Early implementations of PMT on client platforms have some + * differences from the server platforms (which use the Out Of Band + * Management Services Module OOBMSM). This list tracks those + * platforms as needed to handle those differences. Newer client + * platforms are expected to be fully compatible with server. + */ +static const struct pci_device_id pmt_telem_early_client_pci_ids[] = { + { PCI_VDEVICE(INTEL, 0x467d) }, /* ADL */ + { PCI_VDEVICE(INTEL, 0x490e) }, /* DG1 */ + { PCI_VDEVICE(INTEL, 0x9a0d) }, /* TGL */ + { } +}; + +bool intel_pmt_is_early_client_hw(struct device *dev) +{ + struct pci_dev *parent = to_pci_dev(dev->parent); + + return !!pci_match_id(pmt_telem_early_client_pci_ids, parent); +} +EXPORT_SYMBOL_GPL(intel_pmt_is_early_client_hw); + /* * sysfs */ @@ -147,6 +169,30 @@ static int intel_pmt_populate_entry(struct intel_pmt_entry *entry, * base address = end of discovery region + base offset */ entry->base_addr = disc_res->end + 1 + header->base_offset; + + /* + * Some hardware use a different calculation for the base address + * when access_type == ACCESS_LOCAL. On the these systems + * ACCCESS_LOCAL refers to an address in the same BAR as the + * header but at a fixed offset. But as the header address was + * supplied to the driver, we don't know which BAR it was in. + * So search for the bar whose range includes the header address. 
+ */ + if (intel_pmt_is_early_client_hw(dev)) { + int i; + + entry->base_addr = 0; + for (i = 0; i < 6; i++) + if (disc_res->start >= pci_resource_start(pci_dev, i) && + (disc_res->start <= pci_resource_end(pci_dev, i))) { + entry->base_addr = pci_resource_start(pci_dev, i) + + header->base_offset; + break; + } + if (!entry->base_addr) + return -EINVAL; + } + break; case ACCESS_BARID: /* diff --git a/drivers/platform/x86/intel_pmt_class.h b/drivers/platform/x86/intel_pmt_class.h index de8f8139ba31..1337019c2873 100644 --- a/drivers/platform/x86/intel_pmt_class.h +++ b/drivers/platform/x86/intel_pmt_class.h @@ -44,6 +44,7 @@ struct intel_pmt_namespace { struct device *dev); }; +bool intel_pmt_is_early_client_hw(struct device *dev); int intel_pmt_dev_create(struct intel_pmt_entry *entry, struct intel_pmt_namespace *ns, struct platform_device *pdev, int idx); diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel_pmt_telemetry.c index f8a87614efa4..9b95ef050457 100644 --- a/drivers/platform/x86/intel_pmt_telemetry.c +++ b/drivers/platform/x86/intel_pmt_telemetry.c @@ -34,26 +34,6 @@ struct pmt_telem_priv { struct intel_pmt_entry entry[]; }; -/* - * Early implementations of PMT on client platforms have some - * differences from the server platforms (which use the Out Of Band - * Management Services Module OOBMSM). This list tracks those - * platforms as needed to handle those differences. Newer client - * platforms are expected to be fully compatible with server. - */ -static const struct pci_device_id pmt_telem_early_client_pci_ids[] = { - { PCI_VDEVICE(INTEL, 0x9a0d) }, /* TGL */ - { PCI_VDEVICE(INTEL, 0x467d) }, /* ADL */ - { } -}; - -static bool intel_pmt_is_early_client_hw(struct device *dev) -{ - struct pci_dev *parent = to_pci_dev(dev->parent); - - return !!pci_match_id(pmt_telem_early_client_pci_ids, parent); -} - static bool pmt_telem_region_overlaps(struct intel_pmt_entry *entry, struct device *dev) { -- Gitee From 5ac8a629650c114f25da7d9c41926640dfed6f53 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 16 Mar 2021 19:44:54 -0700 Subject: [PATCH 25/48] platform/x86: intel_pmt_class: Initial resource to 0 mainline inclusion from mainline-v5.13-rc1 commit 501bb68a66cfc0bc2a2458483400cb49daca974f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 501bb68a66cf platform/x86: intel_pmt_class: Initial resource to 0. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Initialize the struct resource in intel_pmt_dev_register to zero to avoid a fault should the char *name field be non-zero. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20210317024455.3071477-1-david.e.box@linux.intel.com Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/intel_pmt_class.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_pmt_class.c b/drivers/platform/x86/intel_pmt_class.c index 228e21f1ce5c..c86ff15b1ed5 100644 --- a/drivers/platform/x86/intel_pmt_class.c +++ b/drivers/platform/x86/intel_pmt_class.c @@ -219,7 +219,7 @@ static int intel_pmt_dev_register(struct intel_pmt_entry *entry, struct intel_pmt_namespace *ns, struct device *parent) { - struct resource res; + struct resource res = {0}; struct device *dev; int ret; -- Gitee From 8ab1b0c075ce22bcb036812c4ac4c898fdd15b43 Mon Sep 17 00:00:00 2001 From: "David E. 
Box" Date: Tue, 16 Mar 2021 19:44:55 -0700 Subject: [PATCH 26/48] platform/x86: intel_pmt_crashlog: Fix incorrect macros mainline inclusion from mainline-v5.12-rc5 commit 10c931cdfe64ebc38a15a485dd794915044f2111 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit 10c931cdfe64 platform/x86: intel_pmt_crashlog: Fix incorrect macros. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Fixes off-by-one bugs in the macro assignments for the crashlog control bits. Was initially tested on emulation but bug revealed after testing on silicon. Fixes: 5ef9998c96b0 ("platform/x86: Intel PMT Crashlog capability driver") Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20210317024455.3071477-2-david.e.box@linux.intel.com Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/intel_pmt_crashlog.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c index 97dd749c8290..92d315a16cfd 100644 --- a/drivers/platform/x86/intel_pmt_crashlog.c +++ b/drivers/platform/x86/intel_pmt_crashlog.c @@ -23,18 +23,17 @@ #define CRASH_TYPE_OOBMSM 1 /* Control Flags */ -#define CRASHLOG_FLAG_DISABLE BIT(27) +#define CRASHLOG_FLAG_DISABLE BIT(28) /* - * Bits 28 and 29 control the state of bit 31. + * Bits 29 and 30 control the state of bit 31. * - * Bit 28 will clear bit 31, if set, allowing a new crashlog to be captured. - * Bit 29 will immediately trigger a crashlog to be generated, setting bit 31. - * Bit 30 is read-only and reserved as 0. + * Bit 29 will clear bit 31, if set, allowing a new crashlog to be captured. + * Bit 30 will immediately trigger a crashlog to be generated, setting bit 31. * Bit 31 is the read-only status with a 1 indicating log is complete. */ -#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(28) -#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(29) +#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(29) +#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(30) #define CRASHLOG_FLAG_TRIGGER_COMPLETE BIT(31) #define CRASHLOG_FLAG_TRIGGER_MASK GENMASK(31, 28) -- Gitee From 5b15a15d7477110bc7cb53bfc039f87e6770634e Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 5 Jun 2021 22:38:05 +0200 Subject: [PATCH 27/48] platform/x86: intel_pmt_crashlog: Constify static attribute_group struct mainline inclusion from mainline-v5.14-rc1 commit d24023e375704860c6c8b91c3af3034669aa1bc5 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit d24023e37570 platform/x86: intel_pmt_crashlog: Constify static attribute_group struct. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- The only use of pmt_crashlog_group is to assign its address to the attr_grp field in the intel_pmt_namespace struct, which is a pointer to const attribute_group. Make it const to allow the compiler to put it in read-only memory. 
Signed-off-by: Rikard Falkeborn Link: https://lore.kernel.org/r/20210605203807.60547-3-rikard.falkeborn@gmail.com Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/intel_pmt_crashlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c index 92d315a16cfd..56963ceb6345 100644 --- a/drivers/platform/x86/intel_pmt_crashlog.c +++ b/drivers/platform/x86/intel_pmt_crashlog.c @@ -218,7 +218,7 @@ static struct attribute *pmt_crashlog_attrs[] = { NULL }; -static struct attribute_group pmt_crashlog_group = { +static const struct attribute_group pmt_crashlog_group = { .attrs = pmt_crashlog_attrs, }; -- Gitee From a4a2d906de5f6d830ee00690c119f1c99c9d91ec Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Tue, 17 Aug 2021 15:40:17 -0700 Subject: [PATCH 28/48] platform/x86: intel_pmt_telemetry: Ignore zero sized entries mainline inclusion from mainline-v5.15-rc1 commit ef195e8a7f43924b9979b2cd81ac7fa54f24bb3c category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I596K9 CVE: NA Intel-SIG: commit ef195e8a7f43 platform/x86: intel_pmt_telemetry: Ignore zero sized entries. Backport for intel PMT (Platform Monitoring Technology) support -------------------------------- Some devices may expose non-functioning entries that are reserved for future use. These entries have zero size. Ignore them during probe. Signed-off-by: David E. Box Link: https://lore.kernel.org/r/20210817224018.1013192-5-david.e.box@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: yingbao jia Signed-off-by: Jun Tian --- drivers/platform/x86/intel_pmt_telemetry.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/intel_pmt_telemetry.c b/drivers/platform/x86/intel_pmt_telemetry.c index 9b95ef050457..9f845e70a1f8 100644 --- a/drivers/platform/x86/intel_pmt_telemetry.c +++ b/drivers/platform/x86/intel_pmt_telemetry.c @@ -61,6 +61,14 @@ static int pmt_telem_header_decode(struct intel_pmt_entry *entry, /* Size is measured in DWORDS, but accessor returns bytes */ header->size = TELEM_SIZE(readl(disc_table)); + /* + * Some devices may expose non-functioning entries that are + * reserved for future use. They have zero size. Do not fail + * probe for these. Just ignore them. + */ + if (header->size == 0) + return 1; + return 0; } -- Gitee From 72a974dd1dc2500d3bbcfd95b8f83a7b296a107e Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 5 Mar 2021 17:32:16 +0800 Subject: [PATCH 29/48] iommu: add Unisoc IOMMU basic driver mainline inclusion from mainline-v5.13-rc1 commit 0ee4ad6a9d452e2888eb41f4a850c195310139a9 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 0ee4ad6a9d45 iommu: add Unisoc IOMMU basic drive Backport for intel SPR ENQCMD Instruction support -------------------------------- This IOMMU module can be used by Unisoc's multimedia devices, such as display, Image codec(jpeg) and a few signal processors, including VSP(video), GSP(graphic), ISP(image), and CPP(camera pixel processor), etc. 
Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20210305093216.201897-3-zhang.lyra@gmail.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/Kconfig | 12 + drivers/iommu/Makefile | 4 +- drivers/iommu/sprd-iommu.c | 577 +++++++++++++++++++++++++++++++++++++ 3 files changed, 591 insertions(+), 2 deletions(-) create mode 100644 drivers/iommu/sprd-iommu.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d97e38bfbe4b..3e393dca0173 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -422,4 +422,16 @@ config SMMU_BYPASS_DEV This feature will be replaced by ACPI IORT RMR node, which will be upstreamed in mainline. +config SPRD_IOMMU + tristate "Unisoc IOMMU Support" + depends on ARCH_SPRD || COMPILE_TEST + select IOMMU_API + help + Support for IOMMU on Unisoc's SoCs, this IOMMU can be used by + Unisoc's multimedia devices, such as display, Image codec(jpeg) + and a few signal processors, including VSP(video), GSP(graphic), + ISP(image), and CPP(camera pixel processor), etc. + + Say Y here if you want to use the multimedia devices listed above. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 1e7519f56afb..0ac9c7fb4b8f 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,5 +27,5 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o -obj-$(CONFIG_IOMMU_SVA_LIB) += io-pgfault.o +obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c new file mode 100644 index 000000000000..7100ed17dcce --- /dev/null +++ b/drivers/iommu/sprd-iommu.c @@ -0,0 +1,577 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Unisoc IOMMU driver + * + * Copyright (C) 2020 Unisoc, Inc. 
+ * Author: Chunyan Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SPRD_IOMMU_PAGE_SHIFT 12 +#define SPRD_IOMMU_PAGE_SIZE SZ_4K + +#define SPRD_EX_CFG 0x0 +#define SPRD_IOMMU_VAOR_BYPASS BIT(4) +#define SPRD_IOMMU_GATE_EN BIT(1) +#define SPRD_IOMMU_EN BIT(0) +#define SPRD_EX_UPDATE 0x4 +#define SPRD_EX_FIRST_VPN 0x8 +#define SPRD_EX_VPN_RANGE 0xc +#define SPRD_EX_FIRST_PPN 0x10 +#define SPRD_EX_DEFAULT_PPN 0x14 + +#define SPRD_IOMMU_VERSION 0x0 +#define SPRD_VERSION_MASK GENMASK(15, 8) +#define SPRD_VERSION_SHIFT 0x8 +#define SPRD_VAU_CFG 0x4 +#define SPRD_VAU_UPDATE 0x8 +#define SPRD_VAU_AUTH_CFG 0xc +#define SPRD_VAU_FIRST_PPN 0x10 +#define SPRD_VAU_DEFAULT_PPN_RD 0x14 +#define SPRD_VAU_DEFAULT_PPN_WR 0x18 +#define SPRD_VAU_FIRST_VPN 0x1c +#define SPRD_VAU_VPN_RANGE 0x20 + +enum sprd_iommu_version { + SPRD_IOMMU_EX, + SPRD_IOMMU_VAU, +}; + +/* + * struct sprd_iommu_device - high-level sprd IOMMU device representation, + * including hardware information and configuration, also driver data, etc + * + * @ver: sprd IOMMU IP version + * @prot_page_va: protect page base virtual address + * @prot_page_pa: protect page base physical address, data would be + * written to here while translation fault + * @base: mapped base address for accessing registers + * @dev: pointer to basic device structure + * @iommu: IOMMU core representation + * @group: IOMMU group + * @eb: gate clock which controls IOMMU access + */ +struct sprd_iommu_device { + enum sprd_iommu_version ver; + u32 *prot_page_va; + dma_addr_t prot_page_pa; + void __iomem *base; + struct device *dev; + struct iommu_device iommu; + struct iommu_group *group; + struct clk *eb; +}; + +struct sprd_iommu_domain { + spinlock_t pgtlock; /* lock for page table */ + struct iommu_domain domain; + u32 *pgt_va; /* page table virtual address base */ + dma_addr_t pgt_pa; /* page table physical address base */ + struct sprd_iommu_device *sdev; +}; + +static const struct iommu_ops sprd_iommu_ops; + +static struct sprd_iommu_domain *to_sprd_domain(struct iommu_domain *dom) +{ + return container_of(dom, struct sprd_iommu_domain, domain); +} + +static inline void +sprd_iommu_write(struct sprd_iommu_device *sdev, unsigned int reg, u32 val) +{ + writel_relaxed(val, sdev->base + reg); +} + +static inline u32 +sprd_iommu_read(struct sprd_iommu_device *sdev, unsigned int reg) +{ + return readl_relaxed(sdev->base + reg); +} + +static inline void +sprd_iommu_update_bits(struct sprd_iommu_device *sdev, unsigned int reg, + u32 mask, u32 shift, u32 val) +{ + u32 t = sprd_iommu_read(sdev, reg); + + t = (t & (~(mask << shift))) | ((val & mask) << shift); + sprd_iommu_write(sdev, reg, t); +} + +static inline int +sprd_iommu_get_version(struct sprd_iommu_device *sdev) +{ + int ver = (sprd_iommu_read(sdev, SPRD_IOMMU_VERSION) & + SPRD_VERSION_MASK) >> SPRD_VERSION_SHIFT; + + switch (ver) { + case SPRD_IOMMU_EX: + case SPRD_IOMMU_VAU: + return ver; + default: + return -EINVAL; + } +} + +static size_t +sprd_iommu_pgt_size(struct iommu_domain *domain) +{ + return ((domain->geometry.aperture_end - + domain->geometry.aperture_start + 1) >> + SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32); +} + +static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) +{ + struct sprd_iommu_domain *dom; + + if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED) + return NULL; + + dom = kzalloc(sizeof(*dom), GFP_KERNEL); + if (!dom) + return NULL; + + if 
(iommu_get_dma_cookie(&dom->domain)) { + kfree(dom); + return NULL; + } + + spin_lock_init(&dom->pgtlock); + + dom->domain.geometry.aperture_start = 0; + dom->domain.geometry.aperture_end = SZ_256M - 1; + + return &dom->domain; +} + +static void sprd_iommu_domain_free(struct iommu_domain *domain) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + + iommu_put_dma_cookie(domain); + kfree(dom); +} + +static void sprd_iommu_first_vpn(struct sprd_iommu_domain *dom) +{ + struct sprd_iommu_device *sdev = dom->sdev; + u32 val; + unsigned int reg; + + if (sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_FIRST_VPN; + else + reg = SPRD_VAU_FIRST_VPN; + + val = dom->domain.geometry.aperture_start >> SPRD_IOMMU_PAGE_SHIFT; + sprd_iommu_write(sdev, reg, val); +} + +static void sprd_iommu_vpn_range(struct sprd_iommu_domain *dom) +{ + struct sprd_iommu_device *sdev = dom->sdev; + u32 val; + unsigned int reg; + + if (sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_VPN_RANGE; + else + reg = SPRD_VAU_VPN_RANGE; + + val = (dom->domain.geometry.aperture_end - + dom->domain.geometry.aperture_start) >> SPRD_IOMMU_PAGE_SHIFT; + sprd_iommu_write(sdev, reg, val); +} + +static void sprd_iommu_first_ppn(struct sprd_iommu_domain *dom) +{ + u32 val = dom->pgt_pa >> SPRD_IOMMU_PAGE_SHIFT; + struct sprd_iommu_device *sdev = dom->sdev; + unsigned int reg; + + if (sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_FIRST_PPN; + else + reg = SPRD_VAU_FIRST_PPN; + + sprd_iommu_write(sdev, reg, val); +} + +static void sprd_iommu_default_ppn(struct sprd_iommu_device *sdev) +{ + u32 val = sdev->prot_page_pa >> SPRD_IOMMU_PAGE_SHIFT; + + if (sdev->ver == SPRD_IOMMU_EX) { + sprd_iommu_write(sdev, SPRD_EX_DEFAULT_PPN, val); + } else if (sdev->ver == SPRD_IOMMU_VAU) { + sprd_iommu_write(sdev, SPRD_VAU_DEFAULT_PPN_RD, val); + sprd_iommu_write(sdev, SPRD_VAU_DEFAULT_PPN_WR, val); + } +} + +static void sprd_iommu_hw_en(struct sprd_iommu_device *sdev, bool en) +{ + unsigned int reg_cfg; + u32 mask, val; + + if (sdev->ver == SPRD_IOMMU_EX) + reg_cfg = SPRD_EX_CFG; + else + reg_cfg = SPRD_VAU_CFG; + + mask = SPRD_IOMMU_EN | SPRD_IOMMU_GATE_EN; + val = en ? 
mask : 0; + sprd_iommu_update_bits(sdev, reg_cfg, mask, 0, val); +} + +static int sprd_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + size_t pgt_size = sprd_iommu_pgt_size(domain); + + if (dom->sdev) { + pr_err("There's already a device attached to this domain.\n"); + return -EINVAL; + } + + dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); + if (!dom->pgt_va) + return -ENOMEM; + + dom->sdev = sdev; + + sprd_iommu_first_ppn(dom); + sprd_iommu_first_vpn(dom); + sprd_iommu_vpn_range(dom); + sprd_iommu_default_ppn(sdev); + sprd_iommu_hw_en(sdev, true); + + return 0; +} + +static void sprd_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + struct sprd_iommu_device *sdev = dom->sdev; + size_t pgt_size = sprd_iommu_pgt_size(domain); + + if (!sdev) + return; + + dma_free_coherent(sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); + sprd_iommu_hw_en(sdev, false); + dom->sdev = NULL; +} + +static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + unsigned long flags; + unsigned int i; + u32 *pgt_base_iova; + u32 pabase = (u32)paddr; + unsigned long start = domain->geometry.aperture_start; + unsigned long end = domain->geometry.aperture_end; + + if (!dom->sdev) { + pr_err("No sprd_iommu_device attached to the domain\n"); + return -EINVAL; + } + + if (iova < start || (iova + size) > (end + 1)) { + dev_err(dom->sdev->dev, "(iova(0x%lx) + sixe(%zx)) are not in the range!\n", + iova, size); + return -EINVAL; + } + + pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); + + spin_lock_irqsave(&dom->pgtlock, flags); + for (i = 0; i < page_num; i++) { + pgt_base_iova[i] = pabase >> SPRD_IOMMU_PAGE_SHIFT; + pabase += SPRD_IOMMU_PAGE_SIZE; + } + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return 0; +} + +static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *iotlb_gather) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned long flags; + u32 *pgt_base_iova; + unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT; + unsigned long start = domain->geometry.aperture_start; + unsigned long end = domain->geometry.aperture_end; + + if (iova < start || (iova + size) > (end + 1)) + return -EINVAL; + + pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT); + + spin_lock_irqsave(&dom->pgtlock, flags); + memset(pgt_base_iova, 0, page_num * sizeof(u32)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return 0; +} + +static void sprd_iommu_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned int reg; + + if (dom->sdev->ver == SPRD_IOMMU_EX) + reg = SPRD_EX_UPDATE; + else + reg = SPRD_VAU_UPDATE; + + /* clear IOMMU TLB buffer after page table updated */ + sprd_iommu_write(dom->sdev, reg, 0xffffffff); +} + +static void sprd_iommu_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather) +{ + sprd_iommu_sync_map(domain, 0, 0); +} + +static phys_addr_t sprd_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct 
sprd_iommu_domain *dom = to_sprd_domain(domain); + unsigned long flags; + phys_addr_t pa; + unsigned long start = domain->geometry.aperture_start; + unsigned long end = domain->geometry.aperture_end; + + if (WARN_ON(iova < start || iova > end)) + return 0; + + spin_lock_irqsave(&dom->pgtlock, flags); + pa = *(dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT)); + pa = (pa << SPRD_IOMMU_PAGE_SHIFT) + ((iova - start) & (SPRD_IOMMU_PAGE_SIZE - 1)); + spin_unlock_irqrestore(&dom->pgtlock, flags); + + return pa; +} + +static struct iommu_device *sprd_iommu_probe_device(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct sprd_iommu_device *sdev; + + if (!fwspec || fwspec->ops != &sprd_iommu_ops) + return ERR_PTR(-ENODEV); + + sdev = dev_iommu_priv_get(dev); + + return &sdev->iommu; +} + +static void sprd_iommu_release_device(struct device *dev) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (!fwspec || fwspec->ops != &sprd_iommu_ops) + return; + + iommu_fwspec_free(dev); +} + +static struct iommu_group *sprd_iommu_device_group(struct device *dev) +{ + struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev); + + return iommu_group_ref_get(sdev->group); +} + +static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args) +{ + struct platform_device *pdev; + + if (!dev_iommu_priv_get(dev)) { + pdev = of_find_device_by_node(args->np); + dev_iommu_priv_set(dev, platform_get_drvdata(pdev)); + platform_device_put(pdev); + } + + return 0; +} + + +static const struct iommu_ops sprd_iommu_ops = { + .domain_alloc = sprd_iommu_domain_alloc, + .domain_free = sprd_iommu_domain_free, + .attach_dev = sprd_iommu_attach_device, + .detach_dev = sprd_iommu_detach_device, + .map = sprd_iommu_map, + .unmap = sprd_iommu_unmap, + .iotlb_sync_map = sprd_iommu_sync_map, + .iotlb_sync = sprd_iommu_sync, + .iova_to_phys = sprd_iommu_iova_to_phys, + .probe_device = sprd_iommu_probe_device, + .release_device = sprd_iommu_release_device, + .device_group = sprd_iommu_device_group, + .of_xlate = sprd_iommu_of_xlate, + .pgsize_bitmap = ~0UL << SPRD_IOMMU_PAGE_SHIFT, +}; + +static const struct of_device_id sprd_iommu_of_match[] = { + { .compatible = "sprd,iommu-v1" }, + { }, +}; +MODULE_DEVICE_TABLE(of, sprd_iommu_of_match); + +/* + * Clock is not required, access to some of IOMMUs is controlled by gate + * clk, enabled clocks for that kind of IOMMUs before accessing. + * Return 0 for success or no clocks found. 
+ */ +static int sprd_iommu_clk_enable(struct sprd_iommu_device *sdev) +{ + struct clk *eb; + + eb = clk_get_optional(sdev->dev, 0); + if (!eb) + return 0; + + if (IS_ERR(eb)) + return PTR_ERR(eb); + + sdev->eb = eb; + return clk_prepare_enable(eb); +} + +static void sprd_iommu_clk_disable(struct sprd_iommu_device *sdev) +{ + if (sdev->eb) + clk_disable_unprepare(sdev->eb); +} + +static int sprd_iommu_probe(struct platform_device *pdev) +{ + struct sprd_iommu_device *sdev; + struct device *dev = &pdev->dev; + void __iomem *base; + int ret; + + sdev = devm_kzalloc(dev, sizeof(*sdev), GFP_KERNEL); + if (!sdev) + return -ENOMEM; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) { + dev_err(dev, "Failed to get ioremap resource.\n"); + return PTR_ERR(base); + } + sdev->base = base; + + sdev->prot_page_va = dma_alloc_coherent(dev, SPRD_IOMMU_PAGE_SIZE, + &sdev->prot_page_pa, GFP_KERNEL); + if (!sdev->prot_page_va) + return -ENOMEM; + + platform_set_drvdata(pdev, sdev); + sdev->dev = dev; + + /* All the client devices are in the same iommu-group */ + sdev->group = iommu_group_alloc(); + if (IS_ERR(sdev->group)) { + ret = PTR_ERR(sdev->group); + goto free_page; + } + + ret = iommu_device_sysfs_add(&sdev->iommu, dev, NULL, dev_name(dev)); + if (ret) + goto put_group; + + iommu_device_set_ops(&sdev->iommu, &sprd_iommu_ops); + iommu_device_set_fwnode(&sdev->iommu, &dev->of_node->fwnode); + + ret = iommu_device_register(&sdev->iommu); + if (ret) + goto remove_sysfs; + + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &sprd_iommu_ops); + + ret = sprd_iommu_clk_enable(sdev); + if (ret) + goto unregister_iommu; + + ret = sprd_iommu_get_version(sdev); + if (ret < 0) { + dev_err(dev, "IOMMU version(%d) is invalid.\n", ret); + goto disable_clk; + } + sdev->ver = ret; + + return 0; + +disable_clk: + sprd_iommu_clk_disable(sdev); +unregister_iommu: + iommu_device_unregister(&sdev->iommu); +remove_sysfs: + iommu_device_sysfs_remove(&sdev->iommu); +put_group: + iommu_group_put(sdev->group); +free_page: + dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); + return ret; +} + +static int sprd_iommu_remove(struct platform_device *pdev) +{ + struct sprd_iommu_device *sdev = platform_get_drvdata(pdev); + + dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa); + + iommu_group_put(sdev->group); + sdev->group = NULL; + + bus_set_iommu(&platform_bus_type, NULL); + + platform_set_drvdata(pdev, NULL); + iommu_device_sysfs_remove(&sdev->iommu); + iommu_device_unregister(&sdev->iommu); + + return 0; +} + +static struct platform_driver sprd_iommu_driver = { + .driver = { + .name = "sprd-iommu", + .of_match_table = sprd_iommu_of_match, + .suppress_bind_attrs = true, + }, + .probe = sprd_iommu_probe, + .remove = sprd_iommu_remove, +}; +module_platform_driver(sprd_iommu_driver); + +MODULE_DESCRIPTION("IOMMU driver for Unisoc SoCs"); +MODULE_ALIAS("platform:sprd-iommu"); +MODULE_LICENSE("GPL"); -- Gitee From b74a572b1d39842fb66c1eae39687e85bf08231f Mon Sep 17 00:00:00 2001 From: Kyung Min Park Date: Sun, 14 Mar 2021 13:15:34 -0700 Subject: [PATCH 30/48] iommu/vt-d: Disable SVM when ATS/PRI/PASID are not enabled in the device mainline inclusion from mainline-v5.13-rc1 commit 028e0f9de7c42d32dd88652c59c494652b5010ec category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 028e0f9de7c4 iommu/vt-d: Disable SVM when ATS/PRI/PASID are not Backport for intel 
SPR ENQCMD Instruction support -------------------------------- Currently, the Intel VT-d supports Shared Virtual Memory (SVM) only when IO page fault is supported. Otherwise, shared memory pages can not be swapped out and need to be pinned. The device needs the Address Translation Service (ATS), Page Request Interface (PRI) and Process Address Space Identifier (PASID) capabilities to be enabled to support IO page fault. Disable SVM when ATS, PRI and PASID are not enabled in the device. Signed-off-by: Kyung Min Park Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210314201534.918-1-kyung.min.park@intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 8b6d10c2add5..d953619bd819 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6197,6 +6197,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) + return -EINVAL; + if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) return 0; } -- Gitee From d6ff54ac79caa9c7f471e5f06aa260cb17e2d60f Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:57 +0800 Subject: [PATCH 31/48] iommu/vt-d: Remove svm_dev_ops mainline inclusion from mainline-v5.13-rc1 commit 03744d588567aae6c3ae0dfc8fcb81a89ac21f55 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 03744d588567 iommu/vt-d: Remove svm_dev_ops Backport for intel SPR ENQCMD Instruction support -------------------------------- The svm_dev_ops has never been referenced in the tree, and there's no plan to have anything to use it. Remove it to make the code neat. 
Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 15 +-------------- include/linux/intel-iommu.h | 3 --- include/linux/intel-svm.h | 7 ------- 3 files changed, 1 insertion(+), 24 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index aaf9f9589bdc..18fa4bdab92c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -462,7 +462,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) /* Caller must hold pasid_mutex, mm reference */ static int intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct svm_dev_ops *ops, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); @@ -512,10 +511,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Find the matching device in svm list */ for_each_svm_dev(sdev, svm, dev) { - if (sdev->ops != ops) { - ret = -EBUSY; - goto out; - } sdev->users++; goto success; } @@ -550,7 +545,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Finish the setup now we know we're keeping it */ sdev->users = 1; - sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -1006,13 +1000,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) mmap_read_unlock(svm->mm); mmput(svm->mm); bad_req: - WARN_ON(!sdev); - if (sdev && sdev->ops && sdev->ops->fault_cb) { - int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->pm_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, - req->priv_data, rwxp, result); - } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! 
*/ sdev = NULL; @@ -1087,7 +1074,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) if (drvdata) flags = *(unsigned int *)drvdata; mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev); + ret = intel_svm_bind_mm(dev, flags, mm, &sdev); if (ret) sva = ERR_PTR(ret); else if (sdev) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 805e748f7eae..0e157779c977 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -761,14 +761,11 @@ u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); -struct svm_dev_ops; - struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; int users; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 39d368a810b8..6c9d10c0fb1e 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -8,13 +8,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -struct device; - -struct svm_dev_ops { - void (*fault_cb)(struct device *dev, u32 pasid, u64 address, - void *private, int rwxp, int response); -}; - /* Values for rxwp in fault_cb callback */ #define SVM_REQ_READ (1<<3) #define SVM_REQ_WRITE (1<<2) -- Gitee From 4e873f85d989156056ceabb7843fe9a007bbaba2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Mar 2021 09:05:58 +0800 Subject: [PATCH 32/48] iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID mainline inclusion from mainline-v5.13-rc1 commit 072df4e47af508ef272f0d0ca8fd29a67df782ce category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 072df4e47af5 iommu/vt-d: Remove SVM_FLAG_PRIVATE_PASID Backport for intel SPR ENQCMD Instruction support -------------------------------- The SVM_FLAG_PRIVATE_PASID has never been referenced in the tree, and there's no plan to have anything to use it. So cleanup it. Signed-off-by: Lu Baolu Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210323010600.678627-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 40 ++++++++++++++++++--------------------- include/linux/intel-svm.h | 16 +++------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18fa4bdab92c..069b539db81a 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -465,9 +465,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct mm_struct *mm, struct intel_svm_dev **sd) { struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct intel_svm *svm = NULL, *t; struct device_domain_info *info; struct intel_svm_dev *sdev; - struct intel_svm *svm = NULL; unsigned long iflags; int pasid_max; int ret; @@ -493,30 +493,26 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - if (!(flags & SVM_FLAG_PRIVATE_PASID)) { - struct intel_svm *t; - - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID)) - continue; - - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. 
Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; - } + list_for_each_entry(t, &global_svm_list, list) { + if (t->mm != mm) + continue; - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; - } + svm = t; + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } + + break; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 6c9d10c0fb1e..10fa80eef13a 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -14,16 +14,6 @@ #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) -/* - * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" - * PASID for the current process. Even if a PASID already exists, a new one - * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID - * will not be given to subsequent callers. This facility allows a driver to - * disambiguate between multiple device contexts which access the same MM, - * if there is no other way to do so. It should be used sparingly, if at all. - */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) - /* * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only * for access to kernel addresses. No IOTLB flushes are automatically done @@ -35,18 +25,18 @@ * It is unlikely that we will ever hook into flush_tlb_kernel_range() to * do such IOTLB flushes automatically. */ -#define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#define SVM_FLAG_SUPERVISOR_MODE BIT(0) /* * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest * processes. Compared to the host bind, the primary differences are: * 1. mm life cycle management * 2. fault reporting */ -#define SVM_FLAG_GUEST_MODE (1<<2) +#define SVM_FLAG_GUEST_MODE BIT(1) /* * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space, * which requires guest and host PASID translation at both directions. */ -#define SVM_FLAG_GUEST_PASID (1<<3) +#define SVM_FLAG_GUEST_PASID BIT(2) #endif /* __INTEL_SVM_H__ */ -- Gitee From 368077c3ad59c134859d61f118dcffba0a9d41c4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:58 +0800 Subject: [PATCH 33/48] iommu/vt-d: Add pasid private data helpers mainline inclusion from mainline-v5.14-rc1 commit 2516f1b5a449b38946a4a1b146e8d3b1d8f13102 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 2516f1b5a449 iommu/vt-d: Add pasid private data helpers Backport for intel SPR ENQCMD Instruction support -------------------------------- We are about to use iommu_sva_alloc/free_pasid() helpers in iommu core. That means the pasid life cycle will be managed by iommu core. Use a local array to save the per pasid private data instead of attaching it the real pasid. 
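Illustrative aside (not part of the backported hunk; all names in this snippet are hypothetical): the helpers added below rely on xa_alloc() with XA_LIMIT(id, id), which degenerates into "insert at exactly this index, fail if the slot is occupied". A minimal sketch of that pattern:

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(example_pasid_array);

/* Hypothetical: store 'priv' at slot 'pasid'; returns -EBUSY if already populated. */
static int example_insert_at(u32 pasid, void *priv)
{
	return xa_alloc(&example_pasid_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

/* Lookup and removal are plain xa_load()/xa_erase() on the same index. */
static void *example_lookup(u32 pasid)
{
	return xa_load(&example_pasid_array, pasid);
}

The pasid_private_add()/find()/remove() helpers in the hunk below follow this shape, keyed by the already-allocated PASID value.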
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-7-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 62 ++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 069b539db81a..4e036e71b8c2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,23 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid); #define PRQ_ORDER 0 +static DEFINE_XARRAY_ALLOC(pasid_private_array); +static int pasid_private_add(ioasid_t pasid, void *priv) +{ + return xa_alloc(&pasid_private_array, &pasid, priv, + XA_LIMIT(pasid, pasid), GFP_ATOMIC); +} + +static void pasid_private_remove(ioasid_t pasid) +{ + xa_erase(&pasid_private_array, pasid); +} + +static void *pasid_private_find(ioasid_t pasid) +{ + return xa_load(&pasid_private_array, pasid); +} + int intel_svm_enable_prq(struct intel_iommu *iommu) { struct page *pages; @@ -224,7 +242,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, if (pasid == INVALID_IOASID || pasid >= PASID_MAX) return -EINVAL; - svm = ioasid_find(NULL, pasid, NULL); + svm = pasid_private_find(pasid); if (IS_ERR(svm)) return PTR_ERR(svm); @@ -334,7 +352,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, svm->gpasid = data->gpasid; svm->flags |= SVM_FLAG_GUEST_PASID; } - ioasid_set_data(data->hpasid, svm); + pasid_private_add(data->hpasid, svm); INIT_LIST_HEAD_RCU(&svm->devs); mmput(svm->mm); } @@ -388,7 +406,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, list_add_rcu(&sdev->list, &svm->devs); out: if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) { - ioasid_set_data(data->hpasid, NULL); + pasid_private_remove(data->hpasid); kfree(svm); } @@ -431,7 +449,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid) * the unbind, IOMMU driver will get notified * and perform cleanup. 
*/ - ioasid_set_data(pasid, NULL); + pasid_private_remove(pasid); kfree(svm); } } @@ -547,8 +565,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) { ret = -ENOMEM; - kfree(sdev); - goto out; + goto sdev_err; } if (pasid_max > intel_pasid_max_id) @@ -556,13 +573,16 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, /* Do not use PASID 0, reserved for RID to PASID */ svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, svm); + pasid_max - 1, NULL); if (svm->pasid == INVALID_IOASID) { - kfree(svm); - kfree(sdev); ret = -ENOSPC; - goto out; + goto svm_err; } + + ret = pasid_private_add(svm->pasid, svm); + if (ret) + goto pasid_err; + svm->notifier.ops = &intel_mmuops; svm->mm = mm; svm->flags = flags; @@ -571,12 +591,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, ret = -ENOMEM; if (mm) { ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - ioasid_put(svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } + if (ret) + goto priv_err; } spin_lock_irqsave(&iommu->lock, iflags); @@ -590,8 +606,13 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); +priv_err: + pasid_private_remove(svm->pasid); +pasid_err: ioasid_put(svm->pasid); +svm_err: kfree(svm); +sdev_err: kfree(sdev); goto out; } @@ -614,10 +635,8 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - kfree(sdev); - goto out; - } + if (ret) + goto sdev_err; } list_add_rcu(&sdev->list, &svm->devs); success: @@ -670,6 +689,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) load_pasid(svm->mm, PASID_DISABLED); } list_del(&svm->list); + pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -925,7 +945,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) } if (!svm || svm->pasid != req->pasid) { rcu_read_lock(); - svm = ioasid_find(NULL, req->pasid, NULL); + svm = pasid_private_find(req->pasid); /* It *can't* go away, because the driver is not permitted * to unbind the mm while any page faults are outstanding. * So we only need RCU to protect the internal idr code. */ -- Gitee From 03d41eb7ade8f6976be8288d2effb96ef1eb1e67 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 10 Jun 2021 10:00:59 +0800 Subject: [PATCH 34/48] iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers mainline inclusion from mainline-v5.14-rc1 commit 761fed4ec0c575a77f93ae6eafe76fb43566cb18 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 761fed4ec0c5 iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers Backport for intel SPR ENQCMD Instruction support -------------------------------- Align the pasid alloc/free code with the generic helpers defined in the iommu core. This also refactored the SVA binding code to improve the readability. 
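For orientation (this snippet is not part of the patch; the consumer is hypothetical), the code being refactored here sits behind the generic SVA entry points that device drivers call. A rough driver-side sketch:

#include <linux/err.h>
#include <linux/iommu.h>
#include <linux/sched.h>

/* Hypothetical consumer: bind the current process address space to 'dev'. */
static int example_enable_sva(struct device *dev, u32 *pasid)
{
	struct iommu_sva *handle;

	/* Usually preceded by iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA). */
	handle = iommu_sva_bind_device(dev, current->mm, NULL);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	*pasid = iommu_sva_get_pasid(handle);	/* value to program into the device */

	/* ... submit work tagged with *pasid ... */

	iommu_sva_unbind_device(handle);
	return 0;
}

On VT-d these generic calls land in intel_svm_bind()/intel_svm_unbind(), which this patch reworks.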
Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-8-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/Kconfig | 1 + drivers/iommu/intel/iommu.c | 3 + drivers/iommu/intel/svm.c | 278 +++++++++++++++--------------------- include/linux/intel-iommu.h | 1 - 4 files changed, 120 insertions(+), 163 deletions(-) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 5337ee1584b0..4a6919f95094 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,6 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID + select IOMMU_SVA_LIB help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d953619bd819..89bb473fa11f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -6197,6 +6197,9 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) if (!info) return -EINVAL; + if (intel_iommu_enable_pasid(info->iommu, dev)) + return -ENODEV; + if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled) return -EINVAL; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4e036e71b8c2..7675de59e60b 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -23,9 +23,11 @@ #include #include "pasid.h" +#include "../iommu-sva-lib.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, u32 pasid); +#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) #define PRQ_ORDER 0 @@ -222,7 +224,6 @@ static const struct mmu_notifier_ops intel_mmuops = { }; static DEFINE_MUTEX(pasid_mutex); -static LIST_HEAD(global_svm_list); #define for_each_svm_dev(sdev, svm, d) \ list_for_each_entry((sdev), &(svm)->devs, list) \ @@ -477,79 +478,80 @@ static void load_pasid(struct mm_struct *mm, u32 pasid) mutex_unlock(&mm->context.lock); } -/* Caller must hold pasid_mutex, mm reference */ -static int -intel_svm_bind_mm(struct device *dev, unsigned int flags, - struct mm_struct *mm, struct intel_svm_dev **sd) +static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, + unsigned int flags) { - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - struct intel_svm *svm = NULL, *t; - struct device_domain_info *info; - struct intel_svm_dev *sdev; - unsigned long iflags; - int pasid_max; - int ret; + ioasid_t max_pasid = dev_is_pci(dev) ? 
+ pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - if (!iommu || dmar_disabled) - return -EINVAL; + return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); +} - if (!intel_svm_capable(iommu)) - return -ENOTSUPP; +static void intel_svm_free_pasid(struct mm_struct *mm) +{ + iommu_sva_free_pasid(mm); +} - if (dev_is_pci(dev)) { - pasid_max = pci_max_pasids(to_pci_dev(dev)); - if (pasid_max < 0) - return -EINVAL; - } else - pasid_max = 1 << 20; +static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, + struct device *dev, + struct mm_struct *mm, + unsigned int flags) +{ + struct device_domain_info *info = get_domain_info(dev); + unsigned long iflags, sflags; + struct intel_svm_dev *sdev; + struct intel_svm *svm; + int ret = 0; - /* Bind supervisor PASID shuld have mm = NULL */ - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap) || mm) { - pr_err("Supervisor PASID with user provided mm.\n"); - return -EINVAL; - } - } + svm = pasid_private_find(mm->pasid); + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) + return ERR_PTR(-ENOMEM); - list_for_each_entry(t, &global_svm_list, list) { - if (t->mm != mm) - continue; + svm->pasid = mm->pasid; + svm->mm = mm; + svm->flags = flags; + INIT_LIST_HEAD_RCU(&svm->devs); - svm = t; - if (svm->pasid >= pasid_max) { - dev_warn(dev, - "Limited PASID width. Cannot use existing PASID %d\n", - svm->pasid); - ret = -ENOSPC; - goto out; + if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); + } } - /* Find the matching device in svm list */ - for_each_svm_dev(sdev, svm, dev) { - sdev->users++; - goto success; + ret = pasid_private_add(svm->pasid, svm); + if (ret) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + kfree(svm); + return ERR_PTR(ret); } + } - break; + /* Find the matching device in svm list */ + for_each_svm_dev(sdev, svm, dev) { + sdev->users++; + goto success; } sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; - goto out; + goto free_svm; } + sdev->dev = dev; sdev->iommu = iommu; - - ret = intel_iommu_enable_pasid(iommu, dev); - if (ret) { - kfree(sdev); - goto out; - } - - info = get_domain_info(dev); sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->users = 1; + sdev->pasid = svm->pasid; + sdev->sva.dev = dev; + init_rcu_head(&sdev->rcu); if (info->ats_enabled) { sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; @@ -557,96 +559,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, sdev->qdep = 0; } - /* Finish the setup now we know we're keeping it */ - sdev->users = 1; - init_rcu_head(&sdev->rcu); - - if (!svm) { - svm = kzalloc(sizeof(*svm), GFP_KERNEL); - if (!svm) { - ret = -ENOMEM; - goto sdev_err; - } - - if (pasid_max > intel_pasid_max_id) - pasid_max = intel_pasid_max_id; - - /* Do not use PASID 0, reserved for RID to PASID */ - svm->pasid = ioasid_alloc(NULL, PASID_MIN, - pasid_max - 1, NULL); - if (svm->pasid == INVALID_IOASID) { - ret = -ENOSPC; - goto svm_err; - } - - ret = pasid_private_add(svm->pasid, svm); - if (ret) - goto pasid_err; + /* Setup the pasid table: */ + sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? + PASID_FLAG_SUPERVISOR_MODE : 0; + sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? 
PASID_FLAG_FL5LP : 0; + spin_lock_irqsave(&iommu->lock, iflags); + ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, + FLPT_DEFAULT_DID, sflags); + spin_unlock_irqrestore(&iommu->lock, iflags); - svm->notifier.ops = &intel_mmuops; - svm->mm = mm; - svm->flags = flags; - INIT_LIST_HEAD_RCU(&svm->devs); - INIT_LIST_HEAD(&svm->list); - ret = -ENOMEM; - if (mm) { - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) - goto priv_err; - } + if (ret) + goto free_sdev; - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) { - if (mm) - mmu_notifier_unregister(&svm->notifier, mm); -priv_err: - pasid_private_remove(svm->pasid); -pasid_err: - ioasid_put(svm->pasid); -svm_err: - kfree(svm); -sdev_err: - kfree(sdev); - goto out; - } + /* The newly allocated pasid is loaded to the mm. */ + if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs)) + load_pasid(mm, svm->pasid); - list_add_tail(&svm->list, &global_svm_list); - if (mm) { - /* The newly allocated pasid is loaded to the mm. */ - load_pasid(mm, svm->pasid); - } - } else { - /* - * Binding a new device with existing PASID, need to setup - * the PASID entry. - */ - spin_lock_irqsave(&iommu->lock, iflags); - ret = intel_pasid_setup_first_level(iommu, dev, - mm ? mm->pgd : init_mm.pgd, - svm->pasid, FLPT_DEFAULT_DID, - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | - (cpu_feature_enabled(X86_FEATURE_LA57) ? - PASID_FLAG_FL5LP : 0)); - spin_unlock_irqrestore(&iommu->lock, iflags); - if (ret) - goto sdev_err; - } list_add_rcu(&sdev->list, &svm->devs); success: - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; - if (sd) - *sd = sdev; - ret = 0; -out: - return ret; + return &sdev->sva; + +free_sdev: + kfree(sdev); +free_svm: + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(mm->pasid); + kfree(svm); + } + + return ERR_PTR(ret); } /* Caller must hold pasid_mutex */ @@ -655,6 +598,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; + struct mm_struct *mm; int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); @@ -664,6 +608,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); if (ret) goto out; + mm = svm->mm; if (sdev) { sdev->users--; @@ -682,13 +627,12 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - ioasid_put(svm->pasid); - if (svm->mm) { - mmu_notifier_unregister(&svm->notifier, svm->mm); + intel_svm_free_pasid(mm); + if (svm->notifier.ops) { + mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ - load_pasid(svm->mm, PASID_DISABLED); + load_pasid(mm, PASID_DISABLED); } - list_del(&svm->list); pasid_private_remove(svm->pasid); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. 
@@ -1073,31 +1017,42 @@ static irqreturn_t prq_event_thread(int irq, void *d) return IRQ_RETVAL(handled); } -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) -struct iommu_sva * -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) { - struct iommu_sva *sva = ERR_PTR(-EINVAL); - struct intel_svm_dev *sdev = NULL; + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); unsigned int flags = 0; + struct iommu_sva *sva; int ret; - /* - * TODO: Consolidate with generic iommu-sva bind after it is merged. - * It will require shared SVM data structures, i.e. combine io_mm - * and intel_svm etc. - */ if (drvdata) flags = *(unsigned int *)drvdata; + + if (flags & SVM_FLAG_SUPERVISOR_MODE) { + if (!ecap_srs(iommu->ecap)) { + dev_err(dev, "%s: Supervisor PASID not supported\n", + iommu->name); + return ERR_PTR(-EOPNOTSUPP); + } + + if (mm) { + dev_err(dev, "%s: Supervisor PASID with user provided mm\n", + iommu->name); + return ERR_PTR(-EINVAL); + } + + mm = &init_mm; + } + mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(dev, flags, mm, &sdev); - if (ret) - sva = ERR_PTR(ret); - else if (sdev) - sva = &sdev->sva; - else - WARN(!sdev, "SVM bind succeeded with no sdev!\n"); + ret = intel_svm_alloc_pasid(dev, mm, flags); + if (ret) { + mutex_unlock(&pasid_mutex); + return ERR_PTR(ret); + } + sva = intel_svm_bind_mm(iommu, dev, mm, flags); + if (IS_ERR_OR_NULL(sva)) + intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; @@ -1105,10 +1060,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) void intel_svm_unbind(struct iommu_sva *sva) { - struct intel_svm_dev *sdev; + struct intel_svm_dev *sdev = to_intel_svm_dev(sva); mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); intel_svm_unbind_mm(sdev->dev, sdev->pasid); mutex_unlock(&pasid_mutex); } diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0e157779c977..3f54def870b8 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -782,7 +782,6 @@ struct intel_svm { u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ struct list_head devs; - struct list_head list; }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} -- Gitee From 5c72237a2bd7d4e51fe6b09add8bb2b3c4ce4f07 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 7 Jul 2021 18:08:22 -0700 Subject: [PATCH 35/48] mm: add setup_initial_init_mm() helper mainline inclusion from mainline-v5.14-rc1 commit 8d0c7adaee0899a8b4c2e789fc8c237bb359afe1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 8d0c7adaee08 mm: add setup_initial_init_mm() helper Backport for intel SPR ENQCMD Instruction support -------------------------------- Patch series "init_mm: cleanup ARCH's text/data/brk setup code", v3. Add setup_initial_init_mm() helper, then use it to cleanup the text, data and brk setup code. This patch (of 15): Add setup_initial_init_mm() helper to setup kernel text, data and brk. 
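The per-architecture conversion then becomes a one-liner. Rough sketch only; the exact section symbols differ per architecture (e.g. _text vs _stext, _end vs a dedicated brk symbol):

	/* Before: open-coded assignments scattered through arch setup code. */
	init_mm.start_code = (unsigned long)_stext;
	init_mm.end_code   = (unsigned long)_etext;
	init_mm.end_data   = (unsigned long)_edata;
	init_mm.brk        = (unsigned long)_end;

	/* After: */
	setup_initial_init_mm(_stext, _etext, _edata, _end);

The x86 conversion in the next patch is the concrete in-tree instance of this pattern.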
Link: https://lkml.kernel.org/r/20210608083418.137226-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20210608083418.137226-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Souptick Joarder Cc: Christophe Leroy Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guo Ren Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jonas Bonn Cc: Ley Foon Tan Cc: Michael Ellerman Cc: Nick Hu Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Rich Felker Cc: Russell King (Oracle) Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Thomas Gleixner Cc: Vasily Gorbik Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jun Yang --- include/linux/mm.h | 3 +++ mm/init-mm.c | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index a886f48b6a0e..e73f98751680 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -246,6 +246,9 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk); + /* * Linux kernel virtual memory manager primitives. * The idea being to have a "virtual" mm in the same way diff --git a/mm/init-mm.c b/mm/init-mm.c index 153162669f80..b4a6f38fb51d 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -40,3 +40,12 @@ struct mm_struct init_mm = { .cpu_bitmap = CPU_BITS_NONE, INIT_MM_CONTEXT(init_mm) }; + +void setup_initial_init_mm(void *start_code, void *end_code, + void *end_data, void *brk) +{ + init_mm.start_code = (unsigned long)start_code; + init_mm.end_code = (unsigned long)end_code; + init_mm.end_data = (unsigned long)end_data; + init_mm.brk = (unsigned long)brk; +} -- Gitee From cf7774ce0f3842621b2a7cc8bdbf85be1029ac69 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 7 Jul 2021 18:09:03 -0700 Subject: [PATCH 36/48] x86: convert to setup_initial_init_mm() mainline inclusion from mainline-v5.14-rc1 commit cccb423b3ee1c03cde5fd9ed4b43d1ea0a862525 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit cccb423b3ee1 x86: convert to setup_initial_init_mm() Backport for intel SPR ENQCMD Instruction support -------------------------------- Use setup_initial_init_mm() helper to simplify code. 
Link: https://lkml.kernel.org/r/20210608083418.137226-16-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jun Yang --- arch/x86/kernel/setup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 62df2aa1ac32..bee79d4fb2cb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -728,10 +728,7 @@ void __init setup_arch(char **cmdline_p) if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = _brk_end; + setup_initial_init_mm(_text, _etext, _edata, (void *)_brk_end); code_resource.start = __pa_symbol(_text); code_resource.end = __pa_symbol(_etext)-1; -- Gitee From 7426c258353148492ed888dd93f30f103373495e Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 Aug 2021 20:43:20 +0800 Subject: [PATCH 37/48] iommu/vt-d: Fix PASID reference leak mainline inclusion from mainline-v5.14-rc7 commit fdfc71c26760521e81bbae095336fb32ac087e3b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit fdfc71c26760 iommu/vt-d: Fix PASID reference leak Backport for intel SPR ENQCMD Instruction support -------------------------------- A PASID reference is increased whenever a device is bound to an mm (and its PASID) successfully (i.e. the device's sdev user count is increased). But the reference is not dropped every time the device is unbound successfully from the mm (i.e. the device's sdev user count is decreased). The reference is dropped only once by calling intel_svm_free_pasid() when there isn't any device bound to the mm. intel_svm_free_pasid() drops the reference and only frees the PASID on zero reference. Fix the issue by dropping the PASID reference and freeing the PASID when no reference on successful unbinding the device by calling intel_svm_free_pasid() . Fixes: 4048377414162 ("iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers") Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20210813181345.1870742-1-fenghua.yu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210817124321.1517985-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/intel/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7675de59e60b..8825574dcef2 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -627,7 +627,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - intel_svm_free_pasid(mm); if (svm->notifier.ops) { mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ @@ -642,6 +641,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } + /* Drop a PASID reference and free it if no reference. 
*/ + intel_svm_free_pasid(mm); } out: return ret; -- Gitee From 5a4af60d955c5c2bcfbda0509a245166a83e7dd0 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 28 Jan 2022 18:44:33 +0800 Subject: [PATCH 38/48] iommu: Fix some W=1 warnings mainline inclusion from mainline-v5.17-rc3 commit 2c3b4f8d87877da9ae8281e1fcba64ec1663b381 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 2c3b4f8d8787 iommu: Fix some W=1 warnings Backport for intel SPR ENQCMD Instruction support -------------------------------- The code is mostly free of W=1 warning, so fix the following: drivers/iommu/iommu.c:996: warning: expecting prototype for iommu_group_for_each_dev(). Prototype was for __iommu_group_for_each_dev() instead drivers/iommu/iommu.c:3048: warning: Function parameter or member 'drvdata' not described in 'iommu_sva_bind_device' drivers/iommu/ioasid.c:354: warning: Function parameter or member 'ioasid' not described in 'ioasid_get' drivers/iommu/omap-iommu.c:1098: warning: expecting prototype for omap_iommu_suspend_prepare(). Prototype was for omap_iommu_prepare() instead Signed-off-by: John Garry Reviewed-by: Robin Murphy Link: https://lore.kernel.org/r/1643366673-26803-1-git-send-email-john.garry@huawei.com Signed-off-by: Joerg Roedel Signed-off-by: Jun Yang --- drivers/iommu/ioasid.c | 1 + drivers/iommu/iommu.c | 24 ++++++++++++------------ drivers/iommu/omap-iommu.c | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 50ee27bbd04e..06fee7416816 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -349,6 +349,7 @@ EXPORT_SYMBOL_GPL(ioasid_alloc); /** * ioasid_get - obtain a reference to the IOASID + * @ioasid: the ID to get */ void ioasid_get(ioasid_t ioasid) { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e61d16f0ede2..b3d2555a794b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -968,17 +968,6 @@ static int iommu_group_device_count(struct iommu_group *group) return ret; } -/** - * iommu_group_for_each_dev - iterate over each device in the group - * @group: the group - * @data: caller opaque data to be passed to callback function - * @fn: caller supplied callback function - * - * This function is called by group users to iterate over group devices. - * Callers should hold a reference count to the group during callback. - * The group->mutex is held across callbacks, which will block calls to - * iommu_group_add/remove_device. - */ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -993,7 +982,17 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, return ret; } - +/** + * iommu_group_for_each_dev - iterate over each device in the group + * @group: the group + * @data: caller opaque data to be passed to callback function + * @fn: caller supplied callback function + * + * This function is called by group users to iterate over group devices. + * Callers should hold a reference count to the group during callback. + * The group->mutex is held across callbacks, which will block calls to + * iommu_group_add/remove_device. 
+ */ int iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -3486,6 +3485,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_group); * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device * @mm: the mm to bind, caller must hold a reference to it + * @drvdata: opaque data pointer to pass to bind callback * * Create a bond between device and address space, allowing the device to access * the mm using the returned PASID. If a bond already exists between @device and diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 71f29c0927fc..cf5e9cb2cb26 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1086,7 +1086,7 @@ static __maybe_unused int omap_iommu_runtime_resume(struct device *dev) } /** - * omap_iommu_suspend_prepare - prepare() dev_pm_ops implementation + * omap_iommu_prepare - prepare() dev_pm_ops implementation * @dev: iommu device * * This function performs the necessary checks to determine if the IOMMU -- Gitee From d1d4324200507c2b6a2757806fb6c802ad9437d1 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:44 -0800 Subject: [PATCH 39/48] iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA mainline inclusion from mainline-v5.18-rc1 commit 8e71ebc3c8dfbc1bac3876bfb1e5f5264fabe47b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 8e71ebc3c8df iommu/sva: Rename CONFIG_IOMMU_SVA_LIB to CONFIG_IOMMU_SVA Backport for intel SPR ENQCMD Instruction support -------------------------------- This CONFIG option originally only referred to the Shared Virtual Address (SVA) library. But it is now also used for non-library portions of code. Drop the "_LIB" suffix so that there is just one configuration option for all code relating to SVA. 
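Any code outside this series that keys off the old symbol has to follow the rename as well: Kconfig users now "select IOMMU_SVA" instead of "select IOMMU_SVA_LIB", and preprocessor guards change the same way. A hypothetical consumer (sketch only):

/* was: #ifdef CONFIG_IOMMU_SVA_LIB */
#ifdef CONFIG_IOMMU_SVA
	/* PASID allocation and IO page fault handling paths */
#else
	/* fall back to the -ENODEV stubs, as in iommu-sva-lib.h below */
#endif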
Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-2-fenghua.yu@intel.com Signed-off-by: Jun Yang --- drivers/iommu/Kconfig | 6 +++--- drivers/iommu/Makefile | 2 +- drivers/iommu/intel/Kconfig | 2 +- drivers/iommu/iommu-sva-lib.h | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3e393dca0173..d6cb78994461 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -103,8 +103,8 @@ config IOMMU_DMA select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH -# Shared Virtual Addressing library -config IOMMU_SVA_LIB +# Shared Virtual Addressing +config IOMMU_SVA bool select IOASID @@ -318,7 +318,7 @@ config ARM_SMMU_V3 config ARM_SMMU_V3_SVA bool "Shared Virtual Addressing support for the ARM SMMUv3" depends on ARM_SMMU_V3 - select IOMMU_SVA_LIB + select IOMMU_SVA select MMU_NOTIFIER help Support for sharing process address spaces with devices using the diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 0ac9c7fb4b8f..88337bfc0017 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -27,5 +27,5 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 4a6919f95094..cecdad7f2aba 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -40,7 +40,7 @@ config INTEL_IOMMU_SVM select PCI_PRI select MMU_NOTIFIER select IOASID - select IOMMU_SVA_LIB + select IOMMU_SVA help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 031155010ca8..95dc3ebc1928 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -17,7 +17,7 @@ struct device; struct iommu_fault; struct iopf_queue; -#ifdef CONFIG_IOMMU_SVA_LIB +#ifdef CONFIG_IOMMU_SVA int iommu_queue_iopf(struct iommu_fault *fault, void *cookie); int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); @@ -28,7 +28,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); -#else /* CONFIG_IOMMU_SVA_LIB */ +#else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) { return -ENODEV; @@ -64,5 +64,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } -#endif /* CONFIG_IOMMU_SVA_LIB */ +#endif /* CONFIG_IOMMU_SVA */ #endif /* _IOMMU_SVA_LIB_H */ -- Gitee From a9edf588e489204c3238825c7a9aa34e59a1e0d1 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:45 -0800 Subject: [PATCH 40/48] mm: Change CONFIG option for mm->pasid field mainline inclusion from mainline-v5.18-rc1 commit a5acc88f1ab7b60ff450665a9bd7011671724193 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit a5acc88f1ab7 mm: Change CONFIG option for mm->pasid field Backport for intel SPR ENQCMD Instruction support 
-------------------------------- This currently depends on CONFIG_IOMMU_SUPPORT. But it is only needed when CONFIG_IOMMU_SVA option is enabled. Change the CONFIG guards around definition and initialization of mm->pasid field. YES, refreshed. Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-3-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/mm_types.h | 2 +- kernel/fork.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c22e294f083..d9d10990661b 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -582,7 +582,7 @@ struct mm_struct { #endif struct work_struct async_put_work; -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA u32 pasid; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 0fb86b65ae60..e73539a6b54a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -997,7 +997,7 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) static void mm_init_pasid(struct mm_struct *mm) { -#ifdef CONFIG_IOMMU_SUPPORT +#ifdef CONFIG_IOMMU_SVA mm->pasid = INIT_PASID; #endif } -- Gitee From cf2458980034ff5366c6eef3c85fa10b58b799d5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:46 -0800 Subject: [PATCH 41/48] iommu/ioasid: Introduce a helper to check for valid PASIDs mainline inclusion from mainline-v5.18-rc1 commit 35436e33a37d7686ef2a7a36706ba367ff82587e category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 35436e33a37d iommu/ioasid: Introduce a helper to check for valid PASIDs Backport for intel SPR ENQCMD Instruction support -------------------------------- Define a pasid_valid() helper to check if a given PASID is valid. [ bp: Massage commit message. 
] Suggested-by: Ashok Raj Suggested-by: Jacob Pan Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20220207230254.3342514-4-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/ioasid.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index e9dacd4b9f6b..2237f64dbaae 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -41,6 +41,10 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); int ioasid_set_data(ioasid_t ioasid, void *data); +static inline bool pasid_valid(ioasid_t ioasid) +{ + return ioasid != INVALID_IOASID; +} #else /* !CONFIG_IOASID */ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, @@ -78,5 +82,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) return -ENOTSUPP; } +static inline bool pasid_valid(ioasid_t ioasid) +{ + return false; +} + #endif /* CONFIG_IOASID */ #endif /* __LINUX_IOASID_H */ -- Gitee From ac1a01721389a368c687857ba8406eaabd77c3d8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:47 -0800 Subject: [PATCH 42/48] kernel/fork: Initialize mm's PASID mainline inclusion from mainline-v5.18-rc1 commit 5a5686b57471cd4b0a03c4aa89d40de5ff3845ac category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 5a5686b57471 kernel/fork: Initialize mm's PASID Backport for intel SPR ENQCMD Instruction support -------------------------------- A new mm doesn't have a PASID yet when it's created. Initialize the mm's PASID on fork() or for init_mm to INVALID_IOASID (-1). INIT_PASID (0) is reserved for kernel legacy DMA PASID. It cannot be allocated to a user process. Initializing the process's PASID to 0 may cause confusion that's why the process uses the reserved kernel legacy DMA PASID. Initializing the PASID to INVALID_IOASID (-1) explicitly tells the process doesn't have a valid PASID yet. Even though the only user of mm_pasid_init() is in fork.c, define it in as the first of three mm/pasid life cycle functions (init/set/drop) to keep these all together. 
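Combined with the pasid_valid() helper introduced just above, "has this mm been assigned a PASID yet?" becomes an explicit test rather than a comparison against a magic value. Illustrative sketch (not part of the patch; mm->pasid only exists under CONFIG_IOMMU_SVA):

#include <linux/ioasid.h>
#include <linux/mm_types.h>

/* Hypothetical helper: true once an SVA bind has assigned mm->pasid. */
static bool example_mm_has_pasid(struct mm_struct *mm)
{
	/* mm->pasid is INVALID_IOASID (-1) until a PASID is allocated. */
	return pasid_valid(mm->pasid);
}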
Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-5-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/sched/mm.h | 10 ++++++++++ kernel/fork.c | 11 ++--------- mm/init-mm.c | 4 ++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index dc1f4dcd9a82..b697833b4362 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Routines for handling mm_structs @@ -364,4 +365,13 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) } #endif +#ifdef CONFIG_IOMMU_SVA +static inline void mm_pasid_init(struct mm_struct *mm) +{ + mm->pasid = INVALID_IOASID; +} +#else +static inline void mm_pasid_init(struct mm_struct *mm) {} +#endif + #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index e73539a6b54a..87a77c3061d4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,8 +97,8 @@ #include #include #include +#include -#include #include #include #include @@ -995,13 +995,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static void mm_init_pasid(struct mm_struct *mm) -{ -#ifdef CONFIG_IOMMU_SVA - mm->pasid = INIT_PASID; -#endif -} - static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1032,7 +1025,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); - mm_init_pasid(mm); + mm_pasid_init(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); diff --git a/mm/init-mm.c b/mm/init-mm.c index b4a6f38fb51d..fbe7844d0912 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -10,6 +10,7 @@ #include #include +#include #include #ifndef INIT_MM_CONTEXT @@ -38,6 +39,9 @@ struct mm_struct init_mm = { .mmlist = LIST_HEAD_INIT(init_mm.mmlist), .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, +#ifdef CONFIG_IOMMU_SVA + .pasid = INVALID_IOASID, +#endif INIT_MM_CONTEXT(init_mm) }; -- Gitee From b62d42e95156966053fd983d7775bc6d0378dd40 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:48 -0800 Subject: [PATCH 43/48] iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit mainline inclusion from mainline-v5.18-rc1 commit bb851179d340e4779a498dc9380a9fafdb726dfd category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit bb851179d340 iommu/sva: Assign a PASID to mm on PASID allocation and free it on mm exit Backport for intel SPR ENQCMD Instruction support -------------------------------- PASIDs are process-wide. It was attempted to use refcounted PASIDs to free them when the last thread drops the refcount. This turned out to be complex and error prone. Given the fact that the PASID space is 20 bits, which allows up to 1M processes to have a PASID associated concurrently, PASID resource exhaustion is not a realistic concern. Therefore, it was decided to simplify the approach and stick with lazy on demand PASID allocation, but drop the eager free approach and make an allocated PASID's lifetime bound to the lifetime of the process. Get rid of the refcounting mechanisms and replace/rename the interfaces to reflect this new approach. [ bp: Massage commit message. 
] Suggested-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Lu Baolu Reviewed-by: Jacob Pan Reviewed-by: Thomas Gleixner Acked-by: Joerg Roedel Link: https://lore.kernel.org/r/20220207230254.3342514-6-fenghua.yu@intel.com Signed-off-by: Jun Yang --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 5 +-- drivers/iommu/intel/iommu.c | 4 +- drivers/iommu/intel/svm.c | 9 ----- drivers/iommu/ioasid.c | 39 ++----------------- drivers/iommu/iommu-sva-lib.c | 39 ++++++------------- drivers/iommu/iommu-sva-lib.h | 1 - include/linux/ioasid.h | 12 +----- include/linux/sched/mm.h | 16 ++++++++ kernel/fork.c | 1 + 9 files changed, 38 insertions(+), 88 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d4e1238347a6..a8e8fd9d7c8b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -351,15 +351,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { ret = PTR_ERR(bond->smmu_mn); - goto err_free_pasid; + goto err_free_bond; } list_add(&bond->list, &master->bonds); trace_smmu_bind_alloc(dev, mm->pasid); return &bond->sva; -err_free_pasid: - iommu_sva_free_pasid(mm); err_free_bond: kfree(bond); return ERR_PTR(ret); @@ -403,7 +401,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle) trace_smmu_unbind_free(handle->dev, bond->mm->pasid); list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); - iommu_sva_free_pasid(bond->mm); kfree(bond); } else { trace_smmu_unbind_put(handle->dev, bond->mm->pasid); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 89bb473fa11f..6622489e8fa4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5450,7 +5450,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain, link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); return ret; } @@ -5480,7 +5480,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (list_empty(&domain->subdevices) && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + ioasid_free(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 8825574dcef2..d886b494dddb 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -487,11 +487,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); } -static void intel_svm_free_pasid(struct mm_struct *mm) -{ - iommu_sva_free_pasid(mm); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, struct mm_struct *mm, @@ -641,8 +636,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } - /* Drop a PASID reference and free it if no reference. 
*/ - intel_svm_free_pasid(mm); } out: return ret; @@ -1052,8 +1045,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void } sva = intel_svm_bind_mm(iommu, dev, mm, flags); - if (IS_ERR_OR_NULL(sva)) - intel_svm_free_pasid(mm); mutex_unlock(&pasid_mutex); return sva; diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 06fee7416816..a786c034907c 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -2,7 +2,7 @@ /* * I/O Address Space ID allocator. There is one global IOASID space, split into * subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and - * free IOASIDs with ioasid_alloc and ioasid_put. + * free IOASIDs with ioasid_alloc() and ioasid_free(). */ #include #include @@ -15,7 +15,6 @@ struct ioasid_data { struct ioasid_set *set; void *private; struct rcu_head rcu; - refcount_t refs; }; /* @@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, data->set = set; data->private = private; - refcount_set(&data->refs, 1); /* * Custom allocator needs allocator data to perform platform specific @@ -348,35 +346,11 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, EXPORT_SYMBOL_GPL(ioasid_alloc); /** - * ioasid_get - obtain a reference to the IOASID - * @ioasid: the ID to get - */ -void ioasid_get(ioasid_t ioasid) -{ - struct ioasid_data *ioasid_data; - - spin_lock(&ioasid_allocator_lock); - ioasid_data = xa_load(&active_allocator->xa, ioasid); - if (ioasid_data) - refcount_inc(&ioasid_data->refs); - else - WARN_ON(1); - spin_unlock(&ioasid_allocator_lock); -} -EXPORT_SYMBOL_GPL(ioasid_get); - -/** - * ioasid_put - Release a reference to an ioasid + * ioasid_free - Free an ioasid * @ioasid: the ID to remove - * - * Put a reference to the IOASID, free it when the number of references drops to - * zero. - * - * Return: %true if the IOASID was freed, %false otherwise. */ -bool ioasid_put(ioasid_t ioasid) +void ioasid_free(ioasid_t ioasid) { - bool free = false; struct ioasid_data *ioasid_data; spin_lock(&ioasid_allocator_lock); @@ -386,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid) goto exit_unlock; } - free = refcount_dec_and_test(&ioasid_data->refs); - if (!free) - goto exit_unlock; - active_allocator->ops->free(ioasid, active_allocator->ops->pdata); /* Custom allocator needs additional steps to free the xa element */ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) { @@ -399,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid) exit_unlock: spin_unlock(&ioasid_allocator_lock); - return free; } -EXPORT_SYMBOL_GPL(ioasid_put); +EXPORT_SYMBOL_GPL(ioasid_free); /** * ioasid_find - Find IOASID data diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c index bd41405d34e9..106506143896 100644 --- a/drivers/iommu/iommu-sva-lib.c +++ b/drivers/iommu/iommu-sva-lib.c @@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid); * * Try to allocate a PASID for this mm, or take a reference to the existing one * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid, and must be released with iommu_sva_free_pasid(). - * @min must be greater than 0, because 0 indicates an unused mm->pasid. + * available in mm->pasid and will be available for the lifetime of the mm. * * Returns 0 on success and < 0 on error. 
*/ @@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) return -EINVAL; mutex_lock(&iommu_sva_lock); - if (mm->pasid) { - if (mm->pasid >= min && mm->pasid <= max) - ioasid_get(mm->pasid); - else + /* Is a PASID already associated with this mm? */ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) ret = -EOVERFLOW; - } else { - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (pasid == INVALID_IOASID) - ret = -ENOMEM; - else - mm->pasid = pasid; + goto out; } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: mutex_unlock(&iommu_sva_lock); return ret; } EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); -/** - * iommu_sva_free_pasid - Release the mm's PASID - * @mm: the mm - * - * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid() - */ -void iommu_sva_free_pasid(struct mm_struct *mm) -{ - mutex_lock(&iommu_sva_lock); - if (ioasid_put(mm->pasid)) - mm->pasid = 0; - mutex_unlock(&iommu_sva_lock); -} -EXPORT_SYMBOL_GPL(iommu_sva_free_pasid); - /* ioasid_find getter() requires a void * argument */ static bool __mmget_not_zero(void *mm) { diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h index 95dc3ebc1928..8909ea1094e3 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva-lib.h @@ -9,7 +9,6 @@ #include int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max); -void iommu_sva_free_pasid(struct mm_struct *mm); struct mm_struct *iommu_sva_find(ioasid_t pasid); /* I/O Page fault */ diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h index 2237f64dbaae..af1c9d62e642 100644 --- a/include/linux/ioasid.h +++ b/include/linux/ioasid.h @@ -34,8 +34,7 @@ struct ioasid_allocator_ops { #if IS_ENABLED(CONFIG_IOASID) ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max, void *private); -void ioasid_get(ioasid_t ioasid); -bool ioasid_put(ioasid_t ioasid); +void ioasid_free(ioasid_t ioasid); void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); @@ -53,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, return INVALID_IOASID; } -static inline void ioasid_get(ioasid_t ioasid) -{ -} - -static inline bool ioasid_put(ioasid_t ioasid) -{ - return false; -} +static inline void ioasid_free(ioasid_t ioasid) { } static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, bool (*getter)(void *)) diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index b697833b4362..88adf3a686bd 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -370,8 +370,24 @@ static inline void mm_pasid_init(struct mm_struct *mm) { mm->pasid = INVALID_IOASID; } + +/* Associate a PASID with an mm_struct: */ +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) +{ + mm->pasid = pasid; +} + +static inline void mm_pasid_drop(struct mm_struct *mm) +{ + if (pasid_valid(mm->pasid)) { + ioasid_free(mm->pasid); + mm->pasid = INVALID_IOASID; + } +} #else static inline void mm_pasid_init(struct mm_struct *mm) {} +static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} +static inline void mm_pasid_drop(struct mm_struct *mm) {} #endif #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 87a77c3061d4..422379890a74 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1098,6 
+1098,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + mm_pasid_drop(mm); mmdrop(mm); } -- Gitee From d465ef82b172ce1ccb84008c8b7936b0d0200450 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 7 Feb 2022 15:02:50 -0800 Subject: [PATCH 44/48] sched: Define and initialize a flag to identify valid PASID in the task mainline inclusion from mainline-v5.18-rc1 commit b1b9cd41cfd58c81626dd3d7f15f1dad96b4a994 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit b1b9cd41cfd5 sched: Define and initialize a flag to identify valid PASID in the task Backport for intel SPR ENQCMD Instruction support -------------------------------- Add a new single bit field to the task structure to track whether this task has initialized the IA32_PASID MSR to the mm's PASID. Initialize the field to zero when creating a new task with fork/clone. Signed-off-by: Peter Zijlstra Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-8-fenghua.yu@intel.com Signed-off-by: Jun Yang --- include/linux/sched.h | 3 +++ kernel/fork.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 47f462040f4d..c763bd95a7ea 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -865,6 +865,9 @@ struct task_struct { /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif +#ifdef CONFIG_IOMMU_SVA + unsigned pasid_activated:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/fork.c b/kernel/fork.c index 422379890a74..5bfe124bcd7b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -943,6 +943,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->use_memdelay = 0; #endif +#ifdef CONFIG_IOMMU_SVA + tsk->pasid_activated = 0; +#endif + #ifdef CONFIG_MEMCG tsk->active_memcg = NULL; #endif -- Gitee From 33c2a017c2f4165a3a3b5ce8ad59ca50aa97dd4f Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:51 -0800 Subject: [PATCH 45/48] x86/traps: Demand-populate PASID MSR via #GP mainline inclusion from mainline-v5.18-rc1 commit 51ace79ac5d9544cdd9e1094bdebe260661d5704 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 51ace79ac5d9 x86/traps: Demand-populate PASID MSR via #GP Backport for intel SPR ENQCMD Instruction support -------------------------------- All tasks start with PASID state disabled. This means that the first time they execute an ENQCMD instruction they will take a #GP fault. Modify the #GP fault handler to check if the "mm" for the task has already been allocated a PASID. If so, try to fix the #GP fault by loading the IA32_PASID MSR. 
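For context, a hedged user-space sketch of the submission path this fixup serves (illustrative only, not part of the patch): the portal and descriptor come from an SVA-capable device the process has opened and bound, and the inline asm assumes a toolchain whose assembler knows the ENQCMD mnemonic and which supports GCC-style flag-output constraints.

/*
 * Submit a 64-byte descriptor to a device work-queue portal that was
 * mmap()ed from an SVA-capable device.  ENQCMD takes the PASID from the
 * IA32_PASID MSR: the first ENQCMD executed in a process finds the MSR
 * unpopulated, raises #GP, and the fixup added by this patch writes
 * mm->pasid into the MSR and re-executes the instruction.
 *
 * Returns 0 if the device accepted the descriptor, 1 if it requested a
 * retry (ZF set by the instruction).
 */
static inline int enqcmd_submit(volatile void *portal, const void *desc)
{
	unsigned char retry;

	/* AT&T operand order: enqcmd m512 (descriptor), r64 (portal address) */
	asm volatile("enqcmd (%[desc]), %[portal]"
		     : "=@ccz" (retry)
		     : [portal] "r" (portal), [desc] "r" (desc)
		     : "memory");
	return retry;
}
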
Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-9-fenghua.yu@intel.com Signed-off-by: Jun Yang --- arch/x86/kernel/traps.c | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5da01819fb47..0778d5b6f265 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -558,6 +559,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -566,6 +618,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { -- Gitee From 49ae36b30a6a63f486700d168f416e1ee94e8841 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:52 -0800 Subject: [PATCH 46/48] x86/cpufeatures: Re-enable ENQCMD mainline inclusion from mainline-v5.18-rc1 commit d6e9c1ec98dc4b0aef6f78e3266913dfa1adad4f category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit d6e9c1ec98dc x86/cpufeatures: Re-enable ENQCMD Backport for intel SPR ENQCMD Instruction support -------------------------------- The ENQCMD feature can only be used if CONFIG_INTEL_IOMMU_SVM is set. Add X86_FEATURE_ENQCMD to the disabled features mask as appropriate so that cpu_feature_enabled() can be used to check the feature. [ bp: Massage commit message. 
] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-10-fenghua.yu@intel.com Signed-off-by: Jun Yang --- arch/x86/include/asm/disabled-features.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 018654e65c60..73aaf1e0cbb6 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 -- Gitee From e0a8cc85d195fb4b912f7a9ace95c2470b78c491 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:54 -0800 Subject: [PATCH 47/48] Documentation/x86: Update documentation for SVA (Shared Virtual Addressing) mainline inclusion from mainline-v5.18-rc1 commit 294b9a3dd092fb90c5439987c8df8b0c5dd21bc1 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit 294b9a3dd092 Documentation/x86: Update documentation for SVA (Shared Virtual Addressing) Backport for intel SPR ENQCMD Instruction support -------------------------------- Adjust the documentation to the new way how a PASID is being allocated, freed and fixed up. Based on a patch by Ashok Raj [ bp: Massage commit message, fix htmldocs build warning ] Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Thomas Gleixner Link: https://lore.kernel.org/r/20220207230254.3342514-12-fenghua.yu@intel.com Signed-off-by: Jun Yang --- Documentation/x86/sva.rst | 53 ++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst index 076efd51ef1f..2e9b8b0f9a0f 100644 --- a/Documentation/x86/sva.rst +++ b/Documentation/x86/sva.rst @@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application thread can interact with a device. Threads that belong to the same process share the same page tables, thus the same MSR value. -PASID is cleared when a process is created. The PASID allocation and MSR -programming may occur long after a process and its threads have been created. -One thread must call iommu_sva_bind_device() to allocate the PASID for the -process. If a thread uses ENQCMD without the MSR first being populated, a #GP -will be raised. The kernel will update the PASID MSR with the PASID for all -threads in the process. A single process PASID can be used simultaneously -with multiple devices since they all share the same address space. - -One thread can call iommu_sva_unbind_device() to free the allocated PASID. -The kernel will clear the PASID MSR for all threads belonging to the process. - -New threads inherit the MSR value from the parent. +PASID Life Cycle Management +=========================== + +PASID is initialized as INVALID_IOASID (-1) when a process is created. + +Only processes that access SVA-capable devices need to have a PASID +allocated. This allocation happens when a process opens/binds an SVA-capable +device but finds no PASID for this process. 
Subsequent binds of the same, or +other devices will share the same PASID. + +Although the PASID is allocated to the process by opening a device, +it is not active in any of the threads of that process. It's loaded to the +IA32_PASID MSR lazily when a thread tries to submit a work descriptor +to a device using the ENQCMD. + +That first access will trigger a #GP fault because the IA32_PASID MSR +has not been initialized with the PASID value assigned to the process +when the device was opened. The Linux #GP handler notes that a PASID has +been allocated for the process, and so initializes the IA32_PASID MSR +and returns so that the ENQCMD instruction is re-executed. + +On fork(2) or exec(2) the PASID is removed from the process as it no +longer has the same address space that it had when the device was opened. + +On clone(2) the new task shares the same address space, so will be +able to use the PASID allocated to the process. The IA32_PASID is not +preemptively initialized as the PASID value might not be allocated yet or +the kernel does not know whether this thread is going to access the device +and the cleared IA32_PASID MSR reduces context switch overhead by xstate +init optimization. Since #GP faults have to be handled on any threads that +were created before the PASID was assigned to the mm of the process, newly +created threads might as well be treated in a consistent way. + +Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in +all threads in unbind, free the PASID lazily only on mm exit. + +If a process does a close(2) of the device file descriptor and munmap(2) +of the device MMIO portal, then the driver will unbind the device. The +PASID is still marked VALID in the PASID_MSR for any threads in the +process that accessed the device. But this is harmless as without the +MMIO portal they cannot submit new work to the device. Relationships ============= -- Gitee From 2b6731456c7e2b30f5c02b19da54122236adfdc2 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Mon, 7 Feb 2022 15:02:53 -0800 Subject: [PATCH 48/48] tools/objtool: Check for use of the ENQCMD instruction in the kernel mainline inclusion from mainline-v5.18-rc1 commit dcef0d39f0a0f6918628ba7de480af4431b93dc6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I590PB CVE: NA Intel-SIG: commit dcef0d39f0a0 tools/objtool: Check for use of the ENQCMD instruction in the kernel Backport for intel SPR ENQCMD Instruction support -------------------------------- The ENQCMD instruction implicitly accesses the PASID_MSR to fill in the pasid field of the descriptor being submitted to an accelerator. But there is no precise (and stable across kernel changes) point at which the PASID_MSR is updated from the value for one task to the next. Kernel code that uses accelerators must always use the ENQCMDS instruction which does not access the PASID_MSR. Check for use of the ENQCMD instruction in the kernel and warn on its usage. 
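By contrast, kernel code that submits descriptors is expected to use the enqcmds() wrapper (found in arch/x86/include/asm/special_insns.h in kernels of this vintage), because ENQCMDS carries the PASID in the descriptor itself rather than reading the IA32_PASID MSR. A hedged sketch of the intended kernel-side pattern (driver and portal names are illustrative):

#include <linux/errno.h>
#include <linux/io.h>
#include <asm/special_insns.h>	/* enqcmds() */

/* Illustration only: how a driver submits work without ever using ENQCMD. */
static int example_submit(void __iomem *wq_portal, const void *desc)
{
	int ret;

	/*
	 * enqcmds() wraps the supervisor-only ENQCMDS instruction, which
	 * takes the PASID from the descriptor and therefore does not depend
	 * on whatever IA32_PASID value the current task happens to have.
	 */
	ret = enqcmds(wq_portal, desc);
	if (ret)
		return ret;	/* e.g. -EAGAIN: device did not accept it */

	return 0;
}
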
Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220207230254.3342514-11-fenghua.yu@intel.com Signed-off-by: Peter Zijlstra Signed-off-by: Jun Yang --- tools/objtool/arch/x86/decode.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae..dac78f7c8ad0 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -91,7 +91,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, + unsigned char op1, op2, op3, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0; struct stack_op *op = NULL; @@ -117,6 +117,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; + op3 = insn.opcode.bytes[2]; if (insn.rex_prefix.nbytes) { rex = insn.rex_prefix.bytes[0]; @@ -423,6 +424,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x38 && op3 == 0xf8) { + if (insn.prefixes.nbytes == 1 && + insn.prefixes.bytes[0] == 0xf2) { + /* ENQCMD cannot be used in the kernel. */ + WARN("ENQCMD instruction at %s:%lx", sec->name, + offset); + } + } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ -- Gitee