diff --git a/Documentation/ABI/testing/configfs-tsm b/Documentation/ABI/testing/configfs-tsm new file mode 100644 index 0000000000000000000000000000000000000000..dd24202b5ba52ba9cc7034fca8897eee0be5a997 --- /dev/null +++ b/Documentation/ABI/testing/configfs-tsm @@ -0,0 +1,82 @@ +What: /sys/kernel/config/tsm/report/$name/inblob +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Up to 64 bytes of user specified binary data. For replay + protection this should include a nonce, but the kernel does not + place any restrictions on the content. + +What: /sys/kernel/config/tsm/report/$name/outblob +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Binary attestation report generated from @inblob and other + options. The format of the report is implementation specific + where the implementation is conveyed via the @provider + attribute. + +What: /sys/kernel/config/tsm/report/$name/auxblob +Date: October, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Optional supplemental data that a TSM may emit. Visibility + of this attribute depends on the TSM, and it may be empty if no + auxiliary data is available. + + When @provider is "sev_guest" this file contains the + "cert_table" from SEV-ES Guest-Hypervisor Communication Block + Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ. + https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf + +What: /sys/kernel/config/tsm/report/$name/provider +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) A name for the format-specification of @outblob like + "sev_guest" [1] or "tdx_guest" [2] in the near term, or a + common standard format in the future. + + [1]: SEV Secure Nested Paging Firmware ABI Specification + Revision 1.55 Table 22 + https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56860.pdf + + [2]: Intel® Trust Domain Extensions Data Center Attestation + Primitives : Quote Generation Library and Quote Verification + Library Revision 0.8 Appendix 4,5 + https://download.01.org/intel-sgx/latest/dcap-latest/linux/docs/Intel_TDX_DCAP_Quoting_Library_API.pdf + +What: /sys/kernel/config/tsm/report/$name/generation +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) The value in this attribute increments each time @inblob or + any option is written. Userspace can detect conflicts by + checking generation before writing to any attribute and making + sure the number of writes matches expectations after reading + @outblob, or it can prevent conflicts by creating a report + instance per requesting context. + +What: /sys/kernel/config/tsm/report/$name/privlevel +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports for TVMs running at + different privilege levels, like SEV-SNP "VMPL". Specify the + privilege level via this attribute. The minimum acceptable + value is conveyed via @privlevel_floor and the maximum + acceptable value is TSM_PRIVLEVEL_MAX (3). + +What: /sys/kernel/config/tsm/report/$name/privlevel_floor +Date: September, 2023 +KernelVersion: v6.7 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Indicates the minimum permissible value that can be written + to @privlevel.
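(Illustration for review, not part of the patch.) A minimal userspace sketch of the flow the ABI above describes: create a report instance, write a nonce-carrying @inblob, read @outblob, and use @generation to detect racing writers. The report name "r0" and the outblob buffer size are assumptions; error handling and a real entropy source are elided.

/*
 * Sketch: drive /sys/kernel/config/tsm/report per the ABI above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#define REPORT "/sys/kernel/config/tsm/report/r0"

static long attr_read(const char *attr, void *buf, size_t len)
{
	char path[128];
	long n;
	int fd;

	snprintf(path, sizeof(path), REPORT "/%s", attr);
	fd = open(path, O_RDONLY);
	n = read(fd, buf, len);
	close(fd);
	return n;
}

int main(void)
{
	unsigned char nonce[64] = { 0 };	/* fill with fresh entropy */
	static unsigned char outblob[16384];
	char gen[16] = { 0 };
	long before, after, n;
	int fd;

	mkdir(REPORT, 0777);			/* create the report instance */

	attr_read("generation", gen, sizeof(gen));
	before = atol(gen);

	fd = open(REPORT "/inblob", O_WRONLY);
	write(fd, nonce, sizeof(nonce));	/* one write, one increment */
	close(fd);

	n = attr_read("outblob", outblob, sizeof(outblob));

	memset(gen, 0, sizeof(gen));
	attr_read("generation", gen, sizeof(gen));
	after = atol(gen);

	/* any delta other than our single write means a concurrent writer */
	return (n > 0 && after - before == 1) ? 0 : 1;
}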
diff --git a/Documentation/arch/arm64/arm-cca.rst b/Documentation/arch/arm64/arm-cca.rst new file mode 100644 index 0000000000000000000000000000000000000000..c48b7d4ab6bde4833c427056cfcadb79f03b6c26 --- /dev/null +++ b/Documentation/arch/arm64/arm-cca.rst @@ -0,0 +1,69 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================== +Arm Confidential Compute Architecture +===================================== + +Arm systems that support the Realm Management Extension (RME) contain +hardware to allow a VM guest to be run in a way which protects the code +and data of the guest from the hypervisor. It extends the older "two +world" model (Normal and Secure World) into four worlds: Normal, Secure, +Root and Realm. Linux can then also be run as a guest to a monitor +running in the Realm world. + +The monitor running in the Realm world is known as the Realm Management +Monitor (RMM) and implements the Realm Management Monitor +specification[1]. The monitor acts a bit like a hypervisor (e.g. it runs +in EL2 and manages the stage 2 page tables etc. of the guests running in +Realm world); however, much of the control is handled by a hypervisor +running in the Normal World. The Normal World hypervisor uses the Realm +Management Interface (RMI) defined by the RMM specification to request +the RMM to perform operations (e.g. mapping memory or executing a vCPU). + +The RMM defines an environment for guests where the address space (IPA) +is split into two. The lower half is protected - any memory that is +mapped in this half cannot be seen by the Normal World and the RMM +restricts what operations the Normal World can perform on this memory +(e.g. the Normal World cannot replace pages in this region without the +guest's cooperation). The upper half is shared: the Normal World is free +to make changes to the pages in this region, and is able to emulate MMIO +devices in this region too. + +A guest running in a Realm may also communicate with the RMM using the +Realm Services Interface (RSI) to request changes in its environment or +to perform attestation of its environment. In particular it may +request that areas of the protected address space are transitioned +between 'RAM' and 'EMPTY' (in either direction). This allows a Realm +guest to give up memory to be returned to the Normal World, or to +request new memory from the Normal World. Without an explicit request +from the Realm guest, the RMM prevents the Normal World +from making these changes. + +Linux as a Realm Guest +---------------------- + +To run Linux as a guest within a Realm, the following must be provided +either by the VMM or by a `boot loader` run in the Realm before Linux: + + * All protected RAM described to Linux (by DT or ACPI) must be marked + RIPAS RAM before handing control over to Linux. + + * MMIO devices must be either unprotected (e.g. emulated by the Normal + World) or marked RIPAS DEV. + + * MMIO devices emulated by the Normal World and used very early in boot + (specifically earlycon) must be specified in the upper half of IPA. + For earlycon this can be done by specifying the address on the + command line, e.g. with an IPA size of 33 bits and the base address + of the emulated UART at 0x1000000: ``earlycon=uart,mmio,0x101000000`` + + * Linux will use bounce buffers for communicating with unprotected + devices. It will transition some protected memory to RIPAS EMPTY and + expect to be able to access unprotected pages at the same IPA address + but with the highest valid IPA bit set.
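+ For example, with an IPA size of 33 bits, a protected page at IPA + 0x80000000 would also be expected to appear as an unprotected page at + IPA 0x180000000 (bit 32 set).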
The expectation is that the + VMM will remove the physical pages from the protected mapping and + provide those pages as unprotected pages. + +References +---------- +[1] https://developer.arm.com/documentation/den0137/ diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index b540e0933ddebc90d6f7d82e3fbe90ba05c3d4d6..30d00b890b18cb8072077854485f2d1158834968 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -41,6 +41,9 @@ to automatically locate and size all RAM, or it may use knowledge of the RAM in the machine, or any other method the boot loader designer sees fit.) +For Arm Confidential Compute Realms this includes ensuring that all +protected RAM has a Realm IPA state (RIPAS) of "RAM". + 2. Setup the device tree ------------------------- diff --git a/Documentation/arch/arm64/index.rst b/Documentation/arch/arm64/index.rst index 78544de0a8a9e8b833c1fca80439999247137244..12c243c3af204cced7e364751e7f13d7d9ff5f2f 100644 --- a/Documentation/arch/arm64/index.rst +++ b/Documentation/arch/arm64/index.rst @@ -10,6 +10,7 @@ ARM64 Architecture acpi_object_usage amu arm-acpi + arm-cca asymmetric-32bit booting cpu-feature-registers diff --git a/MAINTAINERS b/MAINTAINERS index 28b2a80f6a8c0fdbbd9b4fe89555bc0b6d7534b4..5e5c5c17b5e57026da4310c25337cecb37eb9b05 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22036,6 +22036,14 @@ W: https://github.com/srcres258/linux-doc T: git git://github.com/srcres258/linux-doc.git doc-zh-tw F: Documentation/translations/zh_TW/ +TRUSTED SECURITY MODULE (TSM) ATTESTATION REPORTS +M: Dan Williams +L: linux-coco@lists.linux.dev +S: Maintained +F: Documentation/ABI/testing/configfs-tsm +F: drivers/virt/coco/tsm.c +F: include/linux/tsm.h + TTY LAYER AND SERIAL DRIVERS M: Greg Kroah-Hartman M: Jiri Slaby diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d9def7a9c023f14e67a254b7f9a56d08edf2dd0..5422d1502fd6f2817c80d962fe872708f053a401 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,6 +20,7 @@ config ARM64 select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CC_PLATFORM select ARCH_HAS_COPY_MC if ACPI_APEI_GHES select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL @@ -33,6 +34,7 @@ config ARM64 select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_NONLEAF_PMD_YOUNG if ARM64_HAFT @@ -44,6 +46,7 @@ config ARM64 select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY + select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_STACKWALK select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 3b694511b98f833722a089e5e34d5cb0c4b273db..e9d6bcfddf35c368e015de3eb9689c264b24fed4 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -17,6 +17,7 @@ #include #include #include +#include <asm/rsi.h> /* * Generic IO read/write. These perform native-endian accesses. */ @@ -139,6 +140,10 @@ extern void __memset_io(volatile void __iomem *, int, size_t); /* * I/O memory mapping functions.
*/ +typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size, + pgprot_t *prot); +int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook); + #define ioremap_prot ioremap_prot #define _PAGE_IOREMAP PROT_DEVICE_nGnRE @@ -182,4 +187,11 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, unsigned long flags); #define arch_memremap_can_ram_remap arch_memremap_can_ram_remap +static inline bool arm64_is_protected_mmio(phys_addr_t phys_addr, size_t size) +{ + if (unlikely(is_realm_world())) + return arm64_rsi_is_protected(phys_addr, size); + return false; +} + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h new file mode 100644 index 0000000000000000000000000000000000000000..f8f78f622dd2c92b7d05910a1a2236fc8d560a29 --- /dev/null +++ b/arch/arm64/include/asm/mem_encrypt.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_MEM_ENCRYPT_H +#define __ASM_MEM_ENCRYPT_H + +#include <asm/rsi.h> + +struct arm64_mem_crypt_ops { + int (*encrypt)(unsigned long addr, int numpages); + int (*decrypt)(unsigned long addr, int numpages); +}; + +int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops); + +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + +int realm_register_memory_enc_ops(void); + +static inline bool force_dma_unencrypted(struct device *dev) +{ + return is_realm_world(); +} + +#endif /* __ASM_MEM_ENCRYPT_H */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index eed814b00a389755e050afa4c1d6eae908a42d4d..b8646e82db8777171a2d890886e3e25ebc45fc6f 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -65,8 +65,12 @@ #include #include +#include <asm/rsi.h> extern bool arm64_use_ng_mappings; +extern unsigned long prot_ns_shared; + +#define PROT_NS_SHARED (is_realm_world() ? prot_ns_shared : 0) #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b7f025ba7f72bb75e70a13128b9bac61db4c7fb5..626e43967e0a310a305739c900bce2813e48b8eb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -568,6 +568,11 @@ static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, #define pgprot_nx(prot) \ __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) +#define pgprot_decrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, PROT_NS_SHARED) +#define pgprot_encrypted(prot) \ + __pgprot_modify(prot, PROT_NS_SHARED, 0) + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h new file mode 100644 index 0000000000000000000000000000000000000000..5f9c8623183dddf6c96be5dbe9fa1c5e7136adaa --- /dev/null +++ b/arch/arm64/include/asm/rsi.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 ARM Ltd.
+ */ + +#ifndef __ASM_RSI_H_ +#define __ASM_RSI_H_ + +#include <linux/errno.h> +#include <linux/jump_label.h> +#include <asm/rsi_cmds.h> + +DECLARE_STATIC_KEY_FALSE(rsi_present); + +void __init arm64_rsi_init(void); + +bool arm64_rsi_is_protected(phys_addr_t base, size_t size); + +static inline bool is_realm_world(void) +{ + return static_branch_unlikely(&rsi_present); +} + +static inline int rsi_set_memory_range(phys_addr_t start, phys_addr_t end, + enum ripas state, unsigned long flags) +{ + unsigned long ret; + phys_addr_t top; + + while (start != end) { + ret = rsi_set_addr_range_state(start, end, state, flags, &top); + if (ret || top < start || top > end) + return -EINVAL; + start = top; + } + + return 0; +} + +/* + * Convert the specified range to RAM. Do not use this if you rely on the + * contents of a page that may already be in RAM state. + */ +static inline int rsi_set_memory_range_protected(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_CHANGE_DESTROYED); +} + +/* + * Convert the specified range to RAM. Do not convert any pages that may have + * been DESTROYED without our permission. + */ +static inline int rsi_set_memory_range_protected_safe(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_RAM, + RSI_NO_CHANGE_DESTROYED); +} + +static inline int rsi_set_memory_range_shared(phys_addr_t start, + phys_addr_t end) +{ + return rsi_set_memory_range(start, end, RSI_RIPAS_EMPTY, + RSI_CHANGE_DESTROYED); +} +#endif /* __ASM_RSI_H_ */ diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h new file mode 100644 index 0000000000000000000000000000000000000000..e6a211001bd38edbb8fa3922b17ec59e6d12bbc4 --- /dev/null +++ b/arch/arm64/include/asm/rsi_cmds.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RSI_CMDS_H +#define __ASM_RSI_CMDS_H + +#include <linux/arm-smccc.h> + +#include <asm/rsi_smc.h> + +#define RSI_GRANULE_SHIFT 12 +#define RSI_GRANULE_SIZE (_AC(1, UL) << RSI_GRANULE_SHIFT) + +enum ripas { + RSI_RIPAS_EMPTY = 0, + RSI_RIPAS_RAM = 1, + RSI_RIPAS_DESTROYED = 2, + RSI_RIPAS_DEV = 3, +}; + +static inline unsigned long rsi_request_version(unsigned long req, + unsigned long *out_lower, + unsigned long *out_higher) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_ABI_VERSION, req, 0, 0, 0, 0, 0, 0, &res); + + if (out_lower) + *out_lower = res.a1; + if (out_higher) + *out_higher = res.a2; + + return res.a0; +} + +static inline unsigned long rsi_get_realm_config(struct realm_config *cfg) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_REALM_CONFIG, virt_to_phys(cfg), + 0, 0, 0, 0, 0, 0, &res); + return res.a0; +} + +static inline unsigned long rsi_ipa_state_get(phys_addr_t start, + phys_addr_t end, + enum ripas *state, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_GET, + start, end, 0, 0, 0, 0, 0, + &res); + + if (res.a0 == RSI_SUCCESS) { + if (top) + *top = res.a1; + if (state) + *state = res.a2; + } + + return res.a0; +} + +static inline long rsi_set_addr_range_state(phys_addr_t start, + phys_addr_t end, + enum ripas state, + unsigned long flags, + phys_addr_t *top) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_RSI_IPA_STATE_SET, start, end, state, + flags, 0, 0, 0, &res); + + if (top) + *top = res.a1; + + if (res.a2 != RSI_ACCEPT) + return -EPERM; + + return res.a0; +} + +/** + * rsi_attestation_token_init - Initialise the operation to retrieve an + * attestation token.
+ * + * @challenge: The challenge data to be used in the attestation token + * generation. + * @size: Size of the challenge data in bytes. + * + * Initialises the attestation token generation and returns an upper bound + * on the attestation token size that can be used to allocate an adequate + * buffer. The caller is expected to subsequently call + * rsi_attestation_token_continue() to retrieve the attestation token data on + * the same CPU. + * + * Returns: + * On success, returns the upper limit of the attestation report size. + * Otherwise, -EINVAL + */ +static inline long +rsi_attestation_token_init(const u8 *challenge, unsigned long size) +{ + struct arm_smccc_1_2_regs regs = { 0 }; + + /* The challenge must be at least 32 bytes and at most 64 bytes */ + if (!challenge || size < 32 || size > 64) + return -EINVAL; + + regs.a0 = SMC_RSI_ATTESTATION_TOKEN_INIT; + memcpy(&regs.a1, challenge, size); + arm_smccc_1_2_smc(&regs, &regs); + + if (regs.a0 == RSI_SUCCESS) + return regs.a1; + + return -EINVAL; +} + +/** + * rsi_attestation_token_continue - Continue the operation to retrieve an + * attestation token. + * + * @granule: {I}PA of the Granule to which the token will be written. + * @offset: Offset within Granule to start of buffer in bytes. + * @size: The size of the buffer. + * @len: The number of bytes written to the buffer. + * + * Retrieves up to a RSI_GRANULE_SIZE worth of token data per call. The caller + * is expected to call rsi_attestation_token_init() before calling this + * function to retrieve the attestation token. + * + * Return: + * * %RSI_SUCCESS - Attestation token retrieved successfully. + * * %RSI_INCOMPLETE - Token generation is not complete. + * * %RSI_ERROR_INPUT - A parameter was not valid. + * * %RSI_ERROR_STATE - Attestation not in progress. + */ +static inline unsigned long rsi_attestation_token_continue(phys_addr_t granule, + unsigned long offset, + unsigned long size, + unsigned long *len) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(SMC_RSI_ATTESTATION_TOKEN_CONTINUE, + granule, offset, size, 0, &res); + + if (len) + *len = res.a1; + return res.a0; +} + +#endif /* __ASM_RSI_CMDS_H */ diff --git a/arch/arm64/include/asm/rsi_smc.h b/arch/arm64/include/asm/rsi_smc.h new file mode 100644 index 0000000000000000000000000000000000000000..6cb070eca9e9b2b7650832c96aab3795a558058e --- /dev/null +++ b/arch/arm64/include/asm/rsi_smc.h @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#ifndef __ASM_RSI_SMC_H_ +#define __ASM_RSI_SMC_H_ + +#include <linux/arm-smccc.h> + +/* + * This file describes the Realm Services Interface (RSI) Application Binary + * Interface (ABI) for SMC calls made from within the Realm to the RMM and + * serviced by the RMM. + */ + +/* + * The major version number of the RSI implementation. This is increased when + * the binary format or semantics of the SMC calls change. + */ +#define RSI_ABI_VERSION_MAJOR UL(1) + +/* + * The minor version number of the RSI implementation. This is increased when + * a bug is fixed, or a feature is added without breaking binary compatibility.
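+ * For example, an implementation at version 1.0 reports the value 0x10000: + * major in bits [31:16], minor in bits [15:0], matching RSI_ABI_VERSION below.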
+ */ +#define RSI_ABI_VERSION_MINOR UL(0) + +#define RSI_ABI_VERSION ((RSI_ABI_VERSION_MAJOR << 16) | \ + RSI_ABI_VERSION_MINOR) + +#define RSI_ABI_VERSION_GET_MAJOR(_version) ((_version) >> 16) +#define RSI_ABI_VERSION_GET_MINOR(_version) ((_version) & 0xFFFF) + +#define RSI_SUCCESS UL(0) +#define RSI_ERROR_INPUT UL(1) +#define RSI_ERROR_STATE UL(2) +#define RSI_INCOMPLETE UL(3) +#define RSI_ERROR_UNKNOWN UL(4) + +#define SMC_RSI_FID(n) ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD, \ + n) + +/* + * Returns RSI version. + * + * arg1 == Requested interface revision + * ret0 == Status / error + * ret1 == Lower implemented interface revision + * ret2 == Higher implemented interface revision + */ +#define SMC_RSI_ABI_VERSION SMC_RSI_FID(0x190) + +/* + * Read feature register. + * + * arg1 == Feature register index + * ret0 == Status / error + * ret1 == Feature register value + */ +#define SMC_RSI_FEATURES SMC_RSI_FID(0x191) + +/* + * Read measurement for the current Realm. + * + * arg1 == Index, which measurement slot to read + * ret0 == Status / error + * ret1 == Measurement value, bytes: 0 - 7 + * ret2 == Measurement value, bytes: 8 - 15 + * ret3 == Measurement value, bytes: 16 - 23 + * ret4 == Measurement value, bytes: 24 - 31 + * ret5 == Measurement value, bytes: 32 - 39 + * ret6 == Measurement value, bytes: 40 - 47 + * ret7 == Measurement value, bytes: 48 - 55 + * ret8 == Measurement value, bytes: 56 - 63 + */ +#define SMC_RSI_MEASUREMENT_READ SMC_RSI_FID(0x192) + +/* + * Extend Realm Extensible Measurement (REM) value. + * + * arg1 == Index, which measurement slot to extend + * arg2 == Size of realm measurement in bytes, max 64 bytes + * arg3 == Measurement value, bytes: 0 - 7 + * arg4 == Measurement value, bytes: 8 - 15 + * arg5 == Measurement value, bytes: 16 - 23 + * arg6 == Measurement value, bytes: 24 - 31 + * arg7 == Measurement value, bytes: 32 - 39 + * arg8 == Measurement value, bytes: 40 - 47 + * arg9 == Measurement value, bytes: 48 - 55 + * arg10 == Measurement value, bytes: 56 - 63 + * ret0 == Status / error + */ +#define SMC_RSI_MEASUREMENT_EXTEND SMC_RSI_FID(0x193) + +/* + * Initialize the operation to retrieve an attestation token. + * + * arg1 == Challenge value, bytes: 0 - 7 + * arg2 == Challenge value, bytes: 8 - 15 + * arg3 == Challenge value, bytes: 16 - 23 + * arg4 == Challenge value, bytes: 24 - 31 + * arg5 == Challenge value, bytes: 32 - 39 + * arg6 == Challenge value, bytes: 40 - 47 + * arg7 == Challenge value, bytes: 48 - 55 + * arg8 == Challenge value, bytes: 56 - 63 + * ret0 == Status / error + * ret1 == Upper bound of token size in bytes + */ +#define SMC_RSI_ATTESTATION_TOKEN_INIT SMC_RSI_FID(0x194) + +/* + * Continue the operation to retrieve an attestation token. + * + * arg1 == The IPA of token buffer + * arg2 == Offset within the granule of the token buffer + * arg3 == Size of the granule buffer + * ret0 == Status / error + * ret1 == Length of token bytes copied to the granule buffer + */ +#define SMC_RSI_ATTESTATION_TOKEN_CONTINUE SMC_RSI_FID(0x195) + +#ifndef __ASSEMBLY__ + +struct realm_config { + union { + struct { + unsigned long ipa_bits; /* Width of IPA in bits */ + unsigned long hash_algo; /* Hash algorithm */ + }; + u8 pad[0x200]; + }; + union { + u8 rpv[64]; /* Realm Personalization Value */ + u8 pad2[0xe00]; + }; + /* + * The RMM requires the configuration structure to be aligned to a 4k + * boundary; ensure this happens by aligning this structure.
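+ * The 0x1000 alignment value below matches RSI_GRANULE_SIZE.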
+ */ +} __aligned(0x1000); + +#endif /* __ASSEMBLY__ */ + +/* + * Read configuration for the current Realm. + * + * arg1 == struct realm_config addr + * ret0 == Status / error + */ +#define SMC_RSI_REALM_CONFIG SMC_RSI_FID(0x196) + +/* + * Request RIPAS of a target IPA range to be changed to a specified value. + * + * arg1 == Base IPA address of target region + * arg2 == Top of the region + * arg3 == RIPAS value + * arg4 == flags + * ret0 == Status / error + * ret1 == Top of modified IPA range + * ret2 == Whether the Host accepted or rejected the request + */ +#define SMC_RSI_IPA_STATE_SET SMC_RSI_FID(0x197) + +#define RSI_NO_CHANGE_DESTROYED UL(0) +#define RSI_CHANGE_DESTROYED UL(1) + +#define RSI_ACCEPT UL(0) +#define RSI_REJECT UL(1) + +/* + * Get RIPAS of a target IPA range. + * + * arg1 == Base IPA of target region + * arg2 == End of target IPA region + * ret0 == Status / error + * ret1 == Top of IPA region which has the reported RIPAS value + * ret2 == RIPAS value + */ +#define SMC_RSI_IPA_STATE_GET SMC_RSI_FID(0x198) + +/* + * Make a Host call. + * + * arg1 == IPA of host call structure + * ret0 == Status / error + */ +#define SMC_RSI_HOST_CALL SMC_RSI_FID(0x199) + +#endif /* __ASM_RSI_SMC_H_ */ diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h index 2031b31c05f911f9f6a7c0ef8615a82695e57a9d..c859602c9af72114c6b9a7dd7f8d70b714112e14 100644 --- a/arch/arm64/include/asm/set_memory.h +++ b/arch/arm64/include/asm/set_memory.h @@ -3,6 +3,7 @@ #ifndef _ASM_ARM64_SET_MEMORY_H #define _ASM_ARM64_SET_MEMORY_H +#include <asm/mem_encrypt.h> #include #include @@ -15,4 +16,7 @@ int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + #endif /* _ASM_ARM64_SET_MEMORY_H */ diff --git a/arch/arm64/include/asm/virtcca_cvm_guest.h b/arch/arm64/include/asm/virtcca_cvm_guest.h index c1ac1a9cd5370ced547f6e9512b3679a890f3715..a074bd65dab92706e4a499b8d5af38c17f324288 100644 --- a/arch/arm64/include/asm/virtcca_cvm_guest.h +++ b/arch/arm64/include/asm/virtcca_cvm_guest.h @@ -20,6 +20,13 @@ extern void virtcca_cvm_tsi_init(void); extern void swiotlb_unmap_notify(unsigned long paddr, unsigned long size); +extern void virtcca_its_init(void); + +extern struct page *virtcca_its_alloc_shared_pages_node(int node, gfp_t gfp, + unsigned int order); + +extern void virtcca_its_free_shared_pages(void *addr, int order); + #else static inline int set_cvm_memory_encrypted(unsigned long addr, int numpages) @@ -42,5 +49,6 @@ static inline void __init swiotlb_cvm_update_mem_attributes(void) {} static inline void virtcca_cvm_tsi_init(void) {} static inline void swiotlb_unmap_notify(unsigned long paddr, unsigned long size) {} + #endif /* CONFIG_HISI_VIRTCCA_GUEST */ #endif /* __VIRTCCA_CVM_GUEST_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4ce58887302a13f7a0995980623b64eb23b2047b..3d404a2cc961ae52657da727e3123d679061eea5 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -34,7 +34,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ syscall.o proton-pack.o idreg-override.o idle.o \ - patching.o + patching.o rsi.o obj-$(CONFIG_AARCH32_EL0) += binfmt_elf32.o sys32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/efi.c
b/arch/arm64/kernel/efi.c index 182a89565dc961fa530c9739832f6623b99256ab..e6f99d9f87f10a6eaaf44c3ec5a1353cb5d3d7ba 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -33,8 +33,16 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) u64 attr = md->attribute; u32 type = md->type; - if (type == EFI_MEMORY_MAPPED_IO) - return PROT_DEVICE_nGnRE; + if (type == EFI_MEMORY_MAPPED_IO) { + pgprot_t prot = __pgprot(PROT_DEVICE_nGnRE); + + if (arm64_is_protected_mmio(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT)) + prot = pgprot_encrypted(prot); + else + prot = pgprot_decrypted(prot); + return pgprot_val(prot); + } if (region_is_misaligned(md)) { static bool __initdata code_is_misaligned; diff --git a/arch/arm64/kernel/rsi.c b/arch/arm64/kernel/rsi.c new file mode 100644 index 0000000000000000000000000000000000000000..be868cb5a4b45cef31119826dbf9ca79645ca33f --- /dev/null +++ b/arch/arm64/kernel/rsi.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct realm_config config; + +unsigned long prot_ns_shared; +EXPORT_SYMBOL(prot_ns_shared); + +DEFINE_STATIC_KEY_FALSE_RO(rsi_present); +EXPORT_SYMBOL(rsi_present); + +bool cc_platform_has(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_MEM_ENCRYPT: + return is_realm_world(); + default: + return false; + } +} +EXPORT_SYMBOL_GPL(cc_platform_has); + +static bool rsi_version_matches(void) +{ + unsigned long ver_lower, ver_higher; + unsigned long ret = rsi_request_version(RSI_ABI_VERSION, + &ver_lower, + &ver_higher); + + if (ret == SMCCC_RET_NOT_SUPPORTED) + return false; + + if (ret != RSI_SUCCESS) { + pr_err("RME: RMM doesn't support RSI version %lu.%lu. Supported range: %lu.%lu-%lu.%lu\n", + RSI_ABI_VERSION_MAJOR, RSI_ABI_VERSION_MINOR, + RSI_ABI_VERSION_GET_MAJOR(ver_lower), + RSI_ABI_VERSION_GET_MINOR(ver_lower), + RSI_ABI_VERSION_GET_MAJOR(ver_higher), + RSI_ABI_VERSION_GET_MINOR(ver_higher)); + return false; + } + + pr_info("RME: Using RSI version %lu.%lu\n", + RSI_ABI_VERSION_GET_MAJOR(ver_lower), + RSI_ABI_VERSION_GET_MINOR(ver_lower)); + + return true; +} + +static void __init arm64_rsi_setup_memory(void) +{ + u64 i; + phys_addr_t start, end; + + /* + * Iterate over the available memory ranges and convert the state to + * protected memory. We should take extra care to ensure that we DO NOT + * permit any "DESTROYED" pages to be converted to "RAM". + * + * panic() is used because if the attempt to switch the memory to + * protected has failed here, then future accesses to the memory are + * simply going to be reflected as a SEA (Synchronous External Abort) + * which we can't handle. Bailing out early prevents the guest from + * limping on and dying later. + */ + for_each_mem_range(i, &start, &end) { + if (rsi_set_memory_range_protected_safe(start, end)) { + panic("Failed to set memory range to protected: %pa-%pa", + &start, &end); + } + } } + +/* + * Check if a given PA range is Trusted (e.g., Protected memory, a Trusted Device + * mapping, or an MMIO emulated in the Realm world). + * + * We can rely on the RIPAS value of the region to detect if a given region is + * protected. + * + * RIPAS_DEV - Trusted device memory or trusted emulated MMIO (in the Realm + * world) + * RIPAS_RAM - Memory (RAM), protected by the RMM guarantees (e.g., Firmware + * reserved regions for data sharing).
+ * + * RIPAS_DESTROYED is a special case of one of the above, where the host did + * something without our permission and as such we can't do anything about it. + * + * The only case where something is emulated by the untrusted hypervisor or is + * backed by shared memory is indicated by RSI_RIPAS_EMPTY. + */ +bool arm64_rsi_is_protected(phys_addr_t base, size_t size) +{ + enum ripas ripas; + phys_addr_t end, top; + + /* Overflow ? */ + if (WARN_ON(base + size <= base)) + return false; + + end = ALIGN(base + size, RSI_GRANULE_SIZE); + base = ALIGN_DOWN(base, RSI_GRANULE_SIZE); + + while (base < end) { + if (WARN_ON(rsi_ipa_state_get(base, end, &ripas, &top))) + break; + if (WARN_ON(top <= base)) + break; + if (ripas == RSI_RIPAS_EMPTY) + break; + base = top; + } + + return base >= end; +} +EXPORT_SYMBOL(arm64_rsi_is_protected); + +static int realm_ioremap_hook(phys_addr_t phys, size_t size, pgprot_t *prot) +{ + if (arm64_rsi_is_protected(phys, size)) + *prot = pgprot_encrypted(*prot); + else + *prot = pgprot_decrypted(*prot); + + return 0; +} + +void __init arm64_rsi_init(void) +{ + if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_SMC) + return; + if (!rsi_version_matches()) + return; + if (!can_set_direct_map()) { + pr_err("RME: Cannot set the kernel direct map, consider CONFIG_RODATA_FULL_DEFAULT_ENABLED=y or rodata=full\n"); + return; + } + if (WARN_ON(rsi_get_realm_config(&config))) + return; + prot_ns_shared = BIT(config.ipa_bits - 1); + + if (arm64_ioremap_prot_hook_register(realm_ioremap_hook)) + return; + + if (realm_register_memory_enc_ops()) + return; + + arm64_rsi_setup_memory(); + + static_branch_enable(&rsi_present); +} + diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7099c69a2c7f829c60623f73cf32faf4a720c6ac..09a15a9a8b2b58184ca7a2c394ec758f0f62679d 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -420,6 +421,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) else psci_acpi_init(); + arm64_rsi_init(); + init_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); diff --git a/arch/arm64/kernel/virtcca_cvm_guest.c b/arch/arm64/kernel/virtcca_cvm_guest.c index e5368d3e1bfeea6a4f8fb03a8a43ec82fe005a9e..da2c4a17f8374a9d743dfd62f961be88dad09c94 100644 --- a/arch/arm64/kernel/virtcca_cvm_guest.c +++ b/arch/arm64/kernel/virtcca_cvm_guest.c @@ -154,3 +154,27 @@ void swiotlb_unmap_notify(unsigned long paddr, unsigned long size) arm_smccc_1_1_smc(SMC_TSI_SEC_MEM_UNMAP, paddr, size, &res); } + +static struct device cvm_alloc_device; + +void __init virtcca_its_init(void) +{ + if (is_virtcca_cvm_world()) { + device_initialize(&cvm_alloc_device); + enable_swiotlb_for_cvm_dev(&cvm_alloc_device, true); + } +} + +struct page *virtcca_its_alloc_shared_pages_node(int node, gfp_t gfp, + unsigned int order) +{ + return swiotlb_alloc(&cvm_alloc_device, (1 << order) * PAGE_SIZE); +} + +void virtcca_its_free_shared_pages(void *addr, int order) +{ + if (order < 0) + return; + + swiotlb_free(&cvm_alloc_device, (struct page *)addr, (1 << order) * PAGE_SIZE); +} diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 60454256945b8b3ff47bdb609b51b3d5ad9b0572..2fc8c6dd04070b61078e2e8627fd3acc606adc7e 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ - ioremap.o mmap.o pgd.o mmu.o \ + ioremap.o mmap.o 
pgd.o mem_encrypt.o mmu.o \ context.o proc.o pageattr.o fixmap.o obj-$(CONFIG_ARM64_CONTPTE) += contpte.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 66a7fff9f3736f7c3e0fa7cccd1369fcf60e6257..1c02974de059a521e6a1c86204814936462df751 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -604,12 +605,19 @@ void __init bootmem_init(void) */ void __init mem_init(void) { + unsigned int flags = SWIOTLB_VERBOSE; bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); + if (is_realm_world()) { + swiotlb = true; + flags |= SWIOTLB_FORCE; + } + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC)) swiotlb = true; - swiotlb_init(swiotlb, SWIOTLB_VERBOSE); + swiotlb_init(swiotlb, flags); + swiotlb_update_mem_attributes(); swiotlb_cvm_update_mem_attributes(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 269f2f63ab7dc4c1d3dccf3f6831ea47abdb3477..6cc0b7e7eb0384b991dd59ce18ea2a476cb20958 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -3,10 +3,22 @@ #include #include +static ioremap_prot_hook_t ioremap_prot_hook; + +int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook) +{ + if (WARN_ON(ioremap_prot_hook)) + return -EBUSY; + + ioremap_prot_hook = hook; + return 0; +} + void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, unsigned long prot) { unsigned long last_addr = phys_addr + size - 1; + pgprot_t pgprot = __pgprot(prot); /* Don't allow outside PHYS_MASK */ if (last_addr & ~PHYS_MASK) @@ -16,7 +28,16 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr)))) return NULL; - return generic_ioremap_prot(phys_addr, size, __pgprot(prot)); + /* + * If a hook is registered (e.g. for confidential computing + * purposes), call that now and barf if it fails. + */ + if (unlikely(ioremap_prot_hook) && + WARN_ON(ioremap_prot_hook(phys_addr, size, &pgprot))) { + return NULL; + } + + return generic_ioremap_prot(phys_addr, size, pgprot); } EXPORT_SYMBOL(ioremap_prot); diff --git a/arch/arm64/mm/mem_encrypt.c b/arch/arm64/mm/mem_encrypt.c new file mode 100644 index 0000000000000000000000000000000000000000..ee3c0ab043845f9dc62715da4e60afa829f8ccb2 --- /dev/null +++ b/arch/arm64/mm/mem_encrypt.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of the memory encryption/decryption API. + * + * Since the low-level details of the operation depend on the + * Confidential Computing environment (e.g. pKVM, CCA, ...), this just + * acts as a top-level dispatcher to whatever hooks may have been + * registered. + * + * Author: Will Deacon + * Copyright (C) 2024 Google LLC + * + * "Hello, boils and ghouls!" 
+ */ + +#include <linux/bug.h> +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/string.h> + +#include <asm/mem_encrypt.h> + +static const struct arm64_mem_crypt_ops *crypt_ops; + +int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops) +{ + if (WARN_ON(crypt_ops)) + return -EBUSY; + + crypt_ops = ops; + return 0; +} + +int set_memory_encrypted(unsigned long addr, int numpages) +{ + if (likely(!crypt_ops) || WARN_ON(!PAGE_ALIGNED(addr))) + return 0; + + return crypt_ops->encrypt(addr, numpages); +} +EXPORT_SYMBOL_GPL(set_memory_encrypted); + +int set_memory_decrypted(unsigned long addr, int numpages) +{ + if (likely(!crypt_ops) || WARN_ON(!PAGE_ALIGNED(addr))) + return 0; + + return crypt_ops->decrypt(addr, numpages); +} +EXPORT_SYMBOL_GPL(set_memory_decrypted); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 06e81d1dbc1e968d89e46e55728f744441fc053b..7ca69627ae29f89f752d8fb3a73990b4656fe68d 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -5,10 +5,12 @@ #include #include #include +#include <linux/mem_encrypt.h> #include #include #include +#include <asm/mem_encrypt.h> #include #include #include @@ -23,14 +25,16 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED bool can_set_direct_map(void) { /* - * rodata_full and DEBUG_PAGEALLOC require linear map to be - * mapped at page granularity, so that it is possible to + * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require linear + * map to be mapped at page granularity, so that it is possible to * protect/unprotect single pages. * * KFENCE pool requires page-granular mapping if initialized late. + * + * Realms need to make pages shared/protected at page granularity. */ return rodata_full || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + arm64_kfence_can_set_direct_map() || is_realm_world(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -60,7 +64,13 @@ static int __change_memory_common(unsigned long start, unsigned long size, ret = apply_to_page_range(&init_mm, start, size, change_page_range, &data); - flush_tlb_kernel_range(start, start + size); + /* + * If the memory is being made valid without changing any other bits + * then a TLBI isn't required as a non-valid entry cannot be cached in + * the TLB.
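+ * __set_memory_enc_dec() below relies on this: after a RIPAS change it + * restores the mapping by setting only PTE_VALID.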
+ */ + if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask)) + flush_tlb_kernel_range(start, start + size); return ret; } @@ -192,6 +202,86 @@ int set_direct_map_default_noflush(struct page *page) PAGE_SIZE, change_page_range, &data); } +static int __set_memory_enc_dec(unsigned long addr, + int numpages, + bool encrypt) +{ + unsigned long set_prot = 0, clear_prot = 0; + phys_addr_t start, end; + int ret; + + if (!is_realm_world()) + return 0; + + if (!__is_lm_address(addr)) + return -EINVAL; + + start = __virt_to_phys(addr); + end = start + numpages * PAGE_SIZE; + + if (encrypt) + clear_prot = PROT_NS_SHARED; + else + set_prot = PROT_NS_SHARED; + + /* + * Break the mapping before we make any changes to avoid stale TLB + * entries or Synchronous External Aborts caused by RIPAS_EMPTY. + */ + ret = __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(set_prot), + __pgprot(clear_prot | PTE_VALID)); + + if (ret) + return ret; + + if (encrypt) + ret = rsi_set_memory_range_protected(start, end); + else + ret = rsi_set_memory_range_shared(start, end); + + if (ret) + return ret; + + return __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(PTE_VALID), + __pgprot(0)); +} + +static int realm_set_memory_encrypted(unsigned long addr, int numpages) +{ + int ret = __set_memory_enc_dec(addr, numpages, true); + + /* + * If the request to change state fails, then the only sensible course + * of action for the caller is to leak the memory. + */ + WARN(ret, "Failed to encrypt memory, %d pages will be leaked", + numpages); + + return ret; +} + +static int realm_set_memory_decrypted(unsigned long addr, int numpages) +{ + int ret = __set_memory_enc_dec(addr, numpages, false); + + WARN(ret, "Failed to decrypt memory, %d pages will be leaked", + numpages); + + return ret; +} + +static const struct arm64_mem_crypt_ops realm_crypt_ops = { + .encrypt = realm_set_memory_encrypted, + .decrypt = realm_set_memory_decrypted, +}; + +int realm_register_memory_enc_ops(void) +{ + return arm64_mem_crypt_ops_register(&realm_crypt_ops); +} + #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index a2a3e89f2d9ab371a5a62ed24b7fa29f459726ab..136a7ec1b50ad44e97b1e90d0d282e7657adadd4 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -457,7 +457,6 @@ void free_initmem(void) unsigned long kernel_end = (unsigned long)&_end; /* Remap kernel text and data, but do not touch init section yet. */ - kernel_set_to_readonly = true; map_pages(init_end, __pa(init_end), kernel_end - init_end, PAGE_KERNEL, 0); @@ -491,11 +490,18 @@ #ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void) { - /* rodata memory was already mapped with KERNEL_RO access rights by - pagetable_init() and map_pages().
No need to do additional stuff here */ - unsigned long roai_size = __end_ro_after_init - __start_ro_after_init; + unsigned long start = (unsigned long) &__start_rodata; + unsigned long end = (unsigned long) &__end_rodata; + + pr_info("Write protecting the kernel read-only data: %luk\n", + (end - start) >> 10); + + kernel_set_to_readonly = true; + map_pages(start, __pa(start), end - start, PAGE_KERNEL, 0); - pr_info("Write protected read-only-after-init data: %luk\n", roai_size >> 10); + /* force the kernel to see the new page table entries */ + flush_cache_all(); + flush_tlb_all(); } #endif diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9b5103d0e6de914bc45c1b6267939e066801dcce..92fd29a1990f5882c278eca5cab79ebe1105df2b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -12,12 +12,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -27,13 +29,10 @@ #include #include #include +#include #include #include -#ifdef CONFIG_HISI_VIRTCCA_GUEST -#include -#include #include -#endif #include #include @@ -293,6 +292,7 @@ struct its_device { struct its_node *its; struct event_lpi_map event_map; void *itt; + u32 itt_sz; u32 nr_ites; u32 device_id; bool shared; @@ -388,115 +388,117 @@ static int alloc_devid_from_rsv_pools(struct rsv_devid_pool **devid_pool, #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) #define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K) -#ifdef CONFIG_ARM64_HISI_IPIV -void __iomem *gic_data_rdist_get_vlpi_base(void) -{ - return gic_data_rdist_vlpi_base(); -} -EXPORT_SYMBOL(gic_data_rdist_get_vlpi_base); -#endif - -#ifdef CONFIG_VIRT_PLAT_DEV -/* - * Currently we only build *one* devid pool. - */ -static int build_devid_pools(void) +static struct page *its_alloc_pages_node(int node, gfp_t gfp, + unsigned int order) { - struct its_node *its; - - its = list_first_entry(&its_nodes, struct its_node, entry); - if (readl_relaxed(its->base + GITS_IIDR) != 0x00051736) - return -EINVAL; + struct page *page; + int ret = 0; - return probe_devid_pool_one(); -} -#endif + if (virtcca_cvm_domain()) + return virtcca_its_alloc_shared_pages_node(node, gfp, order); -#ifdef CONFIG_HISI_VIRTCCA_GUEST + page = alloc_pages_node(node, gfp, order); -static struct device cvm_alloc_device; -static LIST_HEAD(cvm_its_nodes); -static raw_spinlock_t cvm_its_lock; + if (!page) + return NULL; -struct its_device_order { - struct its_device *dev; - struct list_head entry; - int itt_order; -}; + ret = set_memory_decrypted((unsigned long)page_address(page), + 1 << order); + /* + * If set_memory_decrypted() fails then we don't know what state the + * page is in, so we can't free it. Instead we leak it. + * set_memory_decrypted() will already have WARNed. 
+ */ + if (ret) + return NULL; -static inline struct page *its_alloc_shared_pages_node(int node, gfp_t gfp, - unsigned int order) -{ - return swiotlb_alloc(&cvm_alloc_device, (1 << order) * PAGE_SIZE); + return page; } -static inline struct page *its_alloc_shared_pages(gfp_t gfp, unsigned int order) +static struct page *its_alloc_pages(gfp_t gfp, unsigned int order) { - return its_alloc_shared_pages_node(NUMA_NO_NODE, gfp, order); + return its_alloc_pages_node(NUMA_NO_NODE, gfp, order); } -static void its_free_shared_pages(void *addr, int order) +static void its_free_pages(void *addr, unsigned int order) { - if (order < 0) + if (virtcca_cvm_domain()) { + virtcca_its_free_shared_pages(addr, order); return; + } - swiotlb_free(&cvm_alloc_device, (struct page *)addr, (1 << order) * PAGE_SIZE); + /* + * If the memory cannot be encrypted again then we must leak the pages. + * set_memory_encrypted() will already have WARNed. + */ + if (set_memory_encrypted((unsigned long)addr, 1 << order)) + return; + free_pages((unsigned long)addr, order); } -static int add_its_device_order(struct its_device *dev, int itt_order) +static struct gen_pool *itt_pool; + +static void *itt_alloc_pool(int node, int size) { - struct its_device_order *new; - unsigned long flags; + unsigned long addr; + struct page *page; - new = kmalloc(sizeof(struct its_device_order), GFP_KERNEL); - if (!new) - return -ENOMEM; - new->dev = dev; - new->itt_order = itt_order; - raw_spin_lock_irqsave(&cvm_its_lock, flags); - list_add_tail(&new->entry, &cvm_its_nodes); - raw_spin_unlock_irqrestore(&cvm_its_lock, flags); - return 0; + if (size >= PAGE_SIZE) { + page = its_alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, get_order(size)); + + return page ? page_address(page) : NULL; + } + + do { + addr = gen_pool_alloc(itt_pool, size); + if (addr) + break; + + page = its_alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!page) + break; + + gen_pool_add(itt_pool, (unsigned long)page_address(page), PAGE_SIZE, node); + } while (!addr); + + return (void *)addr; } -/* get its device order and then free its device order */ -static int get_its_device_order(struct its_device *dev) +static void itt_free_pool(void *addr, int size) { - struct its_device_order *pos, *tmp; - unsigned long flags; - int itt_order = -1; - - raw_spin_lock_irqsave(&cvm_its_lock, flags); - list_for_each_entry_safe(pos, tmp, &cvm_its_nodes, entry) { - if (pos->dev == dev) { - itt_order = pos->itt_order; - list_del(&pos->entry); - kfree(pos); - goto found; - } + if (!addr) + return; + + if (size >= PAGE_SIZE) { + its_free_pages(addr, get_order(size)); + return; } -found: - raw_spin_unlock_irqrestore(&cvm_its_lock, flags); - return itt_order; + + gen_pool_free(itt_pool, (unsigned long)addr, size); } -static void *its_alloc_shared_page_address(struct its_device *dev, - struct its_node *its, int sz) +#ifdef CONFIG_ARM64_HISI_IPIV +void __iomem *gic_data_rdist_get_vlpi_base(void) { - struct page *page; - int itt_order; + return gic_data_rdist_vlpi_base(); +} +EXPORT_SYMBOL(gic_data_rdist_get_vlpi_base); +#endif - itt_order = get_order(sz); - if (add_its_device_order(dev, itt_order)) - return NULL; +#ifdef CONFIG_VIRT_PLAT_DEV +/* + * Currently we only build *one* devid pool. 
+ */ +static int build_devid_pools(void) +{ + struct its_node *its; - page = its_alloc_shared_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, - itt_order); - if (!page) - return NULL; - return (void *)page_address(page); -} + its = list_first_entry(&its_nodes, struct its_node, entry); + if (readl_relaxed(its->base + GITS_IIDR) != 0x00051736) + return -EINVAL; + return probe_devid_pool_one(); +} #endif /* @@ -939,7 +941,6 @@ static struct its_collection *its_build_mapd_cmd(struct its_node *its, u8 size = ilog2(desc->its_mapd_cmd.dev->nr_ites); itt_addr = virt_to_phys(desc->its_mapd_cmd.dev->itt); - itt_addr = ALIGN(itt_addr, ITS_ITT_ALIGN); its_encode_cmd(cmd, GITS_CMD_MAPD); its_encode_devid(cmd, desc->its_mapd_cmd.dev->device_id); @@ -2515,13 +2516,8 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) { struct page *prop_page; -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - prop_page = its_alloc_shared_pages(gfp_flags, - get_order(LPI_PROPBASE_SZ)); - else -#endif - prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); + prop_page = its_alloc_pages(gfp_flags, + get_order(LPI_PROPBASE_SZ)); if (!prop_page) return NULL; @@ -2532,14 +2528,7 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) static void its_free_prop_table(struct page *prop_page) { -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(page_address(prop_page), - get_order(LPI_PROPBASE_SZ)); - else -#endif - free_pages((unsigned long)page_address(prop_page), - get_order(LPI_PROPBASE_SZ)); + its_free_pages(page_address(prop_page), get_order(LPI_PROPBASE_SZ)); } static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size) @@ -2661,13 +2650,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, order = get_order(GITS_BASER_PAGES_MAX * psz); } -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - page = its_alloc_shared_pages_node(its->numa_node, - GFP_KERNEL | __GFP_ZERO, order); - else -#endif - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); + page = its_alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); if (!page) return -ENOMEM; @@ -2680,12 +2663,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, /* 52bit PA is supported only when PageSize=64K */ if (psz != SZ_64K) { pr_err("ITS: no 52bit PA support when psz=%d\n", psz); -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(base, order); - else -#endif - free_pages((unsigned long)base, order); + its_free_pages(base, order); return -ENXIO; } @@ -2741,12 +2719,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser, pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n", &its->phys_base, its_base_type_string[type], val, tmp); -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(base, order); - else -#endif - free_pages((unsigned long)base, order); + its_free_pages(base, order); return -ENXIO; } @@ -2885,14 +2858,7 @@ static void its_free_tables(struct its_node *its) for (i = 0; i < GITS_BASER_NR_REGS; i++) { if (its->tables[i].base) { -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(its->tables[i].base, - its->tables[i].order); - else -#endif - free_pages((unsigned long)its->tables[i].base, - its->tables[i].order); + its_free_pages(its->tables[i].base, its->tables[i].order); its->tables[i].base = NULL; } } @@ -3162,13 +3128,7 @@ static bool 
allocate_vpe_l2_table(int cpu, u32 id) /* Allocate memory for 2nd level table */ if (!table[idx]) { -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - page = its_alloc_shared_pages(GFP_KERNEL | __GFP_ZERO, - get_order(psz)); - else -#endif - page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz)); + page = its_alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz)); if (!page) return false; @@ -3291,13 +3251,7 @@ static int allocate_vpe_l1_table(void) pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n", np, npg, psz, epp, esz); -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - page = its_alloc_shared_pages(GFP_ATOMIC | __GFP_ZERO, - get_order(np * PAGE_SIZE)); - else -#endif - page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE)); + page = its_alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE)); if (!page) return -ENOMEM; @@ -3343,14 +3297,7 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags) { struct page *pend_page; -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - pend_page = its_alloc_shared_pages(gfp_flags | __GFP_ZERO, - get_order(LPI_PENDBASE_SZ)); - else -#endif - pend_page = alloc_pages(gfp_flags | __GFP_ZERO, - get_order(LPI_PENDBASE_SZ)); + pend_page = its_alloc_pages(gfp_flags | __GFP_ZERO, get_order(LPI_PENDBASE_SZ)); if (!pend_page) return NULL; @@ -3362,13 +3309,7 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags) static void its_free_pending_table(struct page *pt) { -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(page_address(pt), - get_order(LPI_PENDBASE_SZ)); - else -#endif - free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ)); + its_free_pages(page_address(pt), get_order(LPI_PENDBASE_SZ)); } /* @@ -3703,15 +3644,8 @@ static bool its_alloc_table_entry(struct its_node *its, /* Allocate memory for 2nd level table */ if (!table[idx]) { -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - page = its_alloc_shared_pages_node(its->numa_node, - GFP_KERNEL | __GFP_ZERO, - get_order(baser->psz)); - else -#endif - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, - get_order(baser->psz)); + page = its_alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + get_order(baser->psz)); if (!page) return false; @@ -3806,20 +3740,18 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, if (WARN_ON(!is_power_of_2(nvecs))) nvecs = roundup_pow_of_two(nvecs); - dev = kzalloc(sizeof(*dev), GFP_KERNEL); /* * Even if the device wants a single LPI, the ITT must be * sized as a power of two (and you need at least one bit...). 
*/ nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); - sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - itt = its_alloc_shared_page_address(dev, its, sz); - else -#endif - itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node); + sz = max(sz, ITS_ITT_ALIGN); + + itt = itt_alloc_pool(its->numa_node, sz); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (alloc_lpis) { lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) @@ -3831,14 +3763,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, lpi_base = 0; } - if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) { + if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) { kfree(dev); -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(itt, get_order(sz)); - else -#endif - kfree(itt); + itt_free_pool(itt, sz); bitmap_free(lpi_map); kfree(col_map); return NULL; @@ -3848,6 +3775,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, dev->its = its; dev->itt = itt; + dev->itt_sz = sz; dev->nr_ites = nr_ites; dev->event_map.lpi_map = lpi_map; dev->event_map.col_map = col_map; @@ -3875,12 +3803,7 @@ static void its_free_device(struct its_device *its_dev) list_del(&its_dev->entry); raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); kfree(its_dev->event_map.col_map); -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(its_dev->itt, get_its_device_order(its_dev)); - else -#endif - kfree(its_dev->itt); + itt_free_pool(its_dev->itt, its_dev->itt_sz); #ifdef CONFIG_VIRT_PLAT_DEV if (its_dev->is_vdev) { @@ -5758,15 +5681,9 @@ static int __init its_probe_one(struct its_node *its) } } -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - page = its_alloc_shared_pages_node(its->numa_node, - GFP_KERNEL | __GFP_ZERO, - get_order(ITS_CMD_QUEUE_SZ)); - else -#endif - page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, - get_order(ITS_CMD_QUEUE_SZ)); + page = its_alloc_pages_node(its->numa_node, + GFP_KERNEL | __GFP_ZERO, + get_order(ITS_CMD_QUEUE_SZ)); if (!page) { err = -ENOMEM; goto out_unmap_sgir; @@ -5830,12 +5747,7 @@ static int __init its_probe_one(struct its_node *its) out_free_tables: its_free_tables(its); out_free_cmd: -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) - its_free_shared_pages(its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); - else -#endif - free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); + its_free_pages(its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); out_unmap_sgir: if (its->sgir_base) iounmap(its->sgir_base); @@ -6324,13 +6236,12 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, #endif int err; -#ifdef CONFIG_HISI_VIRTCCA_GUEST - if (is_virtcca_cvm_world()) { - device_initialize(&cvm_alloc_device); - enable_swiotlb_for_cvm_dev(&cvm_alloc_device, true); - raw_spin_lock_init(&cvm_its_lock); - } -#endif + virtcca_its_init(); + + itt_pool = gen_pool_create(get_order(ITS_ITT_ALIGN), -1); + if (!itt_pool) + return -ENOMEM; + gic_rdists = rdists; its_parent = parent_domain; diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index b1c4efa00182e0f40a8fa199620da19fb615918a..40129b6f0eca41ced6c6213776571a5ae8374ebe 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -48,12 +48,6 @@ source "drivers/virt/nitro_enclaves/Kconfig" source "drivers/virt/acrn/Kconfig" -source 
"drivers/virt/coco/efi_secret/Kconfig" - -source "drivers/virt/coco/sev-guest/Kconfig" - -source "drivers/virt/coco/tdx-guest/Kconfig" - -source "drivers/virt/coco/csv-guest/Kconfig" +source "drivers/virt/coco/Kconfig" endif diff --git a/drivers/virt/Makefile b/drivers/virt/Makefile index 62681a26075878b0bef4d2513bf9f36120a4cdd2..f29901bd782058d3552cdec2c2128ad47ce6fe27 100644 --- a/drivers/virt/Makefile +++ b/drivers/virt/Makefile @@ -9,7 +9,4 @@ obj-y += vboxguest/ obj-$(CONFIG_NITRO_ENCLAVES) += nitro_enclaves/ obj-$(CONFIG_ACRN_HSM) += acrn/ -obj-$(CONFIG_EFI_SECRET) += coco/efi_secret/ -obj-$(CONFIG_SEV_GUEST) += coco/sev-guest/ -obj-$(CONFIG_INTEL_TDX_GUEST) += coco/tdx-guest/ -obj-$(CONFIG_CSV_GUEST) += coco/csv-guest/ +obj-y += coco/ diff --git a/drivers/virt/coco/Kconfig b/drivers/virt/coco/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..dd1011c2e59ef52f04c03ebbe39a05757177375b --- /dev/null +++ b/drivers/virt/coco/Kconfig @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Confidential computing related collateral +# + +config TSM_REPORTS + select CONFIGFS_FS + tristate + +source "drivers/virt/coco/efi_secret/Kconfig" + +source "drivers/virt/coco/sev-guest/Kconfig" + +source "drivers/virt/coco/tdx-guest/Kconfig" + +source "drivers/virt/coco/csv-guest/Kconfig" + +source "drivers/virt/coco/arm-cca-guest/Kconfig" diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..a0c5840a3a33712fcbba8bfc0c129c1ee9bb2ad0 --- /dev/null +++ b/drivers/virt/coco/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Confidential computing related collateral +# +obj-$(CONFIG_TSM_REPORTS) += tsm.o +obj-$(CONFIG_EFI_SECRET) += efi_secret/ +obj-$(CONFIG_SEV_GUEST) += sev-guest/ +obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/ +obj-$(CONFIG_CSV_GUEST) += csv-guest/ +obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/ diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..9dd27c3ee215275cf985498c5d2731ded0708246 --- /dev/null +++ b/drivers/virt/coco/arm-cca-guest/Kconfig @@ -0,0 +1,11 @@ +config ARM_CCA_GUEST + tristate "Arm CCA Guest driver" + depends on ARM64 + default m + select TSM_REPORTS + help + The driver provides userspace interface to request and + attestation report from the Realm Management Monitor(RMM). + + If you choose 'M' here, this module will be called + arm-cca-guest. diff --git a/drivers/virt/coco/arm-cca-guest/Makefile b/drivers/virt/coco/arm-cca-guest/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..69eeba08e98a85259e608a44ba62a84ba4de583f --- /dev/null +++ b/drivers/virt/coco/arm-cca-guest/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest.o diff --git a/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c new file mode 100644 index 0000000000000000000000000000000000000000..f937f6e1009e02b20116401be8aae4333a2cea69 --- /dev/null +++ b/drivers/virt/coco/arm-cca-guest/arm-cca-guest.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/** + * struct arm_cca_token_info - a descriptor for the token buffer. 
+ * @challenge: Pointer to the challenge data + * @challenge_size: Size of the challenge data + * @granule: PA of the granule to which the token will be written + * @offset: Offset within granule to start of buffer in bytes + * @result: result of rsi_attestation_token_continue operation + */ +struct arm_cca_token_info { + void *challenge; + unsigned long challenge_size; + phys_addr_t granule; + unsigned long offset; + unsigned long result; +}; + +static void arm_cca_attestation_init(void *param) +{ + struct arm_cca_token_info *info; + + info = (struct arm_cca_token_info *)param; + + info->result = rsi_attestation_token_init(info->challenge, + info->challenge_size); +} + +/** + * arm_cca_attestation_continue - Retrieve the attestation token data. + * + * @param: pointer to the arm_cca_token_info + * + * Attestation token generation is a long running operation and therefore + * the token data may not be retrieved in a single call. Moreover, the + * token retrieval operation must be requested on the same CPU on which the + * attestation token generation was initialised. + * This helper function is therefore scheduled on the same CPU multiple + * times until the entire token data is retrieved. + */ +static void arm_cca_attestation_continue(void *param) +{ + unsigned long len; + unsigned long size; + struct arm_cca_token_info *info; + + info = (struct arm_cca_token_info *)param; + + size = RSI_GRANULE_SIZE - info->offset; + info->result = rsi_attestation_token_continue(info->granule, + info->offset, size, &len); + info->offset += len; +} + +/** + * arm_cca_report_new - Generate a new attestation token. + * + * @report: pointer to the TSM report context information. + * @data: pointer to the context specific data for this module. + * + * Initialise the attestation token generation using the challenge data + * passed in the TSM descriptor. Allocate memory for the attestation token + * and schedule calls to retrieve the attestation token on the same CPU + * on which the attestation token generation was initialised. + * + * The challenge data must be at least 32 bytes and no more than 64 bytes. If + * less than 64 bytes are provided it will be zero padded to 64 bytes. + * + * Return: + * * %0 - Attestation token generated successfully. + * * %-EINVAL - A parameter was not valid. + * * %-ENOMEM - Out of memory. + * * %-EFAULT - Failed to get IPA for memory page(s). + * * A negative status code as returned by smp_call_function_single(). + */ +static int arm_cca_report_new(struct tsm_report *report, void *data) +{ + int ret; + int cpu; + long max_size; + unsigned long token_size = 0; + struct arm_cca_token_info info; + void *buf; + u8 *token __free(kvfree) = NULL; + struct tsm_desc *desc = &report->desc; + + if (desc->inblob_len < 32 || desc->inblob_len > 64) + return -EINVAL; + + /* + * The attestation token 'init' and 'continue' calls must be + * performed on the same CPU. smp_call_function_single() is used + * instead of simply calling get_cpu() because of the need to + * allocate outblob based on the returned value from the 'init' + * call and that cannot be done in an atomic context. 
*/ + cpu = smp_processor_id(); + + info.challenge = desc->inblob; + info.challenge_size = desc->inblob_len; + + ret = smp_call_function_single(cpu, arm_cca_attestation_init, + &info, true); + if (ret) + return ret; + max_size = info.result; + + if (max_size <= 0) + return -EINVAL; + + /* Allocate outblob */ + token = kvzalloc(max_size, GFP_KERNEL); + if (!token) + return -ENOMEM; + + /* + * Since the outblob may not be physically contiguous, use a page + * to bounce the buffer from RMM. + */ + buf = alloc_pages_exact(RSI_GRANULE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Get the PA of the memory page(s) that were allocated */ + info.granule = (unsigned long)virt_to_phys(buf); + + /* Loop until the token is ready or there is an error */ + do { + /* Retrieve one RSI_GRANULE_SIZE data per loop iteration */ + info.offset = 0; + do { + /* + * Schedule a call to retrieve a sub-granule chunk + * of data per loop iteration. + */ + ret = smp_call_function_single(cpu, + arm_cca_attestation_continue, + (void *)&info, true); + if (ret != 0) { + token_size = 0; + goto exit_free_granule_page; + } + } while (info.result == RSI_INCOMPLETE && + info.offset < RSI_GRANULE_SIZE); + + if (info.result != RSI_SUCCESS) { + ret = -ENXIO; + token_size = 0; + goto exit_free_granule_page; + } + + /* + * Copy the retrieved token data from the granule + * to the token buffer, ensuring that the RMM doesn't + * overflow the buffer. + */ + if (WARN_ON(token_size + info.offset > max_size)) + break; + memcpy(&token[token_size], buf, info.offset); + token_size += info.offset; + } while (info.result == RSI_INCOMPLETE); + + report->outblob = no_free_ptr(token); +exit_free_granule_page: + report->outblob_len = token_size; + free_pages_exact(buf, RSI_GRANULE_SIZE); + return ret; +} + +static const struct tsm_ops arm_cca_tsm_ops = { + .name = KBUILD_MODNAME, + .report_new = arm_cca_report_new, +}; + +/** + * arm_cca_guest_init - Register with the Trusted Security Module (TSM) + * interface. + * + * Return: + * * %0 - Registered successfully with the TSM interface. + * * %-ENODEV - The execution context is not an Arm Realm. + * * %-EBUSY - Already registered. + */ +static int __init arm_cca_guest_init(void) +{ + int ret; + + if (!is_realm_world()) + return -ENODEV; + + ret = tsm_register(&arm_cca_tsm_ops, NULL, NULL); + if (ret < 0) + pr_err("Error %d registering with TSM\n", ret); + + return ret; +} +module_init(arm_cca_guest_init); + +/** + * arm_cca_guest_exit - unregister with the Trusted Security Module (TSM) + * interface. + */ +static void __exit arm_cca_guest_exit(void) +{ + tsm_unregister(&arm_cca_tsm_ops); +} +module_exit(arm_cca_guest_exit); + +MODULE_AUTHOR("Sami Mujawar <sami.mujawar@arm.com>"); +MODULE_DESCRIPTION("Arm CCA Guest TSM Driver"); +MODULE_LICENSE("GPL");
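With the driver in place, the flow a verifier-facing agent follows is: create a report instance under /sys/kernel/config/tsm/report/, write a nonce-bearing challenge to inblob, then read outblob, which triggers the provider's report_new() callback (arm_cca_report_new() above). A minimal userspace sketch, assuming configfs is mounted at /sys/kernel/config and using an arbitrary instance name "cca-test" (both assumptions, not mandated by this patch; error handling abbreviated):

    /* hypothetical client of the configfs-tsm report ABI */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    #define REPORT "/sys/kernel/config/tsm/report/cca-test"

    int main(void)
    {
        unsigned char nonce[64] = { 0 };  /* fill with fresh random data */
        unsigned char token[32768];       /* TSM_OUTBLOB_MAX */
        ssize_t len;
        int fd;

        if (mkdir(REPORT, 0755) && errno != EEXIST)
            return 1;

        fd = open(REPORT "/inblob", O_WRONLY);
        if (fd < 0 || write(fd, nonce, sizeof(nonce)) != sizeof(nonce))
            return 1;
        close(fd);

        /* reading outblob (re)generates the signed token */
        fd = open(REPORT "/outblob", O_RDONLY);
        if (fd < 0)
            return 1;
        len = read(fd, token, sizeof(token));
        close(fd);

        fprintf(stderr, "attestation token: %zd bytes\n", len);
        return len > 0 ? 0 : 1;
    }

A careful client also samples the generation attribute before writing and after reading to detect a racing configuration change, or simply dedicates one report instance to each requesting context.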
diff --git a/drivers/virt/coco/tsm.c b/drivers/virt/coco/tsm.c new file mode 100644 index 0000000000000000000000000000000000000000..d1c2db83a8ca9be1bb6bea6d2f287abfb8ba4313 --- /dev/null +++ b/drivers/virt/coco/tsm.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. All rights reserved. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/rwsem.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/cleanup.h> +#include <linux/configfs.h> +#include <linux/tsm.h> + +static struct tsm_provider { + const struct tsm_ops *ops; + const struct config_item_type *type; + void *data; +} provider; +static DECLARE_RWSEM(tsm_rwsem); + +/** + * DOC: Trusted Security Module (TSM) Attestation Report Interface + * + * The TSM report interface is a common provider of blobs that facilitate + * attestation of a TVM (confidential computing guest) by an attestation + * service. A TSM report combines a user-defined blob (likely a public key with + * a nonce for a key-exchange protocol) with a signed attestation report. That + * combined blob is then used to obtain secrets provided by an agent that can + * validate the attestation report. The expectation is that this interface is + * invoked infrequently; however, configfs allows for multiple agents to + * own their own report generation instances to generate reports as + * often as needed. + * + * The attestation report format is TSM provider specific; a common standard + * may eventually be published to replace the vendor layouts. Until then + * the 'provider' attribute indicates the format of 'outblob', and optionally + * 'auxblob'. + */ + +struct tsm_report_state { + struct tsm_report report; + unsigned long write_generation; + unsigned long read_generation; + struct config_item cfg; +}; + +enum tsm_data_select { + TSM_REPORT, + TSM_CERTS, +}; + +static struct tsm_report *to_tsm_report(struct config_item *cfg) +{ + struct tsm_report_state *state = + container_of(cfg, struct tsm_report_state, cfg); + + return &state->report; +} + +static struct tsm_report_state *to_state(struct tsm_report *report) +{ + return container_of(report, struct tsm_report_state, report); +} + +static int try_advance_write_generation(struct tsm_report *report) +{ + struct tsm_report_state *state = to_state(report); + + lockdep_assert_held_write(&tsm_rwsem); + + /* + * Malicious or broken userspace has written enough times for + * read_generation == write_generation by modular arithmetic without an + * interim read. Stop accepting updates until the current report + * configuration is read. + */ + if (state->write_generation == state->read_generation - 1) + return -EBUSY; + state->write_generation++; + return 0; +} + +static ssize_t tsm_report_privlevel_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + /* + * The valid privilege levels that a TSM might accept, if it accepts a + * privilege level setting at all, are a max of TSM_PRIVLEVEL_MAX (see + * SEV-SNP GHCB) and a minimum of a TSM selected floor value no less + * than 0.
+ */ + if (provider.ops->privlevel_floor > val || val > TSM_PRIVLEVEL_MAX) + return -EINVAL; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + report->desc.privlevel = val; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, privlevel); + +static ssize_t tsm_report_privlevel_floor_show(struct config_item *cfg, + char *buf) +{ + guard(rwsem_read)(&tsm_rwsem); + return sysfs_emit(buf, "%u\n", provider.ops->privlevel_floor); +} +CONFIGFS_ATTR_RO(tsm_report_, privlevel_floor); + +static ssize_t tsm_report_inblob_write(struct config_item *cfg, + const void *buf, size_t count) +{ + struct tsm_report *report = to_tsm_report(cfg); + int rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + + report->desc.inblob_len = count; + memcpy(report->desc.inblob, buf, count); + return count; +} +CONFIGFS_BIN_ATTR_WO(tsm_report_, inblob, NULL, TSM_INBLOB_MAX); + +static ssize_t tsm_report_generation_show(struct config_item *cfg, char *buf) +{ + struct tsm_report *report = to_tsm_report(cfg); + struct tsm_report_state *state = to_state(report); + + guard(rwsem_read)(&tsm_rwsem); + return sysfs_emit(buf, "%lu\n", state->write_generation); +} +CONFIGFS_ATTR_RO(tsm_report_, generation); + +static ssize_t tsm_report_provider_show(struct config_item *cfg, char *buf) +{ + guard(rwsem_read)(&tsm_rwsem); + return sysfs_emit(buf, "%s\n", provider.ops->name); +} +CONFIGFS_ATTR_RO(tsm_report_, provider); + +static ssize_t __read_report(struct tsm_report *report, void *buf, size_t count, + enum tsm_data_select select) +{ + loff_t offset = 0; + ssize_t len; + u8 *out; + + if (select == TSM_REPORT) { + out = report->outblob; + len = report->outblob_len; + } else { + out = report->auxblob; + len = report->auxblob_len; + } + + /* + * Recall that a NULL @buf is configfs requesting the size of + * the buffer. + */ + if (!buf) + return len; + return memory_read_from_buffer(buf, count, &offset, out, len); +} + +static ssize_t read_cached_report(struct tsm_report *report, void *buf, + size_t count, enum tsm_data_select select) +{ + struct tsm_report_state *state = to_state(report); + + guard(rwsem_read)(&tsm_rwsem); + if (!report->desc.inblob_len) + return -EINVAL; + + /* + * A given TSM backend always fills in ->outblob regardless of + * whether the report includes an auxblob or not. + */ + if (!report->outblob || + state->read_generation != state->write_generation) + return -EWOULDBLOCK; + + return __read_report(report, buf, count, select); +} + +static ssize_t tsm_report_read(struct tsm_report *report, void *buf, + size_t count, enum tsm_data_select select) +{ + struct tsm_report_state *state = to_state(report); + const struct tsm_ops *ops; + ssize_t rc; + + /* try to read from the existing report if present and valid... */ + rc = read_cached_report(report, buf, count, select); + if (rc >= 0 || rc != -EWOULDBLOCK) + return rc; + + /* slow path, report may need to be regenerated... */ + guard(rwsem_write)(&tsm_rwsem); + ops = provider.ops; + if (!ops) + return -ENOTTY; + if (!report->desc.inblob_len) + return -EINVAL; + + /* did another thread already generate this report? 
*/ + if (report->outblob && + state->read_generation == state->write_generation) + goto out; + + kvfree(report->outblob); + kvfree(report->auxblob); + report->outblob = NULL; + report->auxblob = NULL; + rc = ops->report_new(report, provider.data); + if (rc < 0) + return rc; + state->read_generation = state->write_generation; +out: + return __read_report(report, buf, count, select); +} + +static ssize_t tsm_report_outblob_read(struct config_item *cfg, void *buf, + size_t count) +{ + struct tsm_report *report = to_tsm_report(cfg); + + return tsm_report_read(report, buf, count, TSM_REPORT); +} +CONFIGFS_BIN_ATTR_RO(tsm_report_, outblob, NULL, TSM_OUTBLOB_MAX); + +static ssize_t tsm_report_auxblob_read(struct config_item *cfg, void *buf, + size_t count) +{ + struct tsm_report *report = to_tsm_report(cfg); + + return tsm_report_read(report, buf, count, TSM_CERTS); +} +CONFIGFS_BIN_ATTR_RO(tsm_report_, auxblob, NULL, TSM_OUTBLOB_MAX); + +#define TSM_DEFAULT_ATTRS() \ + &tsm_report_attr_generation, \ + &tsm_report_attr_provider + +static struct configfs_attribute *tsm_report_attrs[] = { + TSM_DEFAULT_ATTRS(), + NULL, +}; + +static struct configfs_attribute *tsm_report_extra_attrs[] = { + TSM_DEFAULT_ATTRS(), + &tsm_report_attr_privlevel, + &tsm_report_attr_privlevel_floor, + NULL, +}; + +#define TSM_DEFAULT_BIN_ATTRS() \ + &tsm_report_attr_inblob, \ + &tsm_report_attr_outblob + +static struct configfs_bin_attribute *tsm_report_bin_attrs[] = { + TSM_DEFAULT_BIN_ATTRS(), + NULL, +}; + +static struct configfs_bin_attribute *tsm_report_bin_extra_attrs[] = { + TSM_DEFAULT_BIN_ATTRS(), + &tsm_report_attr_auxblob, + NULL, +}; + +static void tsm_report_item_release(struct config_item *cfg) +{ + struct tsm_report *report = to_tsm_report(cfg); + struct tsm_report_state *state = to_state(report); + + kvfree(report->auxblob); + kvfree(report->outblob); + kfree(state); +} + +static struct configfs_item_operations tsm_report_item_ops = { + .release = tsm_report_item_release, +}; + +const struct config_item_type tsm_report_default_type = { + .ct_owner = THIS_MODULE, + .ct_bin_attrs = tsm_report_bin_attrs, + .ct_attrs = tsm_report_attrs, + .ct_item_ops = &tsm_report_item_ops, +}; +EXPORT_SYMBOL_GPL(tsm_report_default_type); + +const struct config_item_type tsm_report_extra_type = { + .ct_owner = THIS_MODULE, + .ct_bin_attrs = tsm_report_bin_extra_attrs, + .ct_attrs = tsm_report_extra_attrs, + .ct_item_ops = &tsm_report_item_ops, +}; +EXPORT_SYMBOL_GPL(tsm_report_extra_type); + +static struct config_item *tsm_report_make_item(struct config_group *group, + const char *name) +{ + struct tsm_report_state *state; + + guard(rwsem_read)(&tsm_rwsem); + if (!provider.ops) + return ERR_PTR(-ENXIO); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return ERR_PTR(-ENOMEM); + + config_item_init_type_name(&state->cfg, name, provider.type); + return &state->cfg; +} + +static struct configfs_group_operations tsm_report_group_ops = { + .make_item = tsm_report_make_item, +}; + +static const struct config_item_type tsm_reports_type = { + .ct_owner = THIS_MODULE, + .ct_group_ops = &tsm_report_group_ops, +}; + +static const struct config_item_type tsm_root_group_type = { + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem tsm_configfs = { + .su_group = { + .cg_item = { + .ci_namebuf = "tsm", + .ci_type = &tsm_root_group_type, + }, + }, + .su_mutex = __MUTEX_INITIALIZER(tsm_configfs.su_mutex), +}; + +int tsm_register(const struct tsm_ops *ops, void *priv, + const struct config_item_type *type) +{ + 
const struct tsm_ops *conflict; + + if (!type) + type = &tsm_report_default_type; + if (!(type == &tsm_report_default_type || type == &tsm_report_extra_type)) + return -EINVAL; + + guard(rwsem_write)(&tsm_rwsem); + conflict = provider.ops; + if (conflict) { + pr_err("\"%s\" ops already registered\n", conflict->name); + return -EBUSY; + } + + provider.ops = ops; + provider.data = priv; + provider.type = type; + return 0; +} +EXPORT_SYMBOL_GPL(tsm_register); + +int tsm_unregister(const struct tsm_ops *ops) +{ + guard(rwsem_write)(&tsm_rwsem); + if (ops != provider.ops) + return -EBUSY; + provider.ops = NULL; + provider.data = NULL; + provider.type = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(tsm_unregister); + +static struct config_group *tsm_report_group; + +static int __init tsm_init(void) +{ + struct config_group *root = &tsm_configfs.su_group; + struct config_group *tsm; + int rc; + + config_group_init(root); + rc = configfs_register_subsystem(&tsm_configfs); + if (rc) + return rc; + + tsm = configfs_register_default_group(root, "report", + &tsm_reports_type); + if (IS_ERR(tsm)) { + configfs_unregister_subsystem(&tsm_configfs); + return PTR_ERR(tsm); + } + tsm_report_group = tsm; + + return 0; +} +module_init(tsm_init); + +static void __exit tsm_exit(void) +{ + configfs_unregister_default_group(tsm_report_group); + configfs_unregister_subsystem(&tsm_configfs); +} +module_exit(tsm_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Provide Trusted Security Module attestation reports via configfs"); diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index db13bb620f527e2a0d8204cfdafe877975c35394..c768de6f19a9a90d04eab66f522bad2f10ef1fd9 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -180,6 +180,11 @@ static inline bool is_kernel_rodata(unsigned long addr) addr < (unsigned long)__end_rodata; } +static inline bool is_kernel_ro_after_init(unsigned long addr) +{ + return addr >= (unsigned long)__start_ro_after_init && + addr < (unsigned long)__end_ro_after_init; +} /** * is_kernel_inittext - checks if the pointer address is located in the * .init.text section diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 39386db201a2891a05a02102941efcdd63e5f0c7..b43ceb20a689f547b6710046dd0cf42367e0409a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -216,6 +216,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); +extern void jump_label_init_ro(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -268,6 +269,8 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } +static __always_inline void jump_label_init_ro(void) { } + static __always_inline bool static_key_false(struct static_key *key) { if (unlikely_notrace(static_key_count(key) > 0)) diff --git a/include/linux/slab.h b/include/linux/slab.h index a761cc3559f295ccf13f5de282b1cd0b08c44d6e..f30f3ade52cf4b9de2414432f4d72072af36675c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -764,6 +764,8 @@ static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t fla extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); extern void kvfree(const void *addr); +DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T)) + extern void kvfree_sensitive(const void *addr, size_t len); 
unsigned int kmem_cache_size(struct kmem_cache *s);
diff --git a/include/linux/tsm.h b/include/linux/tsm.h new file mode 100644 index 0000000000000000000000000000000000000000..de8324a2223c52597e476b6fb67f745281ca193a --- /dev/null +++ b/include/linux/tsm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TSM_H +#define __TSM_H + +#include <linux/sizes.h> +#include <linux/types.h> + +#define TSM_INBLOB_MAX 64 +#define TSM_OUTBLOB_MAX SZ_32K + +/* + * Privilege level is a nested permission concept to allow confidential + * guests to partition address space, 4 levels are supported. + */ +#define TSM_PRIVLEVEL_MAX 3 + +/** + * struct tsm_desc - option descriptor for generating tsm report blobs + * @privlevel: optional privilege level to associate with @outblob + * @inblob_len: sizeof @inblob + * @inblob: arbitrary input data + */ +struct tsm_desc { + unsigned int privlevel; + size_t inblob_len; + u8 inblob[TSM_INBLOB_MAX]; +}; + +/** + * struct tsm_report - track state of report generation relative to options + * @desc: input parameters to @report_new() + * @outblob_len: sizeof(@outblob) + * @outblob: generated evidence to provide to the attestation agent + * @auxblob_len: sizeof(@auxblob) + * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + */ +struct tsm_report { + struct tsm_desc desc; + size_t outblob_len; + u8 *outblob; + size_t auxblob_len; + u8 *auxblob; +}; + +/** + * struct tsm_ops - attributes and operations for tsm instances + * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider + * @privlevel_floor: convey base privlevel for nested scenarios + * @report_new: Populate @report with the report blob and auxblob + * (optional), return 0 on successful population, or -errno otherwise + * + * Implementation specific ops, only one is expected to be registered at + * a time i.e. only one of "sev-guest", "tdx-guest", etc. + */ +struct tsm_ops { + const char *name; + const unsigned int privlevel_floor; + int (*report_new)(struct tsm_report *report, void *data); +}; + +extern const struct config_item_type tsm_report_default_type; + +/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ +extern const struct config_item_type tsm_report_extra_type; + +int tsm_register(const struct tsm_ops *ops, void *priv, + const struct config_item_type *type); +int tsm_unregister(const struct tsm_ops *ops); +#endif /* __TSM_H */
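The contract in this header is intentionally small: a provider fills in struct tsm_ops, implements report_new() to populate @outblob (and optionally @auxblob) from the options in @desc, and calls tsm_register(). The arm-cca-guest driver above is a complete example; for orientation only, here is a hedged sketch of the minimal shape, with an invented "echo" provider that merely reflects the challenge back (illustrative, not part of this patch):

    /* hypothetical provider skeleton */
    #include <linux/module.h>
    #include <linux/slab.h>
    #include <linux/string.h>
    #include <linux/tsm.h>

    static int echo_report_new(struct tsm_report *report, void *data)
    {
        struct tsm_desc *desc = &report->desc;
        u8 *outblob __free(kvfree) = kvzalloc(desc->inblob_len, GFP_KERNEL);

        if (!outblob)
            return -ENOMEM;

        /* the "evidence" is just the unsigned challenge echoed back */
        memcpy(outblob, desc->inblob, desc->inblob_len);
        report->outblob_len = desc->inblob_len;
        report->outblob = no_free_ptr(outblob);
        return 0;
    }

    static const struct tsm_ops echo_tsm_ops = {
        .name = KBUILD_MODNAME,
        .report_new = echo_report_new,
    };

    static int __init echo_tsm_init(void)
    {
        return tsm_register(&echo_tsm_ops, NULL, NULL);
    }
    module_init(echo_tsm_init);

    static void __exit echo_tsm_exit(void)
    {
        tsm_unregister(&echo_tsm_ops);
    }
    module_exit(echo_tsm_exit);

    MODULE_LICENSE("GPL");

Note that tsm_register() accepts only one provider at a time, so a real provider first checks that it is running on its own platform, as arm_cca_guest_init() does with is_realm_world().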
diff --git a/include/linux/virtcca_cvm_domain.h b/include/linux/virtcca_cvm_domain.h index 5b6d6dfac05e80fbee8a62dbb8675000bc510b98..9a24a031d12c356eb5a61c02d85dcf1c3bb492da 100644 --- a/include/linux/virtcca_cvm_domain.h +++ b/include/linux/virtcca_cvm_domain.h @@ -24,6 +24,16 @@ static inline bool virtcca_cvm_domain(void) static inline void enable_swiotlb_for_cvm_dev(struct device *dev, bool enable) {} +static inline void virtcca_its_init(void) {} + +static inline struct page *virtcca_its_alloc_shared_pages_node(int node, gfp_t gfp, + unsigned int order) +{ + return NULL; +} + +static inline void virtcca_its_free_shared_pages(void *addr, int order) {} + #endif #ifdef CONFIG_HISI_VIRTCCA_HOST
diff --git a/init/main.c b/init/main.c index 02a2c5d9be671b4aac158951a5551ad9524f34d8..afead48c58e48e8f8720b38665f5a4b302d67917 100644 --- a/init/main.c +++ b/init/main.c @@ -1413,6 +1413,7 @@ static void mark_readonly(void) * insecure pages which are W+X. */ flush_module_init_free_work(); + jump_label_init_ro(); mark_rodata_ro(); rodata_test(); } else
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index ae29b818fd5646d015d390bd3fff1118aacdd89f..31c88eb5f9369384a7294a3cd686a0a4ff8c02b5 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -565,6 +565,45 @@ void __init jump_label_init(void) cpus_read_unlock(); } +static inline bool static_key_sealed(struct static_key *key) +{ + return (key->type & JUMP_TYPE_LINKED) && !(key->type & ~JUMP_TYPE_MASK); +} + +static inline void static_key_seal(struct static_key *key) +{ + unsigned long type = key->type & JUMP_TYPE_TRUE; + key->type = JUMP_TYPE_LINKED | type; +} + +void jump_label_init_ro(void) +{ + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + if (WARN_ON_ONCE(!static_key_initialized)) + return; + + cpus_read_lock(); + jump_label_lock(); + + for (iter = iter_start; iter < iter_stop; iter++) { + struct static_key *iterk = jump_entry_key(iter); + + if (!is_kernel_ro_after_init((unsigned long)iterk)) + continue; + + if (static_key_sealed(iterk)) + continue; + + static_key_seal(iterk); + } + + jump_label_unlock(); + cpus_read_unlock(); +} + #ifdef CONFIG_MODULES enum jump_label_type jump_label_init_type(struct jump_entry *entry) @@ -690,6 +729,15 @@ static int jump_label_add_module(struct module *mod) static_key_set_entries(key, iter); continue; } + + /* + * If the key was sealed at init, then there's no need to keep a + * reference to its module entries - just patch them now and be + * done with it. + */ + if (static_key_sealed(key)) + goto do_poke; + jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm) return -ENOMEM; @@ -715,6 +763,7 @@ static int jump_label_add_module(struct module *mod) static_key_set_linked(key); /* Only update if we've changed from our initial state */ +do_poke: if (jump_label_type(iter) != jump_label_init_type(iter)) __jump_label_update(key, iter, iter_stop, true); } @@ -744,6 +793,10 @@ static void jump_label_del_module(struct module *mod) if (within_module((unsigned long)key, mod)) continue; + /* No @jlm allocated because key was sealed at init. */ + if (static_key_sealed(key)) + continue; + /* No memory during module load */ if (WARN_ON(!static_key_linked(key))) continue;
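The payoff of sealing shows with a key that lives in .data..ro_after_init: it can be flipped while still writable during boot, jump_label_init_ro() then seals it immediately before mark_rodata_ro(), and subsequent module loads patch its branch sites in place (the do_poke path above) instead of allocating struct static_key_mod bookkeeping. A sketch using the existing DEFINE_STATIC_KEY_FALSE_RO() declarator (the key, boot parameter, and helpers are invented for illustration):

    #include <linux/init.h>
    #include <linux/jump_label.h>

    void do_work_fast(void);
    void do_work_slow(void);

    /* placed in .data..ro_after_init; writable only until mark_readonly() */
    DEFINE_STATIC_KEY_FALSE_RO(use_fast_path);

    static int __init fast_path_setup(char *str)
    {
        /* boot parameters are parsed long before the key is sealed */
        static_branch_enable(&use_fast_path);
        return 1;
    }
    __setup("fastpath", fast_path_setup);

    /* consumers, built-in or modular, compile to a patchable branch */
    void do_work(void)
    {
        if (static_branch_unlikely(&use_fast_path))
            do_work_fast();
        else
            do_work_slow();
    }

Any later static_branch_enable() on such a key would fault on the now read-only struct static_key, which is exactly the property the seal relies on.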