From 8d80d3a973bfb32597029358d2c4883536f8aef4 Mon Sep 17 00:00:00 2001
From: Jiakun Shuai
Date: Wed, 28 May 2025 18:49:50 +0800
Subject: [PATCH 1/2] dma: phytium: Add PSWIOTLB mechanism to improve DMA
 performance

phytium inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICBHX3
CVE: NA

----------------------------------------------------------

This patch adds an additional bounce-buffer "memory copy" step to improve
DMA performance in the D2H (device-to-host) direction on Phytium Server
SoCs.

Signed-off-by: Cui Chao
Signed-off-by: Jiakun Shuai
---
 arch/arm64/mm/init.c                  |   11 +
 drivers/base/core.c                   |   10 +
 drivers/pci/pci.c                     |   12 +
 drivers/pci/probe.c                   |   10 +
 include/linux/device.h                |   14 +
 include/linux/page-flags.h            |    7 +
 include/linux/pswiotlb.h              |  333 +++++
 include/trace/events/pswiotlb.h       |   44 +
 kernel/dma/Kconfig                    |    2 +
 kernel/dma/Makefile                   |    1 +
 kernel/dma/contiguous.c               |   12 +
 kernel/dma/mapping.c                  |   51 +
 kernel/dma/phytium/Kconfig            |   10 +
 kernel/dma/phytium/Makefile           |    6 +
 kernel/dma/phytium/pswiotlb-direct.c  |  146 +++
 kernel/dma/phytium/pswiotlb-dma.h     |  334 +++++
 kernel/dma/phytium/pswiotlb-iommu.c   | 1145 ++++++++++++++++
 kernel/dma/phytium/pswiotlb-mapping.c |  157 +++
 kernel/dma/phytium/pswiotlb.c         | 1736 +++++++++++++++++++++++++
 19 files changed, 4041 insertions(+)
 create mode 100644 include/linux/pswiotlb.h
 create mode 100644 include/trace/events/pswiotlb.h
 create mode 100644 kernel/dma/phytium/Kconfig
 create mode 100644 kernel/dma/phytium/Makefile
 create mode 100644 kernel/dma/phytium/pswiotlb-direct.c
 create mode 100644 kernel/dma/phytium/pswiotlb-dma.h
 create mode 100644 kernel/dma/phytium/pswiotlb-iommu.c
 create mode 100644 kernel/dma/phytium/pswiotlb-mapping.c
 create mode 100644 kernel/dma/phytium/pswiotlb.c

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 66a7fff9f373..1ff01e978199 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -32,6 +32,10 @@
 #include
 #include
+#ifdef CONFIG_PSWIOTLB
+#include <linux/pswiotlb.h>
+#endif
+
 #include
 #include
 #include
@@ -613,6 +617,13 @@ void __init mem_init(void)
 	swiotlb_cvm_update_mem_attributes();
 
+#ifdef CONFIG_PSWIOTLB
+	/* enable pswiotlb default */
+	if ((pswiotlb_force_disable != true) &&
+	    is_phytium_ps_socs())
+		pswiotlb_init(1, PSWIOTLB_VERBOSE);
+#endif
+
 	/* this will put all unused low memory onto the freelists */
 	memblock_free_all();
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4c8094dd8fe5..28f471043814 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -34,6 +34,11 @@
 #include /* for dma_default_coherent */
 #include
+#if defined(CONFIG_PSWIOTLB) && !defined(__GENKSYMS__)
+/* #include KABI_HIDE_INCLUDE() */
+#include <linux/pswiotlb.h>
+#endif
+
 #include "base.h"
 #include "physical_location.h"
 #include "power/power.h"
@@ -3147,6 +3152,11 @@ void device_initialize(struct device *dev)
 #endif
 	swiotlb_dev_init(dev);
 	enable_swiotlb_for_cvm_dev(dev, false);
+#ifdef CONFIG_PSWIOTLB
+	if ((pswiotlb_force_disable != true) &&
+	    is_phytium_ps_socs())
+		pswiotlb_dev_init(dev);
+#endif
 }
 EXPORT_SYMBOL_GPL(device_initialize);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6743cce7532d..ed25ef14b0d3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -36,6 +36,9 @@
 #include
 #endif
 #include "pci.h"
+#ifdef CONFIG_PSWIOTLB
+#include <linux/pswiotlb.h>
+#endif
 
 DEFINE_MUTEX(pci_slot_mutex);
 
@@ -4546,6 +4549,15 @@ void __weak pcibios_set_master(struct pci_dev *dev)
  */
 void pci_set_master(struct pci_dev *dev)
 {
+#ifdef CONFIG_PSWIOTLB
+	if ((pswiotlb_force_disable != true) &&
+	    is_phytium_ps_socs()) {
+		dev->dev.can_use_pswiotlb =
pswiotlb_is_dev_in_passthroughlist(dev); + dev_info(&dev->dev, "The device %s use pswiotlb because vendor 0x%04x %s in pswiotlb passthroughlist\n", + dev->dev.can_use_pswiotlb ? "would" : "would NOT", + dev->vendor, dev->dev.can_use_pswiotlb ? "is NOT" : "is"); + } +#endif __pci_set_master(dev, true); pcibios_set_master(dev); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c879d88807e7..0cdfdd018bab 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -20,6 +20,9 @@ #include #include #include "pci.h" +#ifdef CONFIG_PSWIOTLB +#include +#endif #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ #define CARDBUS_RESERVE_BUSNR 3 @@ -2575,6 +2578,13 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dma_set_max_seg_size(&dev->dev, 65536); dma_set_seg_boundary(&dev->dev, 0xffffffff); +#ifdef CONFIG_PSWIOTLB + if ((pswiotlb_force_disable != true) && + is_phytium_ps_socs()) { + pswiotlb_store_local_node(dev, bus); + dma_set_seg_boundary(&dev->dev, 0xffffffffffff); + } +#endif pcie_failed_link_retrain(dev); diff --git a/include/linux/device.h b/include/linux/device.h index 92176316a16c..13c8923788c0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -655,6 +655,8 @@ struct device_physical_location { * @dma_io_tlb_pools: List of transient swiotlb memory pools. * @dma_io_tlb_lock: Protects changes to the list of active pools. * @dma_uses_io_tlb: %true if device has used the software IO TLB. + * @dma_p_io_tlb_mem: Phytium Software IO TLB allocator. Not for driver use. + * @dma_uses_p_io_tlb: %true if device has used the Phytium software IO TLB. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. * @fwnode: Associated device node supplied by platform firmware. @@ -806,10 +808,22 @@ struct device { bool dma_ops_bypass : 1; #endif +#ifdef CONFIG_PSWIOTLB + KABI_USE(1, struct p_io_tlb_mem *dma_p_io_tlb_mem) + KABI_USE(2, struct { + bool dma_uses_p_io_tlb; + bool can_use_pswiotlb; + }) +#else KABI_RESERVE(1) KABI_RESERVE(2) +#endif KABI_RESERVE(3) +#if defined(CONFIG_NUMA) && defined(CONFIG_PSWIOTLB) + KABI_USE(4, int local_node) /* NUMA node this device is really belong to */ +#else KABI_RESERVE(4) +#endif KABI_RESERVE(5) KABI_RESERVE(6) KABI_RESERVE(7) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 7a67d997eece..965bd8cf33e2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -194,6 +194,13 @@ enum pageflags { /* At least one page in this folio has the hwpoison flag set */ PG_has_hwpoisoned = PG_error, PG_large_rmappable = PG_workingset, /* anon or file-backed */ + +#ifdef CONFIG_PSWIOTLB + /* check if pswiotlb is sync already */ + PG_pswiotlbsync = __NR_PAGEFLAGS + 1, + /* check if the page is used for pswiotlb */ + PG_pswiotlb, +#endif }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) diff --git a/include/linux/pswiotlb.h b/include/linux/pswiotlb.h new file mode 100644 index 000000000000..548a54730fed --- /dev/null +++ b/include/linux/pswiotlb.h @@ -0,0 +1,333 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_PSWIOTLB_H +#define __LINUX_PSWIOTLB_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct device; +struct page; +struct scatterlist; +extern bool pswiotlb_force_disable; +struct p_io_tlb_pool; + +#define SOC_ID_PS23064 0x8 +#define SOC_ID_PS24080 0x6 +#define MIDR_PS 0x700F8620 +#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) +#define PSWIOTLB_VERBOSE (1 << 0) /* verbose initialization */ 
+#define PSWIOTLB_FORCEOFF (1 << 1) /* force phytium bounce buffering off*/ +#define PSWIOTLB_ANY (1 << 2) /* allow any memory for the buffer */ +#define PSWIOTLB_FREE_THRESHOLD 30 +static bool is_ps_socs; + +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define P_IO_TLB_SEGSIZE 1024 + +/* + * log of the size of each Phytium IO TLB slab. The number of slabs is command line + * controllable. + */ +#define P_IO_TLB_SHIFT 11 +#define P_IO_TLB_SIZE (1 << P_IO_TLB_SHIFT) + +/* default to 256MB */ +#define P_IO_TLB_DEFAULT_SIZE (256UL<<20) +#define P_IO_TLB_INC_THR (64UL<<20) +#define P_IO_TLB_EXT_WATERMARK (80) + +/* passthroughlist which incompatible with pswiotlb temporarily */ +#define BL_PCI_VENDOR_ID_NVIDIA 0x10de +#define BL_PCI_VENDOR_ID_ILUVATAR 0x1E3E +#define BL_PCI_VENDOR_ID_METAX 0x9999 + +unsigned long pswiotlb_size_or_default(void); +void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)); + +phys_addr_t pswiotlb_tbl_map_single(struct device *hwdev, int nid, phys_addr_t phys, + size_t mapping_size, size_t alloc_size, unsigned int alloc_align_mask, + enum dma_data_direction dir, + unsigned long attrs); + +extern void pswiotlb_tbl_unmap_single(struct device *hwdev, + int nid, + phys_addr_t tlb_addr, + size_t offset, + size_t mapping_size, + enum dma_data_direction dir, + unsigned long attrs, + struct p_io_tlb_pool *pool); + +void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool); +void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool); +dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus); +void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, unsigned long iova, + size_t mapped, int nents, enum dma_data_direction dir, unsigned long attrs); +#ifdef CONFIG_PSWIOTLB +struct pswiotlb_passthroughlist { + struct list_head node; + unsigned short vendor; + unsigned short device; + bool from_grub; +}; +/** + * struct p_io_tlb_pool - Phytium IO TLB memory pool descriptor + * @start: The start address of the pswiotlb memory pool. Used to do a quick + * range check to see if the memory was in fact allocated by this + * API. + * @end: The end address of the pswiotlb memory pool. Used to do a quick + * range check to see if the memory was in fact allocated by this + * API. + * @nslabs: The number of Phytium IO TLB blocks (in groups of 64) between @start and + * @end. For default pswiotlb, this is command line adjustable via + * setup_io_tlb_npages. + * @used: The number of used Phytium IO TLB block. + * @list: The free list describing the number of free entries available + * from each index. + * @index: The index to start searching in the next round. + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @lock: The lock to protect the above data structures in the map and + * unmap calls. + * @vaddr: The vaddr of the pswiotlb memory pool. 
The pswiotlb memory pool + * may be remapped in the memory encrypted case and store virtual + * address for bounce buffer operation. + * @nslabs: The number of Phytium IO TLB slots between @start and @end. For the + * default pswiotlb, this can be adjusted with a boot parameter, + * see setup_io_tlb_npages(). + * @late_alloc: %true if allocated using the page allocator. + * @nareas: Number of areas in the pool. + * @area_nslabs: Number of slots in each area. + * @areas: Array of memory area descriptors. + * @slots: Array of slot descriptors. + * @node: Member of the Phytium IO TLB memory pool list. + * @rcu: RCU head for pswiotlb_dyn_free(). + * @transient: %true if transient memory pool. + * @busy_flag: %true if the pool is used by devices. + * @free_cnt: Counters every time the pool is free when checked by monitor. + * @free_th: Free threshold determine when to free the pool to memory. + * @busy_recode: Bitmap to record the busy status of the areas in the pool. + * @node_min_addr: Minimum physical address of the numa node. + * @numa_max_addr: Maximum physical address of the numa node. + * @numa_node_id: Numa node id the pool belong to. + */ +struct p_io_tlb_pool { + phys_addr_t start; + phys_addr_t end; + void *vaddr; + unsigned long nslabs; + bool late_alloc; + unsigned int nareas; + unsigned int area_nslabs; + struct p_io_tlb_area *areas; + struct p_io_tlb_slot *slots; + struct list_head node; + struct rcu_head rcu; + bool transient; + bool busy_flag; + unsigned int free_cnt; + unsigned int free_th; + unsigned long *busy_record; + phys_addr_t node_min_addr; + phys_addr_t node_max_addr; + int numa_node_id; +}; + +/** + * struct p_io_tlb_mem - Phytium Software IO TLB allocator + * @defpool: Default (initial) Phytium IO TLB memory pool descriptor. + * @pool: Phytium IO TLB memory pool descriptor (if not dynamic). + * @nslabs: Total number of Phytium IO TLB slabs in all pools. + * @debugfs: The dentry to debugfs. + * @force_bounce: %true if pswiotlb bouncing is forced + * @for_alloc: %true if the pool is used for memory allocation + * @can_grow: %true if more pools can be allocated dynamically. + * @phys_limit: Maximum allowed physical address. + * @pool_addr: Array where all the pools stored. + * @capacity: Number of pools which could be allocated. + * @whole_size: Number of pools which stored in the pool array. + * @lock: Lock to synchronize changes to the list. + * @pools: List of Phytium IO TLB memory pool descriptors (if dynamic). + * @dyn_alloc: Dynamic Phytium IO TLB pool allocation work. + * @total_used: The total number of slots in the pool that are currently used + * across all areas. Used only for calculating used_hiwater in + * debugfs. + * @used_hiwater: The high water mark for total_used. Used only for reporting + * in debugfs. + * @node_min_addr: Minimum physical address of the numa node. + * @numa_max_addr: Maximum physical address of the numa node. + * @numa_node_id: Numa node id the mem belong to. 
+ */ +struct p_io_tlb_mem { + struct p_io_tlb_pool defpool; + unsigned long nslabs; + struct dentry *debugfs; + bool force_bounce; + bool for_alloc; + bool can_grow; + u64 phys_limit; + struct p_io_tlb_pool *pool_addr[64*1024/8]; + int capacity; + int whole_size; + spinlock_t lock; + struct list_head pools; + struct work_struct dyn_alloc; +#ifdef CONFIG_DEBUG_FS + atomic_long_t total_used; + atomic_long_t used_hiwater; +#endif + phys_addr_t node_min_addr; + phys_addr_t node_max_addr; + unsigned long node_total_mem; + int numa_node_id; +}; + +extern struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES]; + +struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr); + +static inline bool is_phytium_ps_socs(void) +{ + unsigned int soc_id; + unsigned int midr; + + if (likely(is_ps_socs)) + return true; + + soc_id = read_sysreg_s(SYS_AIDR_EL1); + midr = read_cpuid_id(); + if ((soc_id == SOC_ID_PS23064 || soc_id == SOC_ID_PS24080) + && midr == MIDR_PS) { + is_ps_socs = true; + return true; + } else + return false; +} + +static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr, + struct p_io_tlb_pool **pool) +{ + struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; + struct page *page; + + if (!paddr) + return false; + + page = pfn_to_page(PFN_DOWN(paddr)); + + if (test_bit(PG_pswiotlb, &page->flags) == false) + return false; + + if (!mem) + return false; + + /* + * All PSWIOTLB buffer addresses must have been returned by + * pswiotlb_tbl_map_single() and passed to a device driver. + * If a PSWIOTLB address is checked on another CPU, then it was + * presumably loaded by the device driver from an unspecified private + * data structure. Make sure that this load is ordered before reading + * dev->dma_uses_p_io_tlb here and mem->pools in pswiotlb_find_pool(). + * + * This barrier pairs with smp_mb() in pswiotlb_find_slots(). 
+ */ + smp_rmb(); + + *pool = pswiotlb_find_pool(dev, nid, paddr); + if (READ_ONCE(dev->dma_uses_p_io_tlb) && *pool) + return true; + + return false; +} + +static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size) +{ + dma_addr_t end = addr + size - 1; + struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[nid]; + + if (addr >= mem->node_min_addr && end <= mem->node_max_addr) + return true; + + return false; +} + +void pswiotlb_init(bool addressing_limited, unsigned int flags); +void pswiotlb_dev_init(struct device *dev); +size_t pswiotlb_max_mapping_size(struct device *dev); +bool is_pswiotlb_allocated(struct device *dev); +bool is_pswiotlb_active(struct device *dev); +void __init pswiotlb_adjust_size(unsigned long size); +phys_addr_t default_pswiotlb_base(struct device *dev); +phys_addr_t default_pswiotlb_limit(struct device *dev); +bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev); +#else +static inline void pswiotlb_init(bool addressing_limited, unsigned int flags) +{ +} + +static inline void pswiotlb_dev_init(struct device *dev) +{ +} +static inline bool is_pswiotlb_buffer(struct device *dev, int nid, phys_addr_t paddr, + struct p_io_tlb_pool **pool) +{ + return false; +} +static inline bool dma_is_in_local_node(struct device *dev, int nid, dma_addr_t addr, size_t size) +{ + return false; +} +static inline size_t pswiotlb_max_mapping_size(struct device *dev) +{ + return SIZE_MAX; +} + +static inline bool is_pswiotlb_allocated(struct device *dev) +{ + return false; +} +static inline bool is_pswiotlb_active(struct device *dev) +{ + return false; +} + +static inline void pswiotlb_adjust_size(unsigned long size) +{ +} + +static inline phys_addr_t default_pswiotlb_base(struct device *dev) +{ + return 0; +} + +static inline phys_addr_t default_pswiotlb_limit(struct device *dev) +{ + return 0; +} + +static inline bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev) +{ + return false; +} +#endif /* CONFIG_PSWIOTLB */ + +extern void pswiotlb_print_info(int); +extern bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); + +#endif /* __LINUX_PSWIOTLB_H */ diff --git a/include/trace/events/pswiotlb.h b/include/trace/events/pswiotlb.h new file mode 100644 index 000000000000..ed26c41a4046 --- /dev/null +++ b/include/trace/events/pswiotlb.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pswiotlb + +#if !defined(_TRACE_PSWIOTLB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PSWIOTLB_H + +#include + +TRACE_EVENT(pswiotlb_bounced, + + TP_PROTO(struct device *dev, + dma_addr_t dev_addr, + size_t size), + + TP_ARGS(dev, dev_addr, size), + + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u64, dma_mask) + __field(dma_addr_t, dev_addr) + __field(size_t, size) + __field(bool, force) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->dma_mask = (dev->dma_mask ? *dev->dma_mask : 0); + __entry->dev_addr = dev_addr; + __entry->size = size; + ), + + TP_printk("dev_name: %s dma_mask=%llx dev_addr=%llx size=%zu %s", + __get_str(dev_name), + __entry->dma_mask, + (unsigned long long)__entry->dev_addr, + __entry->size, + __entry->force ? 
"NORMAL" : "FORCEOFF") +); + +#endif /* _TRACE_PSWIOTLB_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index f488997b0717..f13515fc1384 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -270,3 +270,5 @@ config DMA_MAP_BENCHMARK performance of dma_(un)map_page. See tools/testing/selftests/dma/dma_map_benchmark.c + +source "kernel/dma/phytium/Kconfig" diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 21926e46ef4f..c7c3cb4499e9 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o obj-$(CONFIG_MMU) += remap.o obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o +obj-$(CONFIG_PSWIOTLB) += phytium/ diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index df16afd0806f..5645ba3c3f99 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -52,6 +52,10 @@ #include #include +#ifdef CONFIG_PSWIOTLB +#include "./phytium/pswiotlb-dma.h" +#endif + #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES #else @@ -364,6 +368,10 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) + return NULL; +#endif #ifdef CONFIG_DMA_NUMA_CMA int nid = dev_to_node(dev); #endif @@ -416,6 +424,10 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) + __free_pages(page, get_order(size)); +#endif /* if dev has its own cma, free page from there */ if (dev->cma_area) { if (cma_release(dev->cma_area, page, count)) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index f1d9f01b283d..22bbf9092d6a 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -16,6 +16,9 @@ #include #include "debug.h" #include "direct.h" +#if defined(CONFIG_PSWIOTLB) && !defined(__GENKSYMS__) +#include "./phytium/pswiotlb-dma.h" +#endif #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ @@ -156,6 +159,12 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + addr = pswiotlb_dma_map_page_distribute(dev, page, offset, size, dir, attrs); + return addr; + } +#endif if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); @@ -174,6 +183,12 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + pswiotlb_dma_unmap_page_attrs_distribute(dev, addr, size, dir, attrs); + return; + } +#endif if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); @@ -194,6 +209,12 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (WARN_ON_ONCE(!dev->dma_mask)) return 0; +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + ents = pswiotlb_dma_map_sg_attrs_distribute(dev, sg, nents, dir, attrs); + return ents; + } 
+#endif if (dma_map_direct(dev, ops) || arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); @@ -288,6 +309,12 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + pswiotlb_dma_unmap_sg_attrs_distribute(dev, sg, nents, dir, attrs); + return; + } +#endif if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); @@ -335,6 +362,12 @@ void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + pswiotlb_dma_sync_single_for_cpu_distribute(dev, addr, size, dir); + return; + } +#endif if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) @@ -349,6 +382,12 @@ void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + pswiotlb_dma_sync_single_for_device_distribute(dev, addr, size, dir); + return; + } +#endif if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) @@ -363,6 +402,12 @@ void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + pswiotlb_dma_sync_sg_for_cpu_distribute(dev, sg, nelems, dir); + return; + } +#endif if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) @@ -377,6 +422,12 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); +#ifdef CONFIG_PSWIOTLB + if (check_if_pswiotlb_is_applicable(dev)) { + pswiotlb_dma_sync_sg_for_device_distribute(dev, sg, nelems, dir); + return; + } +#endif if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) diff --git a/kernel/dma/phytium/Kconfig b/kernel/dma/phytium/Kconfig new file mode 100644 index 000000000000..8553a65027ee --- /dev/null +++ b/kernel/dma/phytium/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config PSWIOTLB + bool "Phytium software IO TLB" + select NEED_DMA_MAP_STATE + depends on ARCH_PHYTIUM && NUMA + help + This enables phytium software IO TLB. You can disable phytium software + IO TLB using "pswiotlb=forceoff" on the kernel command line if you do + not need it when PSWIOTLB is Y. 
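To make the dispatch flow easier to follow, here is a condensed, illustrative sketch (not part of the patch) of the hook pattern that the kernel/dma/mapping.c hunks above add to each DMA API entry point: when the gate check passes, the call is routed to the matching pswiotlb "distribute" helper and the generic path is skipped. All function names come from this patch; the tail of the function is abbreviated.

/*
 * Illustrative sketch only: the hook added to dma_map_page_attrs() in
 * kernel/dma/mapping.c.  The same gate appears in dma_unmap_page_attrs(),
 * __dma_map_sg_attrs(), dma_unmap_sg_attrs() and the dma_sync_* helpers.
 */
dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
		size_t offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	BUG_ON(!valid_dma_direction(dir));
	if (WARN_ON_ONCE(!dev->dma_mask))
		return DMA_MAPPING_ERROR;

#ifdef CONFIG_PSWIOTLB
	/*
	 * Phytium PS SoC, pswiotlb not forced off, and a PCI device that is
	 * not on the passthrough list and has a known local NUMA node:
	 * route the request to the pswiotlb path and skip the generic one.
	 */
	if (check_if_pswiotlb_is_applicable(dev))
		return pswiotlb_dma_map_page_distribute(dev, page, offset,
							size, dir, attrs);
#endif
	/*
	 * Otherwise fall through to the unchanged mapping.c logic
	 * (abbreviated here to the dma-direct branch only).
	 */
	return dma_direct_map_page(dev, page, offset, size, dir, attrs);
}

Per the Kconfig help above, the whole mechanism can be disabled at boot with pswiotlb=forceoff, which presumably sets the pswiotlb_force_disable flag tested by check_if_pswiotlb_is_applicable() (declared in pswiotlb-dma.h below).
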
diff --git a/kernel/dma/phytium/Makefile b/kernel/dma/phytium/Makefile new file mode 100644 index 000000000000..f94ea59e950f --- /dev/null +++ b/kernel/dma/phytium/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_PSWIOTLB) += pswiotlb.o +obj-$(CONFIG_PSWIOTLB) += pswiotlb-mapping.o +obj-$(CONFIG_PSWIOTLB) += pswiotlb-direct.o +obj-$(CONFIG_PSWIOTLB) += pswiotlb-iommu.o diff --git a/kernel/dma/phytium/pswiotlb-direct.c b/kernel/dma/phytium/pswiotlb-direct.c new file mode 100644 index 000000000000..f5e1b62c67c9 --- /dev/null +++ b/kernel/dma/phytium/pswiotlb-direct.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA operations based on Phytium software IO tlb that + * map physical memory directly without using an IOMMU. + * + * Copyright (c) 2024, Phytium Technology Co., Ltd. + */ +#include /* for max_pfn */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pswiotlb-dma.h" + +/* + * The following functions are ported from + * ./drivers/dma/direct.c + * static inline dma_addr_t phys_to_dma_direct(struct device *dev, + * phys_addr_t phys); + */ + +static inline dma_addr_t phys_to_dma_direct(struct device *dev, + phys_addr_t phys) +{ + if (force_dma_unencrypted(dev)) + return phys_to_dma_unencrypted(dev, phys); + return phys_to_dma(dev, phys); +} + +bool pswiotlb_dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +{ + dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); + + if (dma_addr == DMA_MAPPING_ERROR) + return false; + return dma_addr + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); +} + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_PSWIOTLB) +void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); + + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_device(dev, paddr, sg->length, + dir); + + if (is_pswiotlb_active(dev) && + unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) + pswiotlb_sync_single_for_device(dev, nid, paddr, + sg->length, dir, pool); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(paddr, sg->length, + dir); + } +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_PSWIOTLB) +void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + for_each_sg(sgl, sg, nents, i) { + phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg)); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(paddr, sg->length, dir); + + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_cpu(dev, paddr, sg->length, + dir); + + if (is_pswiotlb_active(dev) && + unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) + pswiotlb_sync_single_for_cpu(dev, nid, paddr, + sg->length, dir, pool); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, sg->length); + } + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu_all(); +} + +/* + * Unmaps segments, except for ones marked as pci_p2pdma which do not + * require any further action as they contain a bus 
address. + */ +void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + pswiotlb_dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir, + attrs); +} +#endif + +int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i, ret; + + for_each_sg(sgl, sg, nents, i) { + sg->dma_address = pswiotlb_dma_direct_map_page(dev, sg_page(sg), + sg->offset, sg->length, dir, attrs); + if (sg->dma_address == DMA_MAPPING_ERROR) { + ret = -EIO; + goto out_unmap; + } + sg_dma_len(sg) = sg->length; + } + + return nents; + +out_unmap: + pswiotlb_dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return ret; +} diff --git a/kernel/dma/phytium/pswiotlb-dma.h b/kernel/dma/phytium/pswiotlb-dma.h new file mode 100644 index 000000000000..98302401febf --- /dev/null +++ b/kernel/dma/phytium/pswiotlb-dma.h @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DMA operations based on Phytium software IO tlb that + * map physical memory. + * + * Copyright (c) 2024, Phytium Technology Co., Ltd. + */ +#ifndef _KERNEL_PSWIOTLB_DMA_DIRECT_H +#define _KERNEL_PSWIOTLB_DMA_DIRECT_H + +#include +#include +#include + +extern bool pswiotlb_force_disable; +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_PSWIOTLB) +void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void pswiotlb_dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_PSWIOTLB) +void pswiotlb_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void pswiotlb_dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void pswiotlb_dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#ifdef CONFIG_PSWIOTLB +int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void pswiotlb_dma_sync_single_for_device_distribute(struct device 
*dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs); +void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); +void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir); +void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, enum dma_data_direction dir); +void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, enum dma_data_direction dir); + +static inline bool check_if_pswiotlb_is_applicable(struct device *dev) +{ + if (dev->can_use_pswiotlb && is_phytium_ps_socs() + && !pswiotlb_force_disable) { + if (dev->numa_node == NUMA_NO_NODE || + dev->numa_node != dev->local_node) + dev->numa_node = dev->local_node; + + if (dev_is_pci(dev) && (dev->numa_node != NUMA_NO_NODE)) + return true; + } + + return false; +} + +static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_device(dev, paddr, size, dir); + + if (is_pswiotlb_active(dev)) { + if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) + pswiotlb_sync_single_for_device(dev, nid, paddr, size, dir, pool); + } + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(paddr, size, dir); +} + +static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(paddr, size, dir); + arch_sync_dma_for_cpu_all(); + } + + if (unlikely(is_swiotlb_buffer(dev, paddr))) + swiotlb_sync_single_for_cpu(dev, paddr, size, dir); + + if (is_pswiotlb_active(dev)) { + if (unlikely(is_pswiotlb_buffer(dev, nid, paddr, &pool))) + pswiotlb_sync_single_for_cpu(dev, nid, paddr, size, dir, pool); + } + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); +} + +static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); + int nid = dev->numa_node; + + if (is_swiotlb_force_bounce(dev)) + return swiotlb_map(dev, phys, size, dir, 
attrs); + + if (unlikely(!dma_capable(dev, dma_addr, size, true)) || + dma_kmalloc_needs_bounce(dev, size, dir)) { + if (is_swiotlb_active(dev)) + return swiotlb_map(dev, phys, size, dir, attrs); + + dev_WARN_ONCE(dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; + } + + /* check whether dma addr is in local node */ + if (is_pswiotlb_active(dev)) { + if (dir != DMA_TO_DEVICE) { + if (unlikely(!dma_is_in_local_node(dev, nid, dma_addr, size))) { + dma_addr = pswiotlb_map(dev, nid, phys, size, dir, attrs); + if (dma_addr == DMA_MAPPING_ERROR) { + dma_addr = phys_to_dma(dev, phys); + dev_warn_once(dev, + "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); + } else + return dma_addr; + } + } + } + + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + return dma_addr; +} + +static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = dma_to_phys(dev, addr); + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(phys, size, dir); + arch_sync_dma_for_cpu_all(); + } + + if (unlikely(is_swiotlb_buffer(dev, phys))) + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + + if (is_pswiotlb_active(dev)) { + if (unlikely(is_pswiotlb_buffer(dev, nid, phys, &pool))) + pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE)) + arch_dma_mark_clean(phys, size); + } +} +#else +static inline int pswiotlb_dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return 0; +} + +static inline dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, + struct page *page, size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return 0; +} + +static inline void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} + +static inline int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + return 0; +} + +static inline void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} + +static inline void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} + +static inline dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return 0; +} + +static inline void 
pswiotlb_iommu_dma_unmap_page(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} + +static inline int pswiotlb_iommu_dma_map_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + return 0; +} + +static inline void pswiotlb_iommu_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} + +static inline void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, enum dma_data_direction dir) +{ +} + +static inline bool check_if_pswiotlb_is_applicable(struct device *dev) +{ + return false; +} + +static inline void pswiotlb_dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} + +static inline void pswiotlb_dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} + +static inline dma_addr_t pswiotlb_dma_direct_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return 0; +} + +static inline void pswiotlb_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ +} +#endif /* CONFIG_PSWIOTLB*/ +#endif /* _KERNEL_PSWIOTLB_DMA_DIRECT_H */ diff --git a/kernel/dma/phytium/pswiotlb-iommu.c b/kernel/dma/phytium/pswiotlb-iommu.c new file mode 100644 index 000000000000..7cb90f886827 --- /dev/null +++ b/kernel/dma/phytium/pswiotlb-iommu.c @@ -0,0 +1,1145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA operations based on Phytium software IO tlb that + * map physical memory indirectly with an IOMMU. + * + * Copyright (c) 2024, Phytium Technology Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "pswiotlb iommu: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_ARCH_PHYTIUM +#include +#endif + +#include "pswiotlb-dma.h" + +enum iommu_dma_cookie_type { + IOMMU_DMA_IOVA_COOKIE, + IOMMU_DMA_MSI_COOKIE, +}; + +struct iommu_dma_cookie { + enum iommu_dma_cookie_type type; + union { + /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ + struct { + struct iova_domain iovad; + + struct iova_fq __percpu *fq; /* Flush queue */ + /* Number of TLB flushes that have been started */ + atomic64_t fq_flush_start_cnt; + /* Number of TLB flushes that have been finished */ + atomic64_t fq_flush_finish_cnt; + /* Timer to regularily empty the flush queues */ + struct timer_list fq_timer; + /* 1 when timer is active, 0 when not */ + atomic_t fq_timer_on; + }; + /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */ + dma_addr_t msi_iova; + }; + struct list_head msi_page_list; + + /* Domain for flush queue callback; NULL if flush queue not in use */ + struct iommu_domain *fq_domain; + struct mutex mutex; +}; + +static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); + +/* Number of entries per flush queue */ +#define IOVA_FQ_SIZE 256 + +/* Timeout (in ms) after which entries are flushed from the queue */ +#define IOVA_FQ_TIMEOUT 10 + +/* Flush queue entry for deferred flushing */ +struct iova_fq_entry { + unsigned long iova_pfn; + unsigned long pages; + struct list_head freelist; + u64 counter; /* Flush counter when this entry was added */ +}; + +/* Per-CPU flush queue structure */ +struct iova_fq { + struct iova_fq_entry entries[IOVA_FQ_SIZE]; + unsigned int head, tail; + spinlock_t lock; +}; + +#define fq_ring_for_each(i, fq) \ + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + +/* + * The following functions are ported from + * ./drivers/iommu/dma-iommu.c + * ./drivers/iommu/iommu.c + * static int __iommu_map(struct iommu_domain *domain, unsigned long iova, + * phys_addr_t paddr, size_t size, int prot, gfp_t gfp); + * static bool dev_is_untrusted(struct device *dev); + * static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, + * unsigned long attrs); + * static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, + * size_t size, u64 dma_limit, struct device *dev); + * static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, + * dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather); + * static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, + * size_t size); + * static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, + * size_t size, int prot, u64 dma_mask); + * static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + * dma_addr_t dma_addr); + * static void __invalidate_sg(struct scatterlist *sg, int nents); + */ + +static inline bool fq_full(struct iova_fq *fq) +{ + assert_spin_locked(&fq->lock); + return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); +} + +static inline unsigned int fq_ring_add(struct iova_fq *fq) +{ + unsigned int idx = fq->tail; + + assert_spin_locked(&fq->lock); + + fq->tail = (idx + 1) % IOVA_FQ_SIZE; + + return idx; +} + +static void 
fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +{ + u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); + unsigned int idx; + + assert_spin_locked(&fq->lock); + + fq_ring_for_each(idx, fq) { + + if (fq->entries[idx].counter >= counter) + break; + + put_pages_list(&fq->entries[idx].freelist); + free_iova_fast(&cookie->iovad, + fq->entries[idx].iova_pfn, + fq->entries[idx].pages); + + fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + } +} + +static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) +{ + atomic64_inc(&cookie->fq_flush_start_cnt); + cookie->fq_domain->ops->flush_iotlb_all(cookie->fq_domain); + atomic64_inc(&cookie->fq_flush_finish_cnt); +} + +static int __iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_domain_ops *ops = domain->ops; + unsigned long orig_iova = iova; + unsigned int min_pagesz; + size_t orig_size = size; + phys_addr_t orig_paddr = paddr; + int ret = 0; + + if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING))) + return -EINVAL; + + if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL)) + return -ENODEV; + + /* find out the minimum page size supported */ + min_pagesz = 1 << __ffs(domain->pgsize_bitmap); + + /* + * both the virtual address and the physical one, as well as + * the size of the mapping, must be aligned (at least) to the + * size of the smallest page supported by the hardware + */ + if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) { + pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n", + iova, &paddr, size, min_pagesz); + return -EINVAL; + } + + pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size); + + while (size) { + size_t pgsize, count, mapped = 0; + + pgsize = iommu_pgsize(domain, iova, paddr, size, &count); + + pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n", + iova, &paddr, pgsize, count); + ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot, + gfp, &mapped); + /* + * Some pages may have been mapped, even if an error occurred, + * so we should account for those so they can be unmapped. 
+ */ + size -= mapped; + + if (ret) + break; + + iova += mapped; + paddr += mapped; + } + + /* unroll mapping in case something went wrong */ + if (ret) + iommu_unmap(domain, orig_iova, orig_size - size); + else + trace_map(orig_iova, orig_paddr, orig_size); + + return ret; +} + +static ssize_t __iommu_map_sg_dma(struct device *dev, struct iommu_domain *domain, + unsigned long iova, struct scatterlist *sg, unsigned int nents, + int prot, gfp_t gfp, unsigned long attrs) +{ + const struct iommu_domain_ops *ops = domain->ops; + size_t mapped = 0; + int ret; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t aligned_size; + int nid = dev->numa_node; + enum dma_data_direction dir = prot & (DMA_TO_DEVICE | DMA_FROM_DEVICE | DMA_BIDIRECTIONAL); + struct scatterlist *sg_orig = sg; + struct scatterlist *s; + int i; + + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + + for_each_sg(sg, s, nents, i) { + phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset; + + /* check whether dma addr is in local node */ + if (dir != DMA_TO_DEVICE) { + aligned_size = s->length; + if ((!dma_is_in_local_node(dev, nid, phys, + aligned_size)) && (pswiotlb_force_disable != true)) { + aligned_size = iova_align(iovad, s->length); + phys = pswiotlb_tbl_map_single(dev, nid, + phys, s->length, aligned_size, iova_mask(iovad), dir, attrs); + if (phys == DMA_MAPPING_ERROR) { + phys = page_to_phys(sg_page(s)) + s->offset; + dev_warn_once(dev, + "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); + } + } + } + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, s->length, dir); + + ret = __iommu_map(domain, iova + mapped, phys, + s->length, prot, gfp); + if (ret) + goto out_err; + + mapped += s->length; + } + + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain, iova, mapped); + return mapped; + +out_err: + /* undo mappings already done */ + iommu_dma_unmap_sg_pswiotlb(dev, sg_orig, iova, + mapped, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + iommu_unmap(domain, iova, mapped); + + return ret; +} + +static ssize_t pswiotlb_iommu_map_sg_atomic_dma(struct device *dev, + struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int prot, + unsigned long attrs) +{ + return __iommu_map_sg_dma(dev, domain, iova, sg, nents, prot, GFP_ATOMIC, attrs); +} + +static bool dev_is_untrusted(struct device *dev) +{ + return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; +} + +static bool dev_use_swiotlb(struct device *dev, size_t size, + enum dma_data_direction dir) +{ + return IS_ENABLED(CONFIG_SWIOTLB) && + (dev_is_untrusted(dev) || + dma_kmalloc_needs_bounce(dev, size, dir)); +} + +/** + * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API + * page flags. + * @dir: Direction of DMA transfer + * @coherent: Is the DMA master cache-coherent? + * @attrs: DMA attributes for the mapping + * + * Return: corresponding IOMMU API page protection flags + */ +static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, + unsigned long attrs) +{ + int prot = coherent ? 
IOMMU_CACHE : 0; + + if (attrs & DMA_ATTR_PRIVILEGED) + prot |= IOMMU_PRIV; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return 0; + } +} + +static void queue_iova(struct iommu_dma_cookie *cookie, + unsigned long pfn, unsigned long pages, + struct list_head *freelist) +{ + struct iova_fq *fq; + unsigned long flags; + unsigned int idx; + + /* + * Order against the IOMMU driver's pagetable update from unmapping + * @pte, to guarantee that fq_flush_iotlb() observes that if called + * from a different CPU before we release the lock below. Full barrier + * so it also pairs with iommu_dma_init_fq() to avoid seeing partially + * written fq state here. + */ + smp_mb(); + + fq = raw_cpu_ptr(cookie->fq); + spin_lock_irqsave(&fq->lock, flags); + + /* + * First remove all entries from the flush queue that have already been + * flushed out on another CPU. This makes the fq_full() check below less + * likely to be true. + */ + fq_ring_free(cookie, fq); + + if (fq_full(fq)) { + fq_flush_iotlb(cookie); + fq_ring_free(cookie, fq); + } + + idx = fq_ring_add(fq); + + fq->entries[idx].iova_pfn = pfn; + fq->entries[idx].pages = pages; + fq->entries[idx].counter = atomic64_read(&cookie->fq_flush_start_cnt); + list_splice(freelist, &fq->entries[idx].freelist); + + spin_unlock_irqrestore(&fq->lock, flags); + + /* Avoid false sharing as much as possible. */ + if (!atomic_read(&cookie->fq_timer_on) && + !atomic_xchg(&cookie->fq_timer_on, 1)) + mod_timer(&cookie->fq_timer, + jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); +} + +static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, + size_t size, u64 dma_limit, struct device *dev) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + unsigned long shift, iova_len, iova; + + if (cookie->type == IOMMU_DMA_MSI_COOKIE) { + cookie->msi_iova += size; + return cookie->msi_iova - size; + } + + shift = iova_shift(iovad); + iova_len = size >> shift; + + dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); + + if (domain->geometry.force_aperture) + dma_limit = min_t(u64, dma_limit, (u64)domain->geometry.aperture_end); + + /* + * Try to use all the 32-bit PCI addresses first. The original SAC vs. + * DAC reasoning loses relevance with PCIe, but enough hardware and + * firmware bugs are still lurking out there that it's safest not to + * venture into the 64-bit space until necessary. + * + * If your device goes wrong after seeing the notice then likely either + * its driver is not setting DMA masks accurately, the hardware has + * some inherent bug in handling >32-bit addresses, or not all the + * expected address bits are wired up between the device and the IOMMU. 
+ */ + if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) { + iova = alloc_iova_fast(iovad, iova_len, + DMA_BIT_MASK(32) >> shift, false); + if (iova) + goto done; + + dev->iommu->pci_32bit_workaround = false; + dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit)); + } + + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +done: + return (dma_addr_t)iova << shift; +} + +static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather) +{ + struct iova_domain *iovad = &cookie->iovad; + + /* The MSI case is only ever cleaning up its most recent allocation */ + if (cookie->type == IOMMU_DMA_MSI_COOKIE) + cookie->msi_iova -= size; + else if (gather && gather->queued) + queue_iova(cookie, iova_pfn(iovad, iova), + size >> iova_shift(iovad), + &gather->freelist); + else + free_iova_fast(iovad, iova_pfn(iovad, iova), + size >> iova_shift(iovad)); +} + +static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, + size_t size) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_off = iova_offset(iovad, dma_addr); + struct iommu_iotlb_gather iotlb_gather; + size_t unmapped; + + dma_addr -= iova_off; + size = iova_align(iovad, size + iova_off); + iommu_iotlb_gather_init(&iotlb_gather); + iotlb_gather.queued = READ_ONCE(cookie->fq_domain); + + unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); + WARN_ON(unmapped != size); + + if (!iotlb_gather.queued) + iommu_iotlb_sync(domain, &iotlb_gather); + iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather); +} + +static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, + size_t size, int prot, u64 dma_mask) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t iova_off = iova_offset(iovad, phys); + dma_addr_t iova; + + if (static_branch_unlikely(&iommu_deferred_attach_enabled) && + iommu_deferred_attach(dev, domain)) + return DMA_MAPPING_ERROR; + + size = iova_align(iovad, size + iova_off); + + iova = iommu_dma_alloc_iova(domain, size, dma_mask, dev); + if (!iova) + return DMA_MAPPING_ERROR; + + if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { + iommu_dma_free_iova(cookie, iova, size, NULL); + return DMA_MAPPING_ERROR; + } + return iova + iova_off; +} + +void pswiotlb_iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys; + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + if (is_pswiotlb_active(dev)) { + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, size, dir); + + if (is_pswiotlb_buffer(dev, nid, phys, &pool)) + pswiotlb_sync_single_for_cpu(dev, nid, phys, size, dir, pool); + + if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) + return; + + if (is_swiotlb_buffer(dev, phys)) + swiotlb_sync_single_for_cpu(dev, phys, size, dir); + } else { + if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) + return; + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, size, dir); + if (is_swiotlb_buffer(dev, phys)) + swiotlb_sync_single_for_cpu(dev, phys, size, 
dir); + } +} + +void pswiotlb_iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ + phys_addr_t phys; + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + if (is_pswiotlb_active(dev)) { + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); + if (is_pswiotlb_buffer(dev, nid, phys, &pool)) + pswiotlb_sync_single_for_device(dev, nid, phys, size, dir, pool); + + if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) + return; + } else { + if (dev_is_dma_coherent(dev) && !dev_use_swiotlb(dev, size, dir)) + return; + + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle); + } + + if (is_swiotlb_buffer(dev, phys)) + swiotlb_sync_single_for_device(dev, phys, size, dir); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(phys, size, dir); +} + +void pswiotlb_iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + int nid = dev->numa_node; + dma_addr_t start_orig; + phys_addr_t phys; + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + struct p_io_tlb_pool *pool; + + if (is_pswiotlb_active(dev)) { + start_orig = sg_dma_address(sgl); + for_each_sg(sgl, sg, nelems, i) { + if (dir != DMA_TO_DEVICE) { + unsigned int s_iova_off = iova_offset(iovad, sg->offset); + + if (i > 0) + start_orig += s_iova_off; + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), start_orig); + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, sg->length, dir); + + if (is_pswiotlb_buffer(dev, nid, phys, &pool)) + pswiotlb_sync_single_for_cpu(dev, nid, phys, + sg->length, dir, pool); + start_orig -= s_iova_off; + start_orig += iova_align(iovad, sg->length + s_iova_off); + } else { + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); + } + } + } else { + if (dev_is_dma_coherent(dev) && !dev_is_untrusted(dev)) + return; + + for_each_sg(sgl, sg, nelems, i) { + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); + + if (is_swiotlb_buffer(dev, sg_phys(sg))) + swiotlb_sync_single_for_cpu(dev, sg_phys(sg), + sg->length, dir); + } + } +} + +void pswiotlb_iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + if (is_pswiotlb_active(dev)) { + for_each_sg(sgl, sg, nelems, i) { + if (is_pswiotlb_buffer(dev, nid, sg_phys(sg), &pool)) + pswiotlb_sync_single_for_device(dev, nid, sg_phys(sg), + sg->length, dir, pool); + if (dev_is_dma_coherent(dev) && !sg_dma_is_swiotlb(sgl)) + continue; + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); + } + } else { + if (dev_is_dma_coherent(dev) && !sg_dma_is_swiotlb(sgl)) + return; + + for_each_sg(sgl, sg, nelems, i) { + if (is_swiotlb_buffer(dev, sg_phys(sg))) + swiotlb_sync_single_for_device(dev, sg_phys(sg), + sg->length, dir); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); + } + } +} + +dma_addr_t pswiotlb_iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + bool coherent = 
dev_is_dma_coherent(dev); + + int prot = dma_info_to_prot(dir, coherent, attrs); + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + size_t aligned_size = size; + dma_addr_t iova, dma_mask = dma_get_mask(dev); + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + /* + * If both the physical buffer start address and size are + * page aligned, we don't need to use a bounce page. + */ + if (dev_use_swiotlb(dev, size, dir) && + iova_offset(iovad, phys | size)) { + if (!is_swiotlb_active(dev)) { + dev_warn_once(dev, "DMA bounce buffers are inactive, unable to map unaligned transaction.\n"); + return DMA_MAPPING_ERROR; + } + + trace_swiotlb_bounced(dev, phys, size); + + phys = swiotlb_tbl_map_single(dev, phys, size, + iova_mask(iovad), dir, attrs); + + if (phys == DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + /* + * Untrusted devices should not see padding areas with random + * leftover kernel data, so zero the pre- and post-padding. + * swiotlb_tbl_map_single() has initialized the bounce buffer + * proper to the contents of the original memory buffer. + */ + if (dev_is_untrusted(dev)) { + size_t start, virt = (size_t)phys_to_virt(phys); + + /* Pre-padding */ + start = iova_align_down(iovad, virt); + memset((void *)start, 0, virt - start); + + /* Post-padding */ + start = virt + size; + memset((void *)start, 0, + iova_align(iovad, start) - start); + } + } + + /* check whether dma addr is in local node */ + if (is_pswiotlb_active(dev)) { + if (dir != DMA_TO_DEVICE) { + if (unlikely(!dma_is_in_local_node(dev, nid, phys, aligned_size))) { + aligned_size = iova_align(iovad, size); + phys = pswiotlb_tbl_map_single(dev, nid, phys, size, + aligned_size, iova_mask(iovad), + dir, attrs); + if (phys == DMA_MAPPING_ERROR) { + phys = page_to_phys(page) + offset; + dev_warn_once(dev, + "Failed to allocate memory from pswiotlb, fall back to non-local dma\n"); + } + } + } + } + + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + + iova = __iommu_dma_map(dev, phys, size, prot, dma_mask); + if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(dev, phys)) + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + if (iova == DMA_MAPPING_ERROR && is_pswiotlb_buffer(dev, nid, phys, &pool)) + pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); + return iova; +} + +void pswiotlb_iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + phys_addr_t phys; + int nid = dev->numa_node; + struct p_io_tlb_pool *pool; + + phys = iommu_iova_to_phys(domain, dma_handle); + if (WARN_ON(!phys)) + return; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, size, dir); + + __iommu_dma_unmap(dev, dma_handle, size); + + if (unlikely(is_swiotlb_buffer(dev, phys))) + swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs); + + if (is_pswiotlb_active(dev) && + is_pswiotlb_buffer(dev, nid, phys, &pool)) + pswiotlb_tbl_unmap_single(dev, nid, phys, 0, size, dir, attrs, pool); +} + +static void iommu_dma_unmap_page_sg(struct device *dev, dma_addr_t dma_handle, + size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + phys_addr_t phys; + int nid = dev->numa_node; + struct p_io_tlb_pool 
*pool; + + phys = iommu_iova_to_phys(domain, dma_handle); + + if (WARN_ON(!phys)) + return; + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && !dev_is_dma_coherent(dev)) + arch_sync_dma_for_cpu(phys, size, dir); + + if (is_pswiotlb_buffer(dev, nid, phys, &pool)) + pswiotlb_tbl_unmap_single(dev, nid, phys, offset, size, dir, attrs, pool); +} + +/* + * Prepare a successfully-mapped scatterlist to give back to the caller. + * + * At this point the segments are already laid out by pswiotlb_iommu_dma_map_sg() to + * avoid individually crossing any boundaries, so we merely need to check a + * segment's start address to avoid concatenating across one. + */ +static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, + dma_addr_t dma_addr) +{ + struct scatterlist *s, *cur = sg; + unsigned long seg_mask = dma_get_seg_boundary(dev); + unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); + int i, count = 0; + + for_each_sg(sg, s, nents, i) { + /* Restore this segment's original unaligned fields first */ + dma_addr_t s_dma_addr = sg_dma_address(s); + unsigned int s_iova_off = sg_dma_address(s); + unsigned int s_length = sg_dma_len(s); + unsigned int s_iova_len = s->length; + + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + + if (sg_dma_is_bus_address(s)) { + if (i > 0) + cur = sg_next(cur); + + sg_dma_unmark_bus_address(s); + sg_dma_address(cur) = s_dma_addr; + sg_dma_len(cur) = s_length; + sg_dma_mark_bus_address(cur); + count++; + cur_len = 0; + continue; + } + + s->offset += s_iova_off; + s->length = s_length; + + /* + * Now fill in the real DMA data. If... + * - there is a valid output segment to append to + * - and this segment starts on an IOVA page boundary + * - but doesn't fall at a segment boundary + * - and wouldn't make the resulting output segment too long + */ + if (cur_len && !s_iova_off && (dma_addr & seg_mask) && + (max_len - cur_len >= s_length)) { + /* ...then concatenate it with the previous one */ + cur_len += s_length; + } else { + /* Otherwise start the next output segment */ + if (i > 0) + cur = sg_next(cur); + cur_len = s_length; + count++; + + sg_dma_address(cur) = dma_addr + s_iova_off; + } + + sg_dma_len(cur) = cur_len; + dma_addr += s_iova_len; + + if (s_length + s_iova_off < s_iova_len) + cur_len = 0; + } + return count; +} + +/* + * If mapping failed, then just restore the original list, + * but making sure the DMA fields are invalidated. 
+ */ +static void __invalidate_sg(struct scatterlist *sg, int nents) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_is_bus_address(s)) { + sg_dma_unmark_bus_address(s); + } else { + if (sg_dma_address(s) != DMA_MAPPING_ERROR) + s->offset += sg_dma_address(s); + if (sg_dma_len(s)) + s->length = sg_dma_len(s); + } + sg_dma_address(s) = DMA_MAPPING_ERROR; + sg_dma_len(s) = 0; + } +} + +static void iommu_dma_unmap_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + pswiotlb_iommu_dma_unmap_page(dev, sg_dma_address(s), + sg_dma_len(s), dir, attrs); +} + +void iommu_dma_unmap_sg_pswiotlb(struct device *dev, struct scatterlist *sg, + unsigned long iova_start, size_t mapped, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + dma_addr_t start, start_orig; + struct scatterlist *s; + struct scatterlist *sg_orig = sg; + int i; + + start = iova_start; + start_orig = start; + for_each_sg(sg_orig, s, nents, i) { + if (!mapped || (start_orig > (start + mapped))) + break; + if (s->length == 0) + break; + iommu_dma_unmap_page_sg(dev, start_orig, 0, + s->length, dir, attrs); + start_orig += s->length; + } +} + +static int iommu_dma_map_sg_pswiotlb_pagesize(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *s; + int i; + + sg_dma_mark_swiotlb(sg); + + for_each_sg(sg, s, nents, i) { + sg_dma_address(s) = pswiotlb_iommu_dma_map_page(dev, sg_page(s), + s->offset, s->length, dir, attrs); + if (sg_dma_address(s) == DMA_MAPPING_ERROR) + goto out_unmap; + sg_dma_len(s) = s->length; + } + + return nents; + +out_unmap: + iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); + return -EIO; +} + +/* + * The DMA API client is passing in a scatterlist which could describe + * any old buffer layout, but the IOMMU API requires everything to be + * aligned to IOMMU pages. Hence the need for this complicated bit of + * impedance-matching, to be able to hand off a suitably-aligned list, + * but still preserve the original offsets and sizes for the caller. + */ +int pswiotlb_iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + struct scatterlist *s, *prev = NULL; + int prot = dma_info_to_prot(dir, dev_is_dma_coherent(dev), attrs); + struct pci_p2pdma_map_state p2pdma_state = {}; + enum pci_p2pdma_map_type map; + dma_addr_t iova; + size_t iova_len = 0; + unsigned long mask = dma_get_seg_boundary(dev); + ssize_t ret; + int i; + + if (static_branch_unlikely(&iommu_deferred_attach_enabled)) { + ret = iommu_deferred_attach(dev, domain); + goto out; + } + + if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev) + && ((nents == 1) && (sg->length < PAGE_SIZE))) + return iommu_dma_map_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs); + + if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nents, dir); + + /* + * Work out how much IOVA space we need, and align the segments to + * IOVA granules for the IOMMU driver to handle. 
With some clever + * trickery we can modify the list in-place, but reversibly, by + * stashing the unaligned parts in the as-yet-unused DMA fields. + */ + for_each_sg(sg, s, nents, i) { + size_t s_iova_off = iova_offset(iovad, s->offset); + size_t s_length = s->length; + size_t pad_len = (mask - iova_len + 1) & mask; + + if (is_pci_p2pdma_page(sg_page(s))) { + map = pci_p2pdma_map_segment(&p2pdma_state, dev, s); + switch (map) { + case PCI_P2PDMA_MAP_BUS_ADDR: + /* + * iommu_map_sg() will skip this segment as + * it is marked as a bus address, + * __finalise_sg() will copy the dma address + * into the output segment. + */ + continue; + case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: + /* + * Mapping through host bridge should be + * mapped with regular IOVAs, thus we + * do nothing here and continue below. + */ + break; + default: + ret = -EREMOTEIO; + goto out_restore_sg; + } + } + + sg_dma_address(s) = s_iova_off; + sg_dma_len(s) = s_length; + s->offset -= s_iova_off; + s_length = iova_align(iovad, s_length + s_iova_off); + s->length = s_length; + + /* + * Due to the alignment of our single IOVA allocation, we can + * depend on these assumptions about the segment boundary mask: + * - If mask size >= IOVA size, then the IOVA range cannot + * possibly fall across a boundary, so we don't care. + * - If mask size < IOVA size, then the IOVA range must start + * exactly on a boundary, therefore we can lay things out + * based purely on segment lengths without needing to know + * the actual addresses beforehand. + * - The mask must be a power of 2, so pad_len == 0 if + * iova_len == 0, thus we cannot dereference prev the first + * time through here (i.e. before it has a meaningful value). + */ + if (pad_len && pad_len < s_length - 1) { + prev->length += pad_len; + iova_len += pad_len; + } + + iova_len += s_length; + prev = s; + } + + if (!iova_len) + return __finalise_sg(dev, sg, nents, 0); + + iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); + if (!iova) { + ret = -ENOMEM; + goto out_restore_sg; + } + + /* + * We'll leave any physical concatenation to the IOMMU driver's + * implementation - it knows better than we do. + */ + if (dir != DMA_TO_DEVICE && is_pswiotlb_active(dev)) + ret = pswiotlb_iommu_map_sg_atomic_dma(dev, domain, iova, sg, nents, prot, attrs); + else + ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); + + if (ret < 0 || ret < iova_len) + goto out_free_iova; + + return __finalise_sg(dev, sg, nents, iova); + +out_free_iova: + iommu_dma_free_iova(cookie, iova, iova_len, NULL); +out_restore_sg: + __invalidate_sg(sg, nents); +out: + if (ret != -ENOMEM && ret != -EREMOTEIO) + return -EINVAL; + return ret; +} + +void pswiotlb_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + dma_addr_t start, end = 0, start_orig; + struct scatterlist *tmp, *s; + struct scatterlist *sg_orig = sg; + int i; + struct iommu_domain *domain = iommu_get_dma_domain(dev); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + + if ((dir != DMA_TO_DEVICE) && ((nents == 1) && (sg->length < PAGE_SIZE))) { + iommu_dma_unmap_sg_pswiotlb_pagesize(dev, sg, nents, dir, attrs); + return; + } + + if ((dir == DMA_TO_DEVICE) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nents, dir); + + /* + * The scatterlist segments are mapped into a single + * contiguous IOVA allocation, the start and end points + * just have to be determined. 
+ */ + for_each_sg(sg, tmp, nents, i) { + if (sg_dma_is_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; + + start = sg_dma_address(tmp); + break; + } + + if (is_pswiotlb_active(dev)) { + /* check whether dma addr is in local node */ + start_orig = start; + if (dir != DMA_TO_DEVICE) { + for_each_sg(sg_orig, s, nents, i) { + unsigned int s_iova_off = iova_offset(iovad, s->offset); + + if (i > 0) + start_orig += s_iova_off; + iommu_dma_unmap_page_sg(dev, start_orig, + s_iova_off, s->length, + dir, attrs); + start_orig -= s_iova_off; + start_orig += iova_align(iovad, s->length + s_iova_off); + } + } + } + + nents -= i; + for_each_sg(tmp, tmp, nents, i) { + if (sg_dma_is_bus_address(tmp)) { + sg_dma_unmark_bus_address(tmp); + continue; + } + + if (sg_dma_len(tmp) == 0) + break; + + end = sg_dma_address(tmp) + sg_dma_len(tmp); + } + + if (end) + __iommu_dma_unmap(dev, start, end - start); +} diff --git a/kernel/dma/phytium/pswiotlb-mapping.c b/kernel/dma/phytium/pswiotlb-mapping.c new file mode 100644 index 000000000000..65674b7bdeab --- /dev/null +++ b/kernel/dma/phytium/pswiotlb-mapping.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Auxiliary DMA operations used by arch-independent dma-mapping + * routines when Phytium software IO tlb is required. + * + * Copyright (c) 2024, Phytium Technology Co., Ltd. + */ +#include /* for max_pfn */ +#include +#include +#include +#include +#include +#include +#include +#include "../debug.h" +#include "../direct.h" +#include "pswiotlb-dma.h" + +/* + * The following functions are ported from + * ./drivers/dma/mapping.c + * static bool dma_go_direct(struct device *dev, dma_addr_t mask, + * const struct dma_map_ops *ops); + * static inline bool dma_map_direct(struct device *dev, + * const struct dma_map_ops *ops); + */ + +static bool dma_go_direct(struct device *dev, dma_addr_t mask, + const struct dma_map_ops *ops) +{ + if (likely(!ops)) + return true; +#ifdef CONFIG_DMA_OPS_BYPASS + if (dev->dma_ops_bypass) + return min_not_zero(mask, dev->bus_dma_limit) >= + dma_direct_get_required_mask(dev); +#endif + return false; +} + +static inline bool dma_map_direct(struct device *dev, + const struct dma_map_ops *ops) +{ + return dma_go_direct(dev, *dev->dma_mask, ops); +} +dma_addr_t pswiotlb_dma_map_page_distribute(struct device *dev, struct page *page, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + dma_addr_t addr; + + if (dma_map_direct(dev, ops) || + arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) + addr = pswiotlb_dma_direct_map_page(dev, page, offset, size, dir, attrs); + else + addr = pswiotlb_iommu_dma_map_page(dev, page, offset, size, dir, attrs); + debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); + + return addr; +} + +void pswiotlb_dma_unmap_page_attrs_distribute(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops) || + arch_dma_unmap_page_direct(dev, addr + size)) + pswiotlb_dma_direct_unmap_page(dev, addr, size, dir, attrs); + else if (ops->unmap_page) + pswiotlb_iommu_dma_unmap_page(dev, addr, size, dir, attrs); + debug_dma_unmap_page(dev, addr, size, dir); +} + +int pswiotlb_dma_map_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + const 
struct dma_map_ops *ops = get_dma_ops(dev); + int ents; + + if (dma_map_direct(dev, ops) || + arch_dma_map_sg_direct(dev, sg, nents)) + ents = pswiotlb_dma_direct_map_sg(dev, sg, nents, dir, attrs); + else + ents = pswiotlb_iommu_dma_map_sg(dev, sg, nents, dir, attrs); + + if (ents > 0) + debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); + else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && + ents != -EIO)) + return -EIO; + + return ents; +} + +void pswiotlb_dma_unmap_sg_attrs_distribute(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops) || + arch_dma_unmap_sg_direct(dev, sg, nents)) + pswiotlb_dma_direct_unmap_sg(dev, sg, nents, dir, attrs); + else if (ops->unmap_sg) + pswiotlb_iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); +} + +void pswiotlb_dma_sync_single_for_cpu_distribute(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + pswiotlb_dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (ops->sync_single_for_cpu) + pswiotlb_iommu_dma_sync_single_for_cpu(dev, addr, size, dir); + debug_dma_sync_single_for_cpu(dev, addr, size, dir); +} + +void pswiotlb_dma_sync_single_for_device_distribute(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + pswiotlb_dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (ops->sync_single_for_device) + pswiotlb_iommu_dma_sync_single_for_device(dev, addr, size, dir); + debug_dma_sync_single_for_device(dev, addr, size, dir); +} + +void pswiotlb_dma_sync_sg_for_cpu_distribute(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + pswiotlb_dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (ops->sync_sg_for_cpu) + pswiotlb_iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); + debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); +} + +void pswiotlb_dma_sync_sg_for_device_distribute(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (dma_map_direct(dev, ops)) + pswiotlb_dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (ops->sync_sg_for_device) + pswiotlb_iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); + debug_dma_sync_sg_for_device(dev, sg, nelems, dir); +} diff --git a/kernel/dma/phytium/pswiotlb.c b/kernel/dma/phytium/pswiotlb.c new file mode 100644 index 000000000000..7b8f254d003a --- /dev/null +++ b/kernel/dma/phytium/pswiotlb.c @@ -0,0 +1,1736 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Phytium software IO tlb to improve DMA performance. + * + * Copyright (c) 2024, Phytium Technology Co., Ltd. 
+ */ + +#define pr_fmt(fmt) "Phytium software IO TLB: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_DEBUG_FS +#include +#endif +#ifdef CONFIG_DMA_RESTRICTED_POOL +#include +#include +#include +#include +#include +#endif + +#include + +#define CREATE_TRACE_POINTS +#include + +#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - P_IO_TLB_SHIFT)) + +/* + * Minimum Phytium IO TLB size to bother booting with. If we can't + * allocate a contiguous 1MB, we're probably in trouble anyway. + */ +#define P_IO_TLB_MIN_SLABS ((1<<20) >> P_IO_TLB_SHIFT) +#define PSWIOTLB_VERSION "1.0.0" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) + +int pswiotlb_node_num; +bool pswiotlb_mtimer_alive; + +/** + * struct p_io_tlb_slot - Phytium IO TLB slot descriptor + * @orig_addr: The original address corresponding to a mapped entry. + * @alloc_size: Size of the allocated buffer. + * @list: The free list describing the number of free entries available + * from each index. + */ +struct p_io_tlb_slot { + phys_addr_t orig_addr; + size_t alloc_size; + unsigned int list; +}; + +bool pswiotlb_force_disable; + +static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes); + +struct p_io_tlb_mem p_io_tlb_default_mem[MAX_NUMNODES]; +static struct timer_list service_timer; + +static unsigned long default_npslabs = P_IO_TLB_DEFAULT_SIZE >> P_IO_TLB_SHIFT; +static unsigned long dynamic_inc_thr_npslabs = P_IO_TLB_INC_THR >> P_IO_TLB_SHIFT; +static unsigned long default_npareas; + +LIST_HEAD(passthroughlist); +static spinlock_t passthroughlist_lock; +static struct pswiotlb_passthroughlist passthroughlist_entry[1024]; +static struct dentry *passthroughlist_debugfs; +static struct dentry *pswiotlb_debugfs; +/** + * struct p_io_tlb_area - Phytium IO TLB memory area descriptor + * + * This is a single area with a single lock. + * + * @used: The number of used Phytium IO TLB block. + * @index: The slot index to start searching in this area for next round. + * @lock: The lock to protect the above data structures in the map and + * unmap calls. + */ +struct p_io_tlb_area { + unsigned long used; + unsigned int index; + spinlock_t lock; +}; + +static struct pswiotlb_passthroughlist_entry { + unsigned short vendor; + unsigned short device; +} ps_passthroughlist[] = { + {BL_PCI_VENDOR_ID_NVIDIA, 0xFFFF}, + {BL_PCI_VENDOR_ID_ILUVATAR, 0xFFFF}, + {BL_PCI_VENDOR_ID_METAX, 0xFFFF}, + {} +}; + +/* + * Round up number of slabs to the next power of 2. The last area is going + * be smaller than the rest if default_npslabs is not power of two. + * The number of slot in an area should be a multiple of P_IO_TLB_SEGSIZE, + * otherwise a segment may span two or more areas. It conflicts with free + * contiguous slots tracking: free slots are treated contiguous no matter + * whether they cross an area boundary. + * + * Return true if default_npslabs is rounded up. 
+ */ +static bool round_up_default_npslabs(void) +{ + if (!default_npareas) + return false; + + if (default_npslabs < P_IO_TLB_SEGSIZE * default_npareas) + default_npslabs = P_IO_TLB_SEGSIZE * default_npareas; + else if (is_power_of_2(default_npslabs)) + return false; + default_npslabs = roundup_pow_of_two(default_npslabs); + return true; +} + +/** + * pswiotlb_adjust_nareas() - adjust the number of areas and slots + * @nareas: Desired number of areas. Zero is treated as 1. + * + * Adjust the default number of areas in a memory pool. + * The default size of the memory pool may also change to meet minimum area + * size requirements. + */ +static void pswiotlb_adjust_nareas(unsigned int nareas) +{ + if (!nareas) + nareas = 1; + else if (!is_power_of_2(nareas)) + nareas = roundup_pow_of_two(nareas); + + default_npareas = nareas; + + pr_info("area num %d.\n", nareas); + if (round_up_default_npslabs()) + pr_info("PSWIOTLB bounce buffer size roundup to %luMB", + (default_npslabs << P_IO_TLB_SHIFT) >> 20); +} + +/** + * limit_nareas() - get the maximum number of areas for a given memory pool size + * @nareas: Desired number of areas. + * @nslots: Total number of slots in the memory pool. + * + * Limit the number of areas to the maximum possible number of areas in + * a memory pool of the given size. + * + * Return: Maximum possible number of areas. + */ +static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) +{ + if (nslots < nareas * P_IO_TLB_SEGSIZE) + return nslots / P_IO_TLB_SEGSIZE; + return nareas; +} + +static int __init +setup_p_io_tlb_npages(char *str) +{ + unsigned long nareas; + + if (!strcmp(str, "forceoff")) { + pswiotlb_force_disable = true; + } else if (isdigit(*str) && !kstrtoul(str, 0, &default_npslabs)) { + default_npslabs = ALIGN(default_npslabs, P_IO_TLB_SEGSIZE); + str = strchr(str, ','); + if (str++ && isdigit(*str) && !kstrtoul(str, 0, &nareas)) + pswiotlb_adjust_nareas(nareas); + } + return 0; +} +early_param("pswiotlb", setup_p_io_tlb_npages); + +static int __init +setup_pswiotlb_passthroughlist(char *str) +{ + char tmp_str[5] = {'\0'}; + unsigned long flags; + int i, j, k; + int ret; + + for (i = 0, j = 0, k = 0; i < strlen(str) + 1; i++) { + if (*(str + i) != ',' && *(str + i) != '\0') { + tmp_str[j++] = *(str + i); + } else { + j = 0; + + ret = kstrtou16(tmp_str, 16, &passthroughlist_entry[k].vendor); + if (ret) + return ret; + + passthroughlist_entry[k].from_grub = true; + + spin_lock_irqsave(&passthroughlist_lock, flags); + list_add_rcu(&passthroughlist_entry[k].node, &passthroughlist); + spin_unlock_irqrestore(&passthroughlist_lock, flags); + + k++; + } + } + + return 0; +} +early_param("pswiotlb_passthroughlist", setup_pswiotlb_passthroughlist); + +unsigned long pswiotlb_size_or_default(void) +{ + return default_npslabs << P_IO_TLB_SHIFT; +} + +void __init pswiotlb_adjust_size(unsigned long size) +{ + if (default_npslabs != P_IO_TLB_DEFAULT_SIZE >> P_IO_TLB_SHIFT) + return; + size = ALIGN(size, P_IO_TLB_SIZE); + default_npslabs = ALIGN(size >> P_IO_TLB_SHIFT, P_IO_TLB_SEGSIZE); + if (round_up_default_npslabs()) + size = default_npslabs << P_IO_TLB_SHIFT; + pr_info("PSWIOTLB bounce buffer size adjusted to %luMB", size >> 20); +} + +void pswiotlb_print_info(int nid) +{ + struct p_io_tlb_pool *mem = &p_io_tlb_default_mem[nid].defpool; + + if (!mem->nslabs) { + pr_warn("No local mem of numa node %d\n", nid); + return; + } + + pr_info("numa %d mapped [mem %pa-%pa] (%luMB)\n", nid, &mem->start, &mem->end, + (mem->nslabs << P_IO_TLB_SHIFT) >> 20); +} 
+ +static inline unsigned long io_tlb_offset(unsigned long val) +{ + return val & (P_IO_TLB_SEGSIZE - 1); +} + +static inline unsigned long nr_slots(u64 val) +{ + return DIV_ROUND_UP(val, P_IO_TLB_SIZE); +} + +static void pswiotlb_record_mem_range(struct p_io_tlb_mem *mem) +{ + unsigned long start_pfn, end_pfn; + unsigned long min_pfn = (~(phys_addr_t)0 >> PAGE_SHIFT), max_pfn = 0; + int i, nid; + unsigned long total_pfn = 0; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, + (u64)start_pfn << PAGE_SHIFT, + ((u64)end_pfn << PAGE_SHIFT) - 1); + if (nid == mem->numa_node_id) { + if (min_pfn > start_pfn) + min_pfn = start_pfn; + if (max_pfn < end_pfn) + max_pfn = end_pfn; + total_pfn += end_pfn - start_pfn + 1; + } + } + + mem->node_min_addr = (u64)min_pfn << PAGE_SHIFT; + mem->node_max_addr = ((u64)max_pfn << PAGE_SHIFT) - 1; + mem->node_total_mem = (u64)total_pfn << PAGE_SHIFT; +} + +static void pswiotlb_init_io_tlb_pool(struct p_io_tlb_pool *mem, int nid, phys_addr_t start, + unsigned long npslabs, bool late_alloc, unsigned int nareas) +{ + void *vaddr = phys_to_virt(start); + unsigned long bytes = npslabs << P_IO_TLB_SHIFT, i; + + mem->nslabs = npslabs; + mem->start = start; + mem->end = mem->start + bytes; + mem->late_alloc = late_alloc; + mem->numa_node_id = nid; + mem->nareas = nareas; + mem->area_nslabs = npslabs / mem->nareas; + mem->free_th = PSWIOTLB_FREE_THRESHOLD; + + for (i = 0; i < mem->nareas; i++) { + spin_lock_init(&mem->areas[i].lock); + mem->areas[i].index = 0; + mem->areas[i].used = 0; + } + + for (i = 0; i < mem->nslabs; i++) { + mem->slots[i].list = P_IO_TLB_SEGSIZE - io_tlb_offset(i); + mem->slots[i].orig_addr = INVALID_PHYS_ADDR; + mem->slots[i].alloc_size = 0; + } + memset(vaddr, 0, bytes); + mem->vaddr = vaddr; +} + +/** + * add_mem_pool() - add a memory pool to the allocator + * @mem: Phytium software IO TLB allocator. + * @pool: Memory pool to be added. + */ +static void add_mem_pool(struct p_io_tlb_mem *mem, struct p_io_tlb_pool *pool) +{ + spin_lock(&mem->lock); + if (mem->capacity != mem->whole_size) { + mem->pool_addr[mem->whole_size] = mem->pool_addr[mem->capacity]; + mem->pool_addr[mem->capacity] = pool; + } else { + mem->pool_addr[mem->capacity] = pool; + } + /* prevent any other writes prior to this time */ + smp_wmb(); + mem->capacity++; + mem->whole_size++; + mem->nslabs += pool->nslabs; + spin_unlock(&mem->lock); +} + +static void __init *pswiotlb_memblock_alloc(unsigned long npslabs, + int nid, unsigned int flags, + int (*remap)(void *tlb, unsigned long npslabs)) +{ + size_t bytes = PAGE_ALIGN(npslabs << P_IO_TLB_SHIFT); + void *tlb; + + tlb = memblock_alloc_node(bytes, PAGE_SIZE, nid); + + if (!tlb) { + pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", + __func__, bytes); + return NULL; + } + + if (remap && remap(tlb, npslabs) < 0) { + memblock_free(tlb, PAGE_ALIGN(bytes)); + pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); + return NULL; + } + + return tlb; +} + +static void check_if_pswiotlb_in_local_node(struct p_io_tlb_mem *mem, + struct p_io_tlb_pool *pool) +{ + if ((pool->start < mem->node_min_addr) || + pool->end > mem->node_max_addr) { + mem->nslabs = 0; + pool->nslabs = 0; + } +} + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the Phytium software IO TLB used to implement the DMA API. 
+ */ +void __init pswiotlb_init_remap(bool addressing_limit, int nid, unsigned int flags, + int (*remap)(void *tlb, unsigned long npslabs)) +{ + struct p_io_tlb_pool *mem = &p_io_tlb_default_mem[nid].defpool; + unsigned long npslabs; + unsigned int nareas; + size_t alloc_size; + void *tlb; + + if (!addressing_limit) + return; + if (pswiotlb_force_disable) + return; + + if (!remap) + p_io_tlb_default_mem[nid].can_grow = true; + p_io_tlb_default_mem[nid].phys_limit = virt_to_phys(high_memory - 1); + + if (!default_npareas) + pswiotlb_adjust_nareas(num_possible_cpus()); + + npslabs = default_npslabs; + nareas = limit_nareas(default_npareas, npslabs); + while ((tlb = pswiotlb_memblock_alloc(npslabs, nid, flags, remap)) == NULL) { + if (npslabs <= P_IO_TLB_MIN_SLABS) + return; + npslabs = ALIGN(npslabs >> 1, P_IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, npslabs); + } + + if (default_npslabs != npslabs) { + pr_info("PSWIOTLB bounce buffer size adjusted %lu -> %lu slabs", + default_npslabs, npslabs); + default_npslabs = npslabs; + } + + alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), npslabs)); + mem->slots = memblock_alloc(alloc_size, PAGE_SIZE); + if (!mem->slots) { + pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n", + __func__, alloc_size, PAGE_SIZE); + return; + } + + mem->areas = memblock_alloc(array_size(sizeof(struct p_io_tlb_area), + nareas), SMP_CACHE_BYTES); + if (!mem->areas) { + pr_warn("%s: Failed to allocate mem->areas.\n", __func__); + return; + } + + pswiotlb_init_io_tlb_pool(mem, nid, __pa(tlb), npslabs, false, nareas); + add_mem_pool(&p_io_tlb_default_mem[nid], mem); + check_if_pswiotlb_in_local_node(&p_io_tlb_default_mem[nid], mem); + + if (flags & PSWIOTLB_VERBOSE) + pswiotlb_print_info(nid); +} +/** + * pswiotlb_free_tlb() - free a dynamically allocated Phytium IO TLB buffer + * @vaddr: Virtual address of the buffer. + * @bytes: Size of the buffer. + */ +static void pswiotlb_free_tlb(void *vaddr, size_t bytes) +{ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(NULL, vaddr, bytes)) + return; + + /* Intentional leak if pages cannot be encrypted again. */ + if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes))) + __free_pages(virt_to_page(vaddr), get_order(bytes)); +} +/** + * pswiotlb_alloc_tlb() - allocate a dynamic Phytium IO TLB buffer + * @dev: Device for which a memory pool is allocated. + * @bytes: Size of the buffer. + * @phys_limit: Maximum allowed physical address of the buffer. + * @gfp: GFP flags for the allocation. + * + * Return: Allocated pages, or %NULL on allocation failure. + */ +static struct page *pswiotlb_alloc_tlb(struct device *dev, int nid, size_t bytes, + u64 phys_limit, gfp_t gfp) +{ + struct page *page; + + /* + * Allocate from the atomic pools if memory is encrypted and + * the allocation is atomic, because decrypting may block. 
+ */ + if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) { + void *vaddr; + + if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) + return NULL; + + return dma_alloc_from_pool(dev, bytes, &vaddr, gfp, + pswiotlb_dma_coherent_ok); + } + + gfp &= ~GFP_ZONEMASK; + if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + gfp |= __GFP_DMA; + else if (phys_limit <= DMA_BIT_MASK(32)) + gfp |= __GFP_DMA32; + + while ((page = alloc_dma_pages(nid, gfp, bytes)) && + page_to_phys(page) + bytes - 1 > phys_limit) { + /* allocated, but too high */ + __free_pages(page, get_order(bytes)); + + if (IS_ENABLED(CONFIG_ZONE_DMA32) && + phys_limit < DMA_BIT_MASK(64) && + !(gfp & (__GFP_DMA32 | __GFP_DMA))) + gfp |= __GFP_DMA32; + else if (IS_ENABLED(CONFIG_ZONE_DMA) && + !(gfp & __GFP_DMA)) + gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA; + else + return NULL; + } + + return page; +} +/** + * pswiotlb_alloc_pool() - allocate a new Phytium IO TLB memory pool + * @dev: Device for which a memory pool is allocated. + * @minslabs: Minimum number of slabs. + * @nslabs: Desired (maximum) number of slabs. + * @nareas: Number of areas. + * @phys_limit: Maximum DMA buffer physical address. + * @gfp: GFP flags for the allocations. + * + * Allocate and initialize a new Phytium IO TLB memory pool. The actual number of + * slabs may be reduced if allocation of @nslabs fails. If even + * @minslabs cannot be allocated, this function fails. + * + * Return: New memory pool, or %NULL on allocation failure. + */ +static struct p_io_tlb_pool *pswiotlb_alloc_pool(struct device *dev, + int nid, unsigned long minslabs, unsigned long nslabs, + unsigned int nareas, u64 phys_limit, bool transient, gfp_t gfp) +{ + struct p_io_tlb_pool *pool; + unsigned int slot_order; + struct page *tlb; + size_t pool_size; + size_t tlb_size; + + if (nslabs > SLABS_PER_PAGE << MAX_ORDER) { + nslabs = SLABS_PER_PAGE << MAX_ORDER; + nareas = limit_nareas(nareas, nslabs); + } + + pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas); + pool = kzalloc(pool_size, gfp); + if (!pool) + goto error; + pool->areas = (void *)pool + sizeof(*pool); + + if (!transient) { + nslabs = ALIGN(nslabs >> 1, P_IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); + } + tlb_size = nslabs << P_IO_TLB_SHIFT; + while (!(tlb = pswiotlb_alloc_tlb(dev, nid, tlb_size, phys_limit, gfp))) { + if (nslabs <= minslabs) + goto error_tlb; + nslabs = ALIGN(nslabs >> 1, P_IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); + tlb_size = nslabs << P_IO_TLB_SHIFT; + } + if (page_to_nid(tlb) != nid) + goto error_slots; + + slot_order = get_order(array_size(sizeof(*pool->slots), nslabs)); + pool->slots = (struct p_io_tlb_slot *) + __get_free_pages(gfp, slot_order); + if (!pool->slots) + goto error_slots; + + pswiotlb_init_io_tlb_pool(pool, nid, page_to_phys(tlb), nslabs, true, nareas); + return pool; + +error_slots: + pswiotlb_free_tlb(page_address(tlb), tlb_size); +error_tlb: + kfree(pool); +error: + return NULL; +} +static void pswiotlb_prepare_release_pool(struct p_io_tlb_mem *mem, + struct p_io_tlb_pool *pool, int pool_idx) +{ + int capacity; + + spin_lock(&mem->lock); + capacity = mem->capacity; + mem->pool_addr[pool_idx] = mem->pool_addr[capacity - 1]; + mem->pool_addr[capacity - 1] = pool; + mem->capacity--; + mem->nslabs -= pool->nslabs; + spin_unlock(&mem->lock); +} +static void pswiotlb_release_pool(struct p_io_tlb_mem *mem, + struct p_io_tlb_pool *pool, int pool_idx) +{ + unsigned int bytes = pool->nslabs * P_IO_TLB_SIZE; + unsigned int order = get_order(bytes); + 
struct page *page_start; + size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); + int pool_idx1; + + spin_lock(&mem->lock); + pool_idx1 = mem->whole_size - 1; + mem->pool_addr[pool_idx] = mem->pool_addr[pool_idx1]; + mem->whole_size--; + spin_unlock(&mem->lock); + + bitmap_free(pool->busy_record); + free_pages((unsigned long)pool->slots, get_order(slots_size)); + page_start = pfn_to_page(PFN_DOWN(pool->start)); + __free_pages(page_start, order); + kfree(pool); +} +static void pswiotlb_monitor_service(struct timer_list *timer) +{ + int i, j, pool_idx; + struct p_io_tlb_pool *pool; + struct p_io_tlb_mem *mem; + int capacity, whole_size; + + for (i = 0; i < pswiotlb_node_num; i++) { + mem = &p_io_tlb_default_mem[i]; + whole_size = mem->whole_size; + capacity = mem->capacity; + rcu_read_lock(); + for (pool_idx = 1; pool_idx < whole_size; pool_idx++) { + pool = mem->pool_addr[pool_idx]; + for (j = 0; j < DIV_ROUND_UP(pool->nareas, BITS_PER_LONG); j++) { + if (*(pool->busy_record + j) != 0) { + pool->busy_flag = true; + break; + } + pool->busy_flag = false; + } + if (!pool->busy_flag) + pool->free_cnt++; + else + pool->free_cnt = 0; + if (pool->free_cnt >= pool->free_th && pool_idx < capacity) { + pswiotlb_prepare_release_pool(mem, pool, pool_idx); + capacity--; + } + if (pool->free_cnt >= 2 * pool->free_th && !pool->busy_flag) { + pswiotlb_release_pool(mem, pool, pool_idx); + whole_size--; + } + } + rcu_read_unlock(); + } + + mod_timer(timer, jiffies + 2 * HZ); +} +static struct p_io_tlb_pool *pswiotlb_formal_alloc(struct device *dev, + struct p_io_tlb_mem *mem) +{ + struct p_io_tlb_pool *pool; + + pool = pswiotlb_alloc_pool(dev, mem->numa_node_id, + P_IO_TLB_MIN_SLABS, dynamic_inc_thr_npslabs, + dynamic_inc_thr_npslabs, mem->phys_limit, + 0, GFP_NOWAIT | __GFP_NOWARN); + if (!pool) { + pr_warn_once("Failed to allocate new formal pool"); + return NULL; + } + + pool->busy_record = bitmap_zalloc(pool->nareas, GFP_KERNEL); + if (!pool->busy_record) { + pr_warn_ratelimited("%s: Failed to allocate pool busy record.\n", __func__); + return NULL; + } + + add_mem_pool(mem, pool); + + return pool; +} + +/** + * pswiotlb_dyn_free() - RCU callback to free a memory pool + * @rcu: RCU head in the corresponding struct p_io_tlb_pool. 
+ */ +static void pswiotlb_dyn_free(struct rcu_head *rcu) +{ + struct p_io_tlb_pool *pool = container_of(rcu, struct p_io_tlb_pool, rcu); + size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs); + size_t tlb_size = pool->end - pool->start; + + free_pages((unsigned long)pool->slots, get_order(slots_size)); + pswiotlb_free_tlb(pool->vaddr, tlb_size); + kfree(pool); +} +static void pswiotlb_init_tlb_mem_dynamic(struct p_io_tlb_mem *mem, int nid) +{ + spin_lock_init(&mem->lock); + mem->capacity = 0; + mem->whole_size = 0; + mem->numa_node_id = nid; +} + +bool pswiotlb_is_dev_in_passthroughlist(struct pci_dev *dev) +{ + struct pswiotlb_passthroughlist *bl_entry; + + rcu_read_lock(); + list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { + if (bl_entry->vendor == dev->vendor) { + rcu_read_unlock(); + goto out; + } + } + rcu_read_unlock(); + + return true; +out: + return false; +} + +static void pswiotlb_show_passthroughlist(void) +{ + struct pswiotlb_passthroughlist *bl_entry; + + pr_info("The following vendors devices belong to are incompatible with pswiotlb temporarily:\n"); + rcu_read_lock(); + list_for_each_entry_rcu(bl_entry, &passthroughlist, node) + pr_info("0x%06x", bl_entry->vendor); + rcu_read_unlock(); +} + +static void __init pswiotlb_passthroughlist_init(void) +{ + int dev_num = 0; + int i; + size_t alloc_size; + struct pswiotlb_passthroughlist *passthroughlist_array; + + spin_lock_init(&passthroughlist_lock); + + for (i = 0; ps_passthroughlist[i].vendor != 0; i++) + dev_num++; + + alloc_size = PAGE_ALIGN(array_size(sizeof(struct pswiotlb_passthroughlist), dev_num)); + passthroughlist_array = memblock_alloc(alloc_size, PAGE_SIZE); + if (!passthroughlist_array) { + pr_warn("%s: Failed to allocate memory for passthroughlist\n", + __func__); + return; + } + + for (i = 0; i < dev_num; i++) { + passthroughlist_array[i].vendor = ps_passthroughlist[i].vendor; + passthroughlist_array[i].device = ps_passthroughlist[i].device; + + spin_lock(&passthroughlist_lock); + list_add_rcu(&passthroughlist_array[i].node, &passthroughlist); + spin_unlock(&passthroughlist_lock); + } + + pswiotlb_show_passthroughlist(); +} + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init pswiotlb_init(bool addressing_limit, unsigned int flags) +{ + int i; + int nid; + unsigned long start_pfn, end_pfn; + + /* Get number of numa node*/ + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid); + pswiotlb_node_num = nid + 1; + pr_info("Total number of numa nodes is %d\n", pswiotlb_node_num); + for (i = 0; i < pswiotlb_node_num; i++) { + struct p_io_tlb_mem *mem = &p_io_tlb_default_mem[i]; + + pswiotlb_init_tlb_mem_dynamic(mem, i); + pswiotlb_record_mem_range(mem); + pr_info(" node %3d memory range: [%#018Lx-%#018Lx], total memory: %ldMB\n", + i, mem->node_min_addr, mem->node_max_addr, + mem->node_total_mem >> 20); + } + /* Get P TLB memory according to numa node id */ + for (i = 0; i < pswiotlb_node_num; i++) + pswiotlb_init_remap(addressing_limit, i, flags, NULL); + + pswiotlb_passthroughlist_init(); +} + +/** + * alloc_dma_pages() - allocate pages to be used for DMA + * @gfp: GFP flags for the allocation. + * @bytes: Size of the buffer. + * + * Allocate pages from the buddy allocator. If successful, make the allocated + * pages decrypted that they can be used for DMA. + * + * Return: Decrypted pages, or %NULL on failure. 
+ */ +static struct page *alloc_dma_pages(int nid, gfp_t gfp, size_t bytes) +{ + unsigned int order = get_order(bytes); + struct page *page; + void *vaddr; + + page = alloc_pages_node(nid, gfp, order); + if (!page) + return NULL; + + vaddr = page_address(page); + if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes))) + goto error; + return page; + +error: + __free_pages(page, order); + return NULL; +} + +/** + * pswiotlb_find_pool() - find the Phytium IO TLB pool for a physical address + * @dev: Device which has mapped the DMA buffer. + * @paddr: Physical address within the DMA buffer. + * + * Find the Phytium IO TLB memory pool descriptor which contains the given physical + * address, if any. + * + * Return: Memory pool which contains @paddr, or %NULL if none. + */ +struct p_io_tlb_pool *pswiotlb_find_pool(struct device *dev, int nid, phys_addr_t paddr) +{ + struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; + struct p_io_tlb_pool *pool; + int i; + int whole_size; + + /* prevent any other reads prior to this time */ + smp_rmb(); + whole_size = mem->whole_size; + rcu_read_lock(); + for (i = 0; i < whole_size; i++) { + pool = mem->pool_addr[i]; + if (paddr >= pool->start && paddr < pool->end) + goto out; + } + + pool = NULL; +out: + rcu_read_unlock(); + return pool; +} + +/** + * pswiotlb_dev_init() - initialize pswiotlb fields in &struct device + * @dev: Device to be initialized. + */ +void pswiotlb_dev_init(struct device *dev) +{ + dev->dma_uses_p_io_tlb = false; +} + +void pswiotlb_store_local_node(struct pci_dev *dev, struct pci_bus *bus) +{ + int nid; + struct p_io_tlb_pool *defpool; + struct p_io_tlb_mem *mem; + + dev->dev.local_node = pcibus_to_node(bus); + /* register pswiotlb resources */ + dev->dev.dma_p_io_tlb_mem = p_io_tlb_default_mem; + nid = dev->dev.local_node; + defpool = &dev->dev.dma_p_io_tlb_mem[nid].defpool; + mem = &dev->dev.dma_p_io_tlb_mem[nid]; + pci_info(dev, "numa node: %d, pswiotlb defpool range: [%#018Lx-%#018Lx]\n" + "local node range: [%#018Lx-%#018Lx]\n", nid, + defpool->start, defpool->end, mem->node_min_addr, mem->node_max_addr); +} +/* + * Return the offset into a pswiotlb slot required to keep the device happy. + */ +static unsigned int pswiotlb_align_offset(struct device *dev, u64 addr) +{ + return addr & dma_get_min_align_mask(dev) & (P_IO_TLB_SIZE - 1); +} +/* + * Bounce: copy the pswiotlb buffer from or back to the original dma location + */ +static void pswiotlb_bounce(struct device *dev, int nid, phys_addr_t tlb_addr, size_t size, + enum dma_data_direction dir, struct p_io_tlb_pool *mem) +{ + int index = (tlb_addr - mem->start) >> P_IO_TLB_SHIFT; + phys_addr_t orig_addr = mem->slots[index].orig_addr; + size_t alloc_size = mem->slots[index].alloc_size; + unsigned long pfn = PFN_DOWN(orig_addr); + unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start; + unsigned int tlb_offset, orig_addr_offset; + + if (orig_addr == INVALID_PHYS_ADDR) + return; + + tlb_offset = tlb_addr & (P_IO_TLB_SIZE - 1); + orig_addr_offset = pswiotlb_align_offset(dev, orig_addr); + if (tlb_offset < orig_addr_offset) { + dev_WARN_ONCE(dev, 1, + "Access before mapping start detected. orig offset %u, requested offset %u.\n", + orig_addr_offset, tlb_offset); + return; + } + + tlb_offset -= orig_addr_offset; + if (tlb_offset > alloc_size) { + dev_WARN_ONCE(dev, 1, + "Buffer overflow detected. Allocation size: %zu. 
Mapping size: %zu+%u.\n", + alloc_size, size, tlb_offset); + return; + } + + orig_addr += tlb_offset; + alloc_size -= tlb_offset; + + if (size > alloc_size) { + dev_WARN_ONCE(dev, 1, + "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n", + alloc_size, size); + size = alloc_size; + } + + if (PageHighMem(pfn_to_page(pfn))) { + unsigned int offset = orig_addr & ~PAGE_MASK; + struct page *page; + unsigned int sz = 0; + unsigned long flags; + + dev_info(dev, "%s line=%d !!!!!!HighMem!!!!!! dir: %d, tlb_addr: %#018Lx, size: %#lx\n", + __func__, __LINE__, dir, tlb_addr, size); + + while (size) { + sz = min_t(size_t, PAGE_SIZE - offset, size); + + local_irq_save(flags); + page = pfn_to_page(pfn); + if (dir == DMA_TO_DEVICE) + memcpy_from_page(vaddr, page, offset, sz); + else + memcpy_to_page(page, offset, vaddr, sz); + local_irq_restore(flags); + + size -= sz; + pfn++; + vaddr += sz; + offset = 0; + } + } else if (dir == DMA_TO_DEVICE) { + memcpy(vaddr, phys_to_virt(orig_addr), size); + } else { + memcpy(phys_to_virt(orig_addr), vaddr, size); + } +} +static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) +{ + return start + (idx << P_IO_TLB_SHIFT); +} +/* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. + */ +static inline unsigned long get_max_slots(unsigned long boundary_mask) +{ + return (boundary_mask >> P_IO_TLB_SHIFT) + 1; +} + +static unsigned int wrap_area_index(struct p_io_tlb_pool *mem, unsigned int index) +{ + if (index >= mem->area_nslabs) + return 0; + return index; +} + +/** + * pswiotlb_area_find_slots() - search for slots in one Phytium IO TLB memory area + * @dev: Device which maps the buffer. + * @pool: Memory pool to be searched. + * @area_index: Index of the Phytium IO TLB memory area to be searched. + * @orig_addr: Original (non-bounced) Phytium IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Find a suitable sequence of Phytium IO TLB entries for the request and allocate + * a buffer from the given Phytium IO TLB memory area. + * This function takes care of locking. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int pswiotlb_area_find_slots(struct device *dev, int nid, struct p_io_tlb_pool *pool, + int area_index, phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask) +{ + struct p_io_tlb_area *area = pool->areas + area_index; + unsigned long boundary_mask = dma_get_seg_boundary(dev); + dma_addr_t tbl_dma_addr = + phys_to_dma_unencrypted(dev, pool->start) & boundary_mask; + unsigned long max_slots = get_max_slots(boundary_mask); + unsigned int iotlb_align_mask = + dma_get_min_align_mask(dev) | alloc_align_mask; + unsigned int nslots = nr_slots(alloc_size), stride; + unsigned int offset = pswiotlb_align_offset(dev, orig_addr); + unsigned int index, slots_checked, count = 0, i; + unsigned long flags; + unsigned int slot_base; + unsigned int slot_index; + + WARN_ON(!nslots); + WARN_ON(area_index >= pool->nareas); + + /* + * For allocations of PAGE_SIZE or larger only look for page aligned + * allocations. + */ + if (alloc_size >= PAGE_SIZE) + iotlb_align_mask |= ~PAGE_MASK; + iotlb_align_mask &= ~(P_IO_TLB_SIZE - 1); + + /* + * For mappings with an alignment requirement don't bother looping to + * unaligned slots once we found an aligned one. 
+ */ + stride = (iotlb_align_mask >> P_IO_TLB_SHIFT) + 1; + + if (spin_trylock_irqsave(&area->lock, flags)) { + if (unlikely(nslots > pool->area_nslabs - area->used)) + goto not_found; + + slot_base = area_index * pool->area_nslabs; + index = area->index; + + for (slots_checked = 0; slots_checked < pool->area_nslabs;) { + slot_index = slot_base + index; + + if (orig_addr && + (slot_addr(tbl_dma_addr, slot_index) & + iotlb_align_mask) != (orig_addr & iotlb_align_mask)) { + index = wrap_area_index(pool, index + 1); + slots_checked++; + continue; + } + + if (!iommu_is_span_boundary(slot_index, nslots, + nr_slots(tbl_dma_addr), + max_slots)) { + if (pool->slots[slot_index].list >= nslots) + goto found; + } + index = wrap_area_index(pool, index + stride); + slots_checked += stride; + } + } else { + return -1; + } + +not_found: + spin_unlock_irqrestore(&area->lock, flags); + return -1; + +found: + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot onwards + * and set the list of free entries to '0' indicating unavailable. + */ + for (i = slot_index; i < slot_index + nslots; i++) { + pool->slots[i].list = 0; + pool->slots[i].alloc_size = alloc_size - (offset + + ((i - slot_index) << P_IO_TLB_SHIFT)); + } + for (i = slot_index - 1; + io_tlb_offset(i) != P_IO_TLB_SEGSIZE - 1 && + pool->slots[i].list; i--) + pool->slots[i].list = ++count; + + /* + * Update the indices to avoid searching in the next round. + */ + area->index = wrap_area_index(pool, index + nslots); + area->used += nslots; + spin_unlock_irqrestore(&area->lock, flags); + + return slot_index; +} + +/** + * pswiotlb_pool_find_slots() - search for slots in one memory pool + * @dev: Device which maps the buffer. + * @pool: Memory pool to be searched. + * @orig_addr: Original (non-bounced)Phytium IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * + * Search through one memory pool to find a sequence of slots that match the + * allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. + */ +static int pswiotlb_pool_find_slots(struct device *dev, int nid, struct p_io_tlb_pool *pool, + phys_addr_t orig_addr, size_t alloc_size, + unsigned int alloc_align_mask) +{ + int start = raw_smp_processor_id() & (pool->nareas - 1); + int i = start, index; + + do { + index = pswiotlb_area_find_slots(dev, nid, pool, i, orig_addr, + alloc_size, alloc_align_mask); + if (index >= 0) { + if ((pool != &p_io_tlb_default_mem[nid].defpool) && + !pool->transient) { + bitmap_set(pool->busy_record, i, 1); + } + return index; + } + if (++i >= pool->nareas) + i = 0; + } while (i != start); + + return -1; +} + +/** + * pswiotlb_find_slots() - search for slots in the whole pswiotlb + * @dev: Device which maps the buffer. + * @orig_addr: Original (non-bounced) Phytium IO buffer address. + * @alloc_size: Total requested size of the bounce buffer, + * including initial alignment padding. + * @alloc_align_mask: Required alignment of the allocated buffer. + * @retpool: Used memory pool, updated on return. + * + * Search through the whole Phytium software IO TLB to find a sequence of slots that + * match the allocation constraints. + * + * Return: Index of the first allocated slot, or -1 on error. 
+ */ +static int pswiotlb_find_slots(struct device *dev, int nid, phys_addr_t orig_addr, + size_t alloc_size, unsigned int alloc_align_mask, + struct p_io_tlb_pool **retpool) +{ + struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; + struct p_io_tlb_pool *pool; + int index; + int try_pool_idx; + int i; + int cpuid; + int current_ratio; + unsigned long pswiotlb_mem; + unsigned long nslabs_per_pool = dynamic_inc_thr_npslabs; + + cpuid = raw_smp_processor_id(); + + rcu_read_lock(); +#ifndef CONFIG_ARM64_4K_PAGES + for (i = 0; i < 15; i++) { + if (i == 0) { + pool = mem->pool_addr[0]; + index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, + alloc_size, alloc_align_mask); + } else if (i == 1 && mem->capacity > (cpuid + 1)) { + pool = mem->pool_addr[cpuid + 1]; + index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, + alloc_size, alloc_align_mask); + } else { + try_pool_idx = get_random_u32() % mem->capacity; + pool = mem->pool_addr[try_pool_idx]; + index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, + alloc_size, alloc_align_mask); + } + + if (index >= 0) { + rcu_read_unlock(); + goto found; + } + } +#else + for (i = 0; i < 15; i++) { + try_pool_idx = get_random_u32() % mem->capacity; + pool = mem->pool_addr[try_pool_idx]; + index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, + alloc_size, alloc_align_mask); + + if (index >= 0) { + rcu_read_unlock(); + goto found; + } + } +#endif + rcu_read_unlock(); + if (nslabs_per_pool > SLABS_PER_PAGE << MAX_ORDER) + nslabs_per_pool = SLABS_PER_PAGE << MAX_ORDER; + + nslabs_per_pool = ALIGN(nslabs_per_pool >> 1, P_IO_TLB_SEGSIZE); + pswiotlb_mem = P_IO_TLB_DEFAULT_SIZE + + (nslabs_per_pool << P_IO_TLB_SHIFT) * (mem->whole_size - 1); + current_ratio = (pswiotlb_mem * 100 + mem->node_total_mem / 2) / mem->node_total_mem; + if (current_ratio >= P_IO_TLB_EXT_WATERMARK) { + dev_warn_once(dev, "Total pswiotlb (%ld MB) exceeds the watermark (%d%%)\n" + "of memory (%ld MB) in node %d, pswiotlb expansion is prohibited.\n", + pswiotlb_mem >> 20, P_IO_TLB_EXT_WATERMARK, + mem->node_total_mem >> 20, nid); + return -1; + } + + if (!mem->can_grow) + return -1; + + pool = pswiotlb_formal_alloc(dev, mem); + if (!pool) + return -1; + + /* retry */ + rcu_read_lock(); + index = pswiotlb_pool_find_slots(dev, nid, pool, orig_addr, + alloc_size, alloc_align_mask); + rcu_read_unlock(); + + if (index < 0) { + pswiotlb_dyn_free(&pool->rcu); + return -1; + } + +found: + WRITE_ONCE(dev->dma_uses_p_io_tlb, true); + + /* + * The general barrier orders reads and writes against a presumed store + * of the PSWIOTLB buffer address by a device driver (to a driver private + * data structure). It serves two purposes. + * + * First, the store to dev->dma_uses_p_io_tlb must be ordered before the + * presumed store. This guarantees that the returned buffer address + * cannot be passed to another CPU before updating dev->dma_uses_p_io_tlb. + * + * Second, the load from mem->pools must be ordered before the same + * presumed store. This guarantees that the returned buffer address + * cannot be observed by another CPU before an update of the RCU list + * that was made by pswiotlb_dyn_alloc() on a third CPU (cf. multicopy + * atomicity). + * + * See also the comment in is_pswiotlb_buffer(). + */ + smp_mb(); + + *retpool = pool; + return index; +} +#ifdef CONFIG_DEBUG_FS + +/** + * mem_used() - get number of used slots in an allocator + * @mem: Phytium software IO TLB allocator. 
+ * + * The result is accurate in this version of the function, because an atomic + * counter is available if CONFIG_DEBUG_FS is set. + * + * Return: Number of used slots. + */ +static unsigned long mem_used(struct p_io_tlb_mem *mem) +{ + return atomic_long_read(&mem->total_used); +} + +#else /* !CONFIG_DEBUG_FS */ + +/** + * mem_pool_used() - get number of used slots in a memory pool + * @pool: Phytium software IO TLB memory pool. + * + * The result is not accurate, see mem_used(). + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_pool_used(struct p_io_tlb_pool *pool) +{ + int i; + unsigned long used = 0; + + for (i = 0; i < pool->nareas; i++) + used += pool->areas[i].used; + return used; +} + +/** + * mem_used() - get number of used slots in an allocator + * @mem: Phytium software IO TLB allocator. + * + * The result is not accurate, because there is no locking of individual + * areas. + * + * Return: Approximate number of used slots. + */ +static unsigned long mem_used(struct p_io_tlb_mem *mem) +{ + struct p_io_tlb_pool *pool; + unsigned long used = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(pool, &mem->pools, node) + used += mem_pool_used(pool); + rcu_read_unlock(); + + return used; +} + +#endif /* CONFIG_DEBUG_FS */ + +phys_addr_t pswiotlb_tbl_map_single(struct device *dev, int nid, phys_addr_t orig_addr, + size_t mapping_size, size_t alloc_size, + unsigned int alloc_align_mask, enum dma_data_direction dir, + unsigned long attrs) +{ + struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid]; + unsigned int offset = pswiotlb_align_offset(dev, orig_addr); + struct p_io_tlb_pool *pool; + unsigned int i; + unsigned long index; + phys_addr_t tlb_addr; + struct page *page; + + if (alloc_size > (P_IO_TLB_SEGSIZE << P_IO_TLB_SHIFT)) { + dev_warn_ratelimited(dev, "alloc size 0x%lx is larger than segment(0x%x) of pswiotlb\n", + alloc_size, P_IO_TLB_SEGSIZE << P_IO_TLB_SHIFT); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + if (!mem || !mem->nslabs) { + dev_warn_ratelimited(dev, + "Can not allocate PSWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + if (mapping_size > alloc_size) { + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", + mapping_size, alloc_size); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + index = pswiotlb_find_slots(dev, nid, orig_addr, + alloc_size + offset, alloc_align_mask, &pool); + if (index == -1) { + if (!(attrs & DMA_ATTR_NO_WARN)) + dev_warn_once(dev, + "pswiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", + alloc_size, mem->nslabs, mem_used(mem)); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + for (i = 0; i < nr_slots(alloc_size + offset); i++) + pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); + tlb_addr = slot_addr(pool->start, index) + offset; + page = pfn_to_page(PFN_DOWN(tlb_addr)); + set_bit(PG_pswiotlb, &page->flags); + + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwrite the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking pswiotlb content (i.e. + * kernel memory) to user-space. 
+ */
+ pswiotlb_bounce(dev, nid, tlb_addr, mapping_size, DMA_TO_DEVICE, pool);
+ return tlb_addr;
+}
+static void pswiotlb_release_slots(struct device *dev, int nid, phys_addr_t tlb_addr,
+ struct p_io_tlb_pool *mem)
+{
+ unsigned long flags;
+ unsigned int offset = pswiotlb_align_offset(dev, tlb_addr);
+ int index = (tlb_addr - offset - mem->start) >> P_IO_TLB_SHIFT;
+ int nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ int aindex = index / mem->area_nslabs;
+ struct p_io_tlb_area *area = &mem->areas[aindex];
+ int count, i;
+ struct page *page = pfn_to_page(PFN_DOWN(tlb_addr));
+
+ /*
+ * Return the buffer to the free list by setting the corresponding
+ * entries to indicate the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries
+ * with slots below and above the range being returned.
+ */
+ WARN_ON(aindex >= mem->nareas);
+
+ spin_lock_irqsave(&area->lock, flags);
+ if (index + nslots < ALIGN(index + 1, P_IO_TLB_SEGSIZE))
+ count = mem->slots[index + nslots].list;
+ else
+ count = 0;
+
+ /*
+ * Step 1: return the slots to the free list, merging the slots with
+ * succeeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--) {
+ mem->slots[i].list = ++count;
+ mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[i].alloc_size = 0;
+ }
+
+ /*
+ * Step 2: merge the returned slots with the preceding slots, if
+ * available (non-zero)
+ */
+ for (i = index - 1;
+ io_tlb_offset(i) != P_IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
+ i--)
+ mem->slots[i].list = ++count;
+ area->used -= nslots;
+ if ((mem != &p_io_tlb_default_mem[nid].defpool) && (area->used == 0))
+ bitmap_clear(mem->busy_record, aindex, 1);
+ clear_bit(PG_pswiotlb, &page->flags);
+ spin_unlock_irqrestore(&area->lock, flags);
+}
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void pswiotlb_tbl_unmap_single(struct device *dev, int nid, phys_addr_t tlb_addr,
+ size_t offset, size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs, struct p_io_tlb_pool *pool)
+{
+ struct page *page = pfn_to_page(PFN_DOWN(tlb_addr));
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) &&
+ (test_bit(PG_pswiotlbsync, &page->flags) == false))
+ pswiotlb_bounce(dev, nid, tlb_addr, mapping_size, DMA_FROM_DEVICE, pool);
+
+ tlb_addr -= offset;
+ pswiotlb_release_slots(dev, nid, tlb_addr, pool);
+
+ clear_bit(PG_pswiotlbsync, &page->flags);
+}
+void pswiotlb_sync_single_for_device(struct device *dev, int nid, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool)
+{
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+ pswiotlb_bounce(dev, nid, tlb_addr, size, DMA_TO_DEVICE, pool);
+ else
+ WARN_ON(dir != DMA_FROM_DEVICE);
+}
+
+void pswiotlb_sync_single_for_cpu(struct device *dev, int nid, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir, struct p_io_tlb_pool *pool)
+{
+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) {
+ struct page *page = pfn_to_page(PFN_DOWN(tlb_addr));
+
+ pswiotlb_bounce(dev, nid, tlb_addr, size, DMA_FROM_DEVICE, pool);
+ set_bit(PG_pswiotlbsync, &page->flags);
+ } else
+ WARN_ON(dir != DMA_TO_DEVICE);
+}
+/*
+ * Create a pswiotlb mapping for the buffer at @paddr, and in case of DMAing
+ * to the device copy the data into it as well.
+ */
+dma_addr_t pswiotlb_map(struct device *dev, int nid, phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ phys_addr_t pswiotlb_addr;
+ dma_addr_t dma_addr;
+
+ trace_pswiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
+
+ pswiotlb_addr = pswiotlb_tbl_map_single(dev, nid, paddr, size,
+ PAGE_ALIGN(size), PAGE_SIZE - 1, dir, attrs);
+ if (pswiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
+ return DMA_MAPPING_ERROR;
+
+ dma_addr = phys_to_dma_unencrypted(dev, pswiotlb_addr);
+
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(pswiotlb_addr, size, dir);
+ return dma_addr;
+}
+size_t pswiotlb_max_mapping_size(struct device *dev)
+{
+ int min_align_mask = dma_get_min_align_mask(dev);
+ int min_align = 0;
+
+ /*
+ * pswiotlb_find_slots() skips slots according to
+ * min align mask. This affects max mapping size.
+ * Take it into account here.
+ */
+ if (min_align_mask)
+ min_align = roundup(min_align_mask, P_IO_TLB_SIZE);
+
+ return ((size_t)P_IO_TLB_SIZE) * P_IO_TLB_SEGSIZE - min_align;
+}
+
+/**
+ * is_pswiotlb_allocated() - check if the default Phytium software IO TLB is initialized
+ */
+bool is_pswiotlb_allocated(struct device *dev)
+{
+ int nid = dev->local_node;
+ return p_io_tlb_default_mem[nid].nslabs;
+}
+
+bool is_pswiotlb_active(struct device *dev)
+{
+ int nid = dev->local_node;
+ struct p_io_tlb_mem *mem = &dev->dma_p_io_tlb_mem[nid];
+
+ return mem && mem->nslabs;
+}
+
+/**
+ * default_pswiotlb_base() - get the base address of the default PSWIOTLB
+ *
+ * Get the lowest physical address used by the default Phytium software IO TLB pool.
+ */
+phys_addr_t default_pswiotlb_base(struct device *dev)
+{
+ int nid = dev->local_node;
+
+ p_io_tlb_default_mem[nid].can_grow = false;
+
+ return p_io_tlb_default_mem[nid].defpool.start;
+}
+
+/**
+ * default_pswiotlb_limit() - get the address limit of the default PSWIOTLB
+ *
+ * Get the highest physical address used by the default Phytium software IO TLB pool.
+ */ +phys_addr_t default_pswiotlb_limit(struct device *dev) +{ + int nid = dev->local_node; + + return p_io_tlb_default_mem[nid].phys_limit; +} +#ifdef CONFIG_DEBUG_FS + +static int p_io_tlb_used_get(void *data, u64 *val) +{ + struct p_io_tlb_mem *mem = data; + + *val = mem_used(mem); + return 0; +} + +static int p_io_tlb_hiwater_get(void *data, u64 *val) +{ + struct p_io_tlb_mem *mem = data; + + *val = atomic_long_read(&mem->used_hiwater); + return 0; +} + +static int p_io_tlb_hiwater_set(void *data, u64 val) +{ + struct p_io_tlb_mem *mem = data; + + /* Only allow setting to zero */ + if (val != 0) + return -EINVAL; + + atomic_long_set(&mem->used_hiwater, val); + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_p_io_tlb_used, p_io_tlb_used_get, NULL, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_p_io_tlb_hiwater, p_io_tlb_hiwater_get, + p_io_tlb_hiwater_set, "%llu\n"); + +static void pswiotlb_create_debugfs_files(struct p_io_tlb_mem *mem, + int nid, const char *dirname) +{ + atomic_long_set(&mem->total_used, 0); + atomic_long_set(&mem->used_hiwater, 0); + + mem->debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); + if (!mem->nslabs) + return; + + debugfs_create_ulong("p_io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); + debugfs_create_file("p_io_tlb_used", 0400, mem->debugfs, mem, + &fops_p_io_tlb_used); + debugfs_create_file("p_io_tlb_used_hiwater", 0600, mem->debugfs, mem, + &fops_p_io_tlb_hiwater); +} + +static int passthroughlist_display_show(struct seq_file *m, void *v) +{ + struct pswiotlb_passthroughlist *bl_entry; + + rcu_read_lock(); + list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { + seq_printf(m, "0x%04x\n", bl_entry->vendor); + } + rcu_read_unlock(); + + return 0; +} + +static int version_display_show(struct seq_file *m, void *v) +{ + seq_puts(m, "pswiotlb version "); + seq_printf(m, "%s\n", PSWIOTLB_VERSION); + + return 0; +} + +static int passthroughlist_add(void *data, u64 val) +{ + struct pswiotlb_passthroughlist *bl_entry; + unsigned long flags; + + bl_entry = kzalloc(sizeof(*bl_entry), GFP_ATOMIC); + if (!bl_entry) + return -ENOMEM; + + bl_entry->vendor = val; + bl_entry->from_grub = false; + + spin_lock_irqsave(&passthroughlist_lock, flags); + list_add_rcu(&bl_entry->node, &passthroughlist); + spin_unlock_irqrestore(&passthroughlist_lock, flags); + + return 0; +} + +static int passthroughlist_del(void *data, u64 val) +{ + struct pswiotlb_passthroughlist *bl_entry; + unsigned long flags; + + rcu_read_lock(); + list_for_each_entry_rcu(bl_entry, &passthroughlist, node) { + if (bl_entry->vendor == val) + goto found; + } + rcu_read_unlock(); + + return 0; +found: + rcu_read_unlock(); + spin_lock_irqsave(&passthroughlist_lock, flags); + list_del_rcu(&bl_entry->node); + spin_unlock_irqrestore(&passthroughlist_lock, flags); + + if (bl_entry->from_grub == false) + kfree(bl_entry); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(passthroughlist_display); +DEFINE_SHOW_ATTRIBUTE(version_display); +DEFINE_DEBUGFS_ATTRIBUTE(fops_passthroughlist_add, NULL, + passthroughlist_add, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(fops_passthroughlist_del, NULL, + passthroughlist_del, "%llu\n"); + +static void pswiotlb_create_passthroughlist_debugfs_files(const char *dirname) +{ + passthroughlist_debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs); + if (!passthroughlist_debugfs) + return; + + debugfs_create_file("show_devices", 0400, passthroughlist_debugfs, NULL, + &passthroughlist_display_fops); + debugfs_create_file("add_device", 0600, passthroughlist_debugfs, NULL, + 
&fops_passthroughlist_add);
+ debugfs_create_file("del_device", 0600, passthroughlist_debugfs, NULL,
+ &fops_passthroughlist_del);
+}
+
+static void pswiotlb_create_pswiotlb_debugfs_files(const char *dirname)
+{
+ int i;
+ char name[20] = "";
+ char passthroughlist_name[50] = "";
+
+ pswiotlb_debugfs = debugfs_create_dir(dirname, pswiotlb_debugfs);
+ if (!pswiotlb_debugfs)
+ return;
+
+ debugfs_create_file("version", 0400, pswiotlb_debugfs, NULL,
+ &version_display_fops);
+
+ for (i = 0; i < pswiotlb_node_num; i++) {
+ sprintf(name, "%s-%d", "pswiotlb", i);
+ pswiotlb_create_debugfs_files(&p_io_tlb_default_mem[i], i, name);
+ }
+ sprintf(passthroughlist_name, "%s", "pswiotlb-passthroughlist");
+ pswiotlb_create_passthroughlist_debugfs_files(passthroughlist_name);
+}
+
+static int __init pswiotlb_create_default_debugfs(void)
+{
+ char name[20] = "";
+
+ if (!pswiotlb_mtimer_alive && !pswiotlb_force_disable) {
+ pr_info("setup pswiotlb monitor timer service\n");
+ timer_setup(&service_timer, pswiotlb_monitor_service, 0);
+ pswiotlb_mtimer_alive = true;
+
+ /* check pswiotlb every 2 seconds */
+ mod_timer(&service_timer, jiffies + 2 * HZ);
+ }
+
+ if (!pswiotlb_force_disable) {
+ sprintf(name, "%s", "pswiotlb");
+ pswiotlb_create_pswiotlb_debugfs_files(name);
+ }
+
+ return 0;
+}
+
+late_initcall(pswiotlb_create_default_debugfs);
+
+#else /* !CONFIG_DEBUG_FS */
+
+static inline void pswiotlb_create_debugfs_files(struct p_io_tlb_mem *mem,
+ int nid, const char *dirname)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
--
Gitee


From caa3a5eb9d834c295c8acc1ed974b4a34c0c87ad Mon Sep 17 00:00:00 2001
From: Jiakun Shuai
Date: Fri, 30 May 2025 09:07:51 +0800
Subject: [PATCH 2/2] dma: phytium: enable CONFIG_PSWIOTLB

phytium inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICBHX3
CVE: NA

----------------------------------------------------------

Enable CONFIG_PSWIOTLB. Even with CONFIG_PSWIOTLB=y, this feature
only takes effect on Phytium Server platforms.

Signed-off-by: Cui Chao
Signed-off-by: Jiakun Shuai
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 1b982432ce51..0142841de040 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -7899,6 +7899,7 @@ CONFIG_CMA_SIZE_SEL_MBYTES=y
 CONFIG_CMA_ALIGNMENT=8
 # CONFIG_DMA_API_DEBUG is not set
 CONFIG_DMA_MAP_BENCHMARK=y
+CONFIG_PSWIOTLB=y
 CONFIG_SGL_ALLOC=y
 CONFIG_CHECK_SIGNATURE=y
 # CONFIG_FORCE_NR_CPUS is not set
--
Gitee
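
For reference, drivers never call the pswiotlb helpers above directly: they keep
using the generic DMA API, and the bounce-and-copy path is serviced transparently
on Phytium Server SoCs when pswiotlb is active for the device. The minimal,
hypothetical driver fragment below is a sketch only (example_rx(), its buffer and
length are illustrative assumptions, not part of these patches); the dma_*() calls
it uses are the standard kernel DMA mapping API.

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int example_rx(struct device *dev, void *buf, size_t len)
{
	dma_addr_t dma;

	/* On a Phytium Server SoC this mapping may be bounced through a
	 * pswiotlb slot allocated near the device's local NUMA node.
	 */
	dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, dma))
		return -ENOMEM;

	/* ... program the hardware with 'dma' and wait for completion ... */

	/* Copies device data from the bounce buffer back into 'buf';
	 * this is the additional D2H memory copy described above.
	 */
	dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);

	/* Releases the pswiotlb slots; the copy is not repeated here
	 * because the sync above already marked the page as synced.
	 */
	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);

	return 0;
}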